• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 1997-2013, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ********************************************************************/
6 /*
7 ********************************************************************************
8 * File NCCBTST.C
9 *
10 * Modification History:
11 *        Name                            Description
12 *    Madhu Katragadda     7/21/1999      Testing error callback routines
13 ********************************************************************************
14 */
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <ctype.h>
19 #include "cstring.h"
20 #include "unicode/uloc.h"
21 #include "unicode/ucnv.h"
22 #include "unicode/ucnv_err.h"
23 #include "cintltst.h"
24 #include "unicode/utypes.h"
25 #include "unicode/ustring.h"
26 #include "nccbtst.h"
27 #include "unicode/ucnv_cb.h"
28 #include "unicode/utf16.h"
29 
/* The larger of the two buffer sizes (the other is 1) that the test drivers in this file pass to the test routines. */
#define NEW_MAX_BUFFER 999

/*
 * Yields the smaller of x and y.
 * Every use of an argument is parenthesized so that an argument containing a
 * lower-precedence operator (for example "a & b") is compared and selected as
 * a whole. The selected argument is evaluated twice, so do not pass
 * expressions with side effects.
 */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))
/* Number of elements in an array object (must not be applied to a pointer). */
#define ARRAY_LENGTH(array) (sizeof(array)/sizeof((array)[0]))

/* Buffer sizes of the test currently running; reported by setNuConvTestName(). */
static int32_t  gInBufferSize = 0;
static int32_t  gOutBufferSize = 0;
/* Holds the test description written by setNuConvTestName(). */
static char     gNuConvTestName[1024];
38 
/*
 * Writes the first len bytes of a through log_verbose() as a brace-enclosed
 * list of two-digit upper-case hex values, preceded by a newline.
 */
static void printSeq(const uint8_t* a, int len)
{
    int idx;

    log_verbose("\n{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02X, ", a[idx]);
    }
    log_verbose("}\n");
}
47 
/*
 * Writes the first len UChar code units of a through log_verbose() as a
 * brace-enclosed list of four-digit hex values.
 */
static void printUSeq(const UChar* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("  0x%04x, ", a[idx]);
    }
    log_verbose("}\n");
}
56 
/*
 * Writes the first len bytes of a to stderr as a brace-enclosed list of
 * two-digit hex values.
 */
static void printSeqErr(const uint8_t* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "  0x%02x, ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
65 
/*
 * Writes the first len UChar code units of a to stderr as a brace-enclosed
 * list of four-digit hex values.
 */
static void printUSeqErr(const UChar* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%04x, ", a[idx]);
    }
    fprintf(stderr,"}\n");
}
74 
setNuConvTestName(const char * codepage,const char * direction)75 static void setNuConvTestName(const char *codepage, const char *direction)
76 {
77     sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
78             codepage,
79             direction,
80             (int)gInBufferSize,
81             (int)gOutBufferSize);
82 }
83 
84 
85 static void TestCallBackFailure(void);
86 
87 void addTestConvertErrorCallBack(TestNode** root);
88 
addTestConvertErrorCallBack(TestNode ** root)89 void addTestConvertErrorCallBack(TestNode** root)
90 {
91     addTest(root, &TestSkipCallBack,  "tsconv/nccbtst/TestSkipCallBack");
92     addTest(root, &TestStopCallBack,  "tsconv/nccbtst/TestStopCallBack");
93     addTest(root, &TestSubCallBack,   "tsconv/nccbtst/TestSubCallBack");
94     /* BEGIN android-removed
95        To save space, Android does not build complete CJK conversion tables.
96        We skip the test here.
97     addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack");
98        END android-removed */
99 
100 #if !UCONFIG_NO_LEGACY_CONVERSION
101     addTest(root, &TestLegalAndOtherCallBack,  "tsconv/nccbtst/TestLegalAndOtherCallBack");
102     addTest(root, &TestSingleByteCallBack,  "tsconv/nccbtst/TestSingleByteCallBack");
103 #endif
104 
105     addTest(root, &TestCallBackFailure,  "tsconv/nccbtst/TestCallBackFailure");
106 }
107 
TestSkipCallBack()108 static void TestSkipCallBack()
109 {
110     TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
111     TestSkip(1,NEW_MAX_BUFFER);
112     TestSkip(1,1);
113     TestSkip(NEW_MAX_BUFFER, 1);
114 }
115 
TestStopCallBack()116 static void TestStopCallBack()
117 {
118     TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
119     TestStop(1,NEW_MAX_BUFFER);
120     TestStop(1,1);
121     TestStop(NEW_MAX_BUFFER, 1);
122 }
123 
TestSubCallBack()124 static void TestSubCallBack()
125 {
126     TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
127     TestSub(1,NEW_MAX_BUFFER);
128     TestSub(1,1);
129     TestSub(NEW_MAX_BUFFER, 1);
130 
131 #if !UCONFIG_NO_LEGACY_CONVERSION
132     TestEBCDIC_STATEFUL_Sub(1, 1);
133     TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER);
134     TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1);
135     TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
136 #endif
137 }
138 
TestSubWithValueCallBack()139 static void TestSubWithValueCallBack()
140 {
141     TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
142     TestSubWithValue(1,NEW_MAX_BUFFER);
143     TestSubWithValue(1,1);
144     TestSubWithValue(NEW_MAX_BUFFER, 1);
145 }
146 
147 #if !UCONFIG_NO_LEGACY_CONVERSION
TestLegalAndOtherCallBack()148 static void TestLegalAndOtherCallBack()
149 {
150     TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
151     TestLegalAndOthers(1,NEW_MAX_BUFFER);
152     TestLegalAndOthers(1,1);
153     TestLegalAndOthers(NEW_MAX_BUFFER, 1);
154 }
155 
TestSingleByteCallBack()156 static void TestSingleByteCallBack()
157 {
158     TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
159     TestSingleByte(1,NEW_MAX_BUFFER);
160     TestSingleByte(1,1);
161     TestSingleByte(NEW_MAX_BUFFER, 1);
162 }
163 #endif
164 
TestSkip(int32_t inputsize,int32_t outputsize)165 static void TestSkip(int32_t inputsize, int32_t outputsize)
166 {
167     static const uint8_t expskipIBM_949[]= {
168         0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
169 
170     static const uint8_t expskipIBM_943[] = {
171         0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 };
172 
173     static const uint8_t expskipIBM_930[] = {
174         0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f };
175 
176     gInBufferSize = inputsize;
177     gOutBufferSize = outputsize;
178 
179     /*From Unicode*/
180     log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP  \n");
181 
182 #if !UCONFIG_NO_LEGACY_CONVERSION
183     {
184         static const UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
185         static const UChar  sampleText2[] =  { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
186 
187         static const int32_t  toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 };
188         static const int32_t  toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 };
189 
190         if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
191                 expskipIBM_949, sizeof(expskipIBM_949), "ibm-949",
192                 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 ))
193             log_err("u-> ibm-949 with skip did not match.\n");
194         if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
195                 expskipIBM_943, sizeof(expskipIBM_943), "ibm-943",
196                 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 ))
197             log_err("u-> ibm-943 with skip did not match.\n");
198     }
199 
200     {
201         static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 };
202         static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f };
203         static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 };
204 
205         /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */
206         if(!testConvertFromUnicode(fromU, sizeof(fromU)/U_SIZEOF_UCHAR,
207                                    fromUBytes, sizeof(fromUBytes),
208                                    "ibm-930",
209                                    UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets,
210                                    NULL, 0)
211         ) {
212             log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n");
213         }
214     }
215 #endif
216 
217     {
218         static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
219         static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 };
220         static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 };
221 
222         static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
223         static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 };
224         static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 };
225 
226         /* US-ASCII */
227         if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR,
228                                    usasciiFromUBytes, sizeof(usasciiFromUBytes),
229                                    "US-ASCII",
230                                    UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
231                                    NULL, 0)
232         ) {
233             log_err("u->US-ASCII with skip did not match.\n");
234         }
235 
236 #if !UCONFIG_NO_LEGACY_CONVERSION
237         /* SBCS NLTC codepage 367 for US-ASCII */
238         if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR,
239                                    usasciiFromUBytes, sizeof(usasciiFromUBytes),
240                                    "ibm-367",
241                                    UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
242                                    NULL, 0)
243         ) {
244             log_err("u->ibm-367 with skip did not match.\n");
245         }
246 #endif
247 
248         /* ISO-Latin-1 */
249         if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR,
250                                    latin1FromUBytes, sizeof(latin1FromUBytes),
251                                    "LATIN_1",
252                                    UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
253                                    NULL, 0)
254         ) {
255             log_err("u->LATIN_1 with skip did not match.\n");
256         }
257 
258 #if !UCONFIG_NO_LEGACY_CONVERSION
259         /* windows-1252 */
260         if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR,
261                                    latin1FromUBytes, sizeof(latin1FromUBytes),
262                                    "windows-1252",
263                                    UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
264                                    NULL, 0)
265         ) {
266             log_err("u->windows-1252 with skip did not match.\n");
267         }
268     }
269 
270     {
271         static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
272         static const uint8_t toIBM943[]= { 0x61, 0x61 };
273         static const int32_t offset[]= {0, 4};
274 
275          /* EUC_JP*/
276         static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
277         static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
278             0x61, 0x8e, 0xe0,
279         };
280         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7};
281 
282         /*EUC_TW*/
283         static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
284         static const uint8_t to_euc_tw[]={
285             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
286             0x61, 0xe6, 0xca, 0x8a,
287         };
288         static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,};
289 
290         /*ISO-2022-JP*/
291         static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, };
292         static const uint8_t to_iso_2022_jp[]={
293             0x41,
294             0x42,
295 
296         };
297         static const int32_t from_iso_2022_jpOffs [] ={0,2};
298 
299         /*ISO-2022-JP*/
300         UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
301         static const uint8_t to_iso_2022_jp2[]={
302             0x41,
303             0x43,
304 
305         };
306         static const int32_t from_iso_2022_jpOffs2 [] ={0,2};
307 
308         /*ISO-2022-cn*/
309         static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
310         static const uint8_t to_iso_2022_cn[]={
311             0x41, 0x42
312         };
313         static const int32_t from_iso_2022_cnOffs [] ={
314             0, 2
315         };
316 
317         /*ISO-2022-CN*/
318         static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
319         static const uint8_t to_iso_2022_cn1[]={
320             0x41, 0x43
321 
322         };
323         static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 };
324 
325         /*ISO-2022-kr*/
326         static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
327         static const uint8_t to_iso_2022_kr[]={
328             0x1b,   0x24,   0x29,   0x43,
329             0x41,
330             0x0e,   0x25,   0x50,
331             0x25,   0x50,
332             0x0f,   0x42,
333         };
334         static const int32_t from_iso_2022_krOffs [] ={
335             -1,-1,-1,-1,
336             0,
337             1,1,1,
338             3,3,
339             4,4
340         };
341 
342         /*ISO-2022-kr*/
343         static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
344         static const uint8_t to_iso_2022_kr1[]={
345             0x1b,   0x24,   0x29,   0x43,
346             0x41,
347             0x0e,   0x25,   0x50,
348             0x25,   0x50,
349 
350         };
351         static const int32_t from_iso_2022_krOffs1 [] ={
352             -1,-1,-1,-1,
353             0,
354             1,1,1,
355             3,3,
356 
357         };
358         /* HZ encoding */
359         static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
360 
361         static const uint8_t to_hz[]={
362             0x7e,   0x7d,   0x41,
363             0x7e,   0x7b,   0x26,   0x30,
364             0x26,   0x30,
365             0x7e,   0x7d,   0x42,
366 
367         };
368         static const int32_t from_hzOffs [] ={
369             0,0,0,
370             1,1,1,1,
371             3,3,
372             4,4,4,4
373         };
374 
375         static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
376 
377         static const uint8_t to_hz1[]={
378             0x7e,   0x7d,   0x41,
379             0x7e,   0x7b,   0x26,   0x30,
380             0x26,   0x30,
381 
382 
383         };
384         static const int32_t from_hzOffs1 [] ={
385             0,0,0,
386             1,1,1,1,
387             3,3,
388 
389         };
390 
391 #endif
392 
393         static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
394 
395         static const uint8_t to_SCSU[]={
396             0x41,
397             0x42
398 
399 
400         };
401         static const int32_t from_SCSUOffs [] ={
402             0,
403             2,
404 
405         };
406 
407 #if !UCONFIG_NO_LEGACY_CONVERSION
408         /* ISCII */
409         static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
410         static const uint8_t to_iscii[]={
411             0x41,
412             0x42,
413         };
414         static const int32_t from_isciiOffs [] ={
415             0,2,
416 
417         };
418         /*ISCII*/
419         static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
420         static const uint8_t to_iscii1[]={
421             0x44,
422             0x43,
423 
424         };
425         static const int32_t from_isciiOffs1 [] ={0,2};
426 
427         if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
428                 toIBM943, sizeof(toIBM943), "ibm-943",
429                 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 ))
430             log_err("u-> ibm-943 with skip did not match.\n");
431 
432         if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
433                 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
434                 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 ))
435             log_err("u-> euc-jp with skip did not match.\n");
436 
437         if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
438                 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
439                 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 ))
440             log_err("u-> euc-tw with skip did not match.\n");
441 
442         /*iso_2022_jp*/
443         if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
444                 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
445                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 ))
446             log_err("u-> iso-2022-jp with skip did not match.\n");
447 
448         /* with context */
449         if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]),
450                 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp",
451                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
452             log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
453 
454         /*iso_2022_cn*/
455         if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
456                 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
457                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 ))
458             log_err("u-> iso-2022-cn with skip did not match.\n");
459         /*with context*/
460         if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, sizeof(iso_2022_cn_inputText1)/sizeof(iso_2022_cn_inputText1[0]),
461                 to_iso_2022_cn1, sizeof(to_iso_2022_cn1), "iso-2022-cn",
462                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
463             log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
464 
465         /*iso_2022_kr*/
466         if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
467                 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
468                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 ))
469             log_err("u-> iso-2022-kr with skip did not match.\n");
470           /*with context*/
471         if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, sizeof(iso_2022_kr_inputText1)/sizeof(iso_2022_kr_inputText1[0]),
472                 to_iso_2022_kr1, sizeof(to_iso_2022_kr1), "iso-2022-kr",
473                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
474             log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
475 
476         /*hz*/
477         if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
478                 to_hz, sizeof(to_hz), "HZ",
479                 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 ))
480             log_err("u-> HZ with skip did not match.\n");
481           /*with context*/
482         if(!testConvertFromUnicodeWithContext(hz_inputText1, sizeof(hz_inputText1)/sizeof(hz_inputText1[0]),
483                 to_hz1, sizeof(to_hz1), "hz",
484                 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
485             log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
486 #endif
487 
488         /*SCSU*/
489         if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
490                 to_SCSU, sizeof(to_SCSU), "SCSU",
491                 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 ))
492             log_err("u-> SCSU with skip did not match.\n");
493 
494 #if !UCONFIG_NO_LEGACY_CONVERSION
495         /*ISCII*/
496         if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
497                 to_iscii, sizeof(to_iscii), "ISCII,version=0",
498                 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 ))
499             log_err("u-> iscii with skip did not match.\n");
500         /*with context*/
501         if(!testConvertFromUnicodeWithContext(iscii_inputText1, sizeof(iscii_inputText1)/sizeof(iscii_inputText1[0]),
502                 to_iscii1, sizeof(to_iscii1), "ISCII,version=0",
503                 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
504             log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
505 #endif
506     }
507 
508     log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
509     {
510         static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */
511             0xFB, 0xEE, 0x28,       /* from source offset 0 */
512             0x24, 0x1E, 0x52,
513             0xB2,
514             0x20,
515             0xB3,
516             0xB1,
517             0x0D,
518             0x0A,
519 
520             0x20,                   /* from 8 */
521             0x00,
522             0xD0, 0x6C,
523             0xB6,
524             0xD8, 0xA5,
525             0x20,
526             0x68,
527             0x59,
528 
529             0xF9, 0x28,             /* from 16 */
530             0x6D,
531             0x20,
532             0x73,
533             0xE0, 0x2D,
534             0xDE, 0x43,
535             0xD0, 0x33,
536             0x20,
537 
538             0xFA, 0x83,             /* from 24 */
539             0x25, 0x01,
540             0xFB, 0x16, 0x87,
541             0x4B, 0x16,
542             0x20,
543             0xE6, 0xBD,
544             0xEB, 0x5B,
545             0x4B, 0xCC,
546 
547             0xF9, 0xA2,             /* from 32 */
548             0xFC, 0x10, 0x3E,
549             0xFE, 0x16, 0x3A, 0x8C,
550             0x20,
551             0xFC, 0x03, 0xAC,
552 
553             0x01,                   /* from 41 */
554             0xDE, 0x83,
555             0x20,
556             0x09
557         };
558         static const UChar expected[]={
559             0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */
560             0x0063, 0x0061, 0x000D, 0x000A,
561 
562             0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */
563             0x0930, 0x0020, 0x0918, 0x0909,
564 
565             0x3086, 0x304D, 0x0020, 0x3053, /* 16 */
566             0x4000, 0x4E00, 0x7777, 0x0020,
567 
568             0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */
569             0x0020, 0xD7A3, 0xDC00, 0xD800,
570 
571             0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */
572             0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
573 
574             0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */
575             0x0009
576         };
577         static const int32_t offsets[]={
578             0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7,
579             8, 9, 10, 10, 11, 12, 12, 13, 14, 15,
580             16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23,
581             24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31,
582             32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39,
583             41, 42, 42, 43, 44
584         };
585 
586         /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */
587         if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
588                                  sampleText, sizeof(sampleText),
589                                  "BOCU-1",
590                                  UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
591         ) {
592             log_err("u->BOCU-1 with skip did not match.\n");
593         }
594     }
595 
596     log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
597     {
598         const uint8_t sampleText[]={
599             0x61,                               /* 'a' */
600             0xc4, 0xb5,                         /* U+0135 */
601             0xed, 0x80, 0xa0,                   /* Hangul U+d020 */
602             0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */
603             0xee, 0x80, 0x80,                   /* PUA U+e000 */
604             0xed, 0xb0, 0x81,                   /* unpaired trail surrogate U+dc01 */
605             0x62,                               /* 'b' */
606             0xed, 0xa0, 0x81,                   /* unpaired lead surrogate U+d801 */
607             0xd0, 0x80                          /* U+0400 */
608         };
609         UChar expected[]={
610             0x0061,
611             0x0135,
612             0xd020,
613             0xd801, 0xdc01,
614             0xe000,
615             0xdc01,
616             0x0062,
617             0xd801,
618             0x0400
619         };
620         int32_t offsets[]={
621             0,
622             1, 1,
623             2, 2, 2,
624             3, 3, 3, 4, 4, 4,
625             5, 5, 5,
626             6, 6, 6,
627             7,
628             8, 8, 8,
629             9, 9
630         };
631 
632         /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */
633 
634         /* without offsets */
635         if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
636                                  sampleText, sizeof(sampleText),
637                                  "CESU-8",
638                                  UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0)
639         ) {
640             log_err("u->CESU-8 with skip did not match.\n");
641         }
642 
643         /* with offsets */
644         if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
645                                  sampleText, sizeof(sampleText),
646                                  "CESU-8",
647                                  UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
648         ) {
649             log_err("u->CESU-8 with skip did not match.\n");
650         }
651     }
652 
653     /*to Unicode*/
654     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP  \n");
655 
656 #if !UCONFIG_NO_LEGACY_CONVERSION
657     {
658 
659         static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 };
660         static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
661         static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
662 
663         static const int32_t  fromIBM949Offs [] = { 0, 1, 3, 5};
664         static const int32_t  fromIBM943Offs [] = { 0, 2, 4};
665         static const int32_t  fromIBM930Offs [] = { 1, 3, 5};
666 
667         if(!testConvertToUnicode(expskipIBM_949, sizeof(expskipIBM_949),
668                  IBM_949skiptoUnicode, sizeof(IBM_949skiptoUnicode)/sizeof(IBM_949skiptoUnicode),"ibm-949",
669                 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 ))
670             log_err("ibm-949->u with skip did not match.\n");
671         if(!testConvertToUnicode(expskipIBM_943, sizeof(expskipIBM_943),
672                  IBM_943skiptoUnicode, sizeof(IBM_943skiptoUnicode)/sizeof(IBM_943skiptoUnicode[0]),"ibm-943",
673                 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 ))
674             log_err("ibm-943->u with skip did not match.\n");
675 
676 
677         if(!testConvertToUnicode(expskipIBM_930, sizeof(expskipIBM_930),
678                  IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930",
679                 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 ))
680             log_err("ibm-930->u with skip did not match.\n");
681 
682 
683         if(!testConvertToUnicodeWithContext(expskipIBM_930, sizeof(expskipIBM_930),
684                  IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930",
685                 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
686             log_err("ibm-930->u with skip did not match.\n");
687     }
688 #endif
689 
690     {
691         static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 };
692         static const UChar usasciiToU[] = { 0x61, 0x31 };
693         static const int32_t usasciiToUOffsets[] = { 0, 2 };
694 
695         static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 };
696         static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 };
697         static const int32_t latin1ToUOffsets[] = { 0, 1, 2 };
698 
699         /* US-ASCII */
700         if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes),
701                                  usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR,
702                                  "US-ASCII",
703                                  UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
704                                  NULL, 0)
705         ) {
706             log_err("US-ASCII->u with skip did not match.\n");
707         }
708 
709 #if !UCONFIG_NO_LEGACY_CONVERSION
710         /* SBCS NLTC codepage 367 for US-ASCII */
711         if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes),
712                                  usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR,
713                                  "ibm-367",
714                                  UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
715                                  NULL, 0)
716         ) {
717             log_err("ibm-367->u with skip did not match.\n");
718         }
719 #endif
720 
721         /* ISO-Latin-1 */
722         if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes),
723                                  latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR,
724                                  "LATIN_1",
725                                  UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
726                                  NULL, 0)
727         ) {
728             log_err("LATIN_1->u with skip did not match.\n");
729         }
730 
731 #if !UCONFIG_NO_LEGACY_CONVERSION
732         /* windows-1252 */
733         if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes),
734                                  latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR,
735                                  "windows-1252",
736                                  UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
737                                  NULL, 0)
738         ) {
739             log_err("windows-1252->u with skip did not match.\n");
740         }
741 #endif
742     }
743 
744 #if !UCONFIG_NO_LEGACY_CONVERSION
745     {
746         static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
747             0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
748         };
749         static const UChar EBCIDIC_STATEFUL_toUnicode[] ={  0x6d63, 0x03b4
750         };
751         static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5};
752 
753 
754          /* euc-jp*/
755         static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
756             0x8f, 0xda, 0xa1,  /*unassigned*/
757            0x8e, 0xe0,
758         };
759         static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2};
760         static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9};
761 
762          /*EUC_TW*/
763         static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
764             0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
765            0xe6, 0xca, 0x8a,
766         };
767         static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, };
768         static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13};
769                 /*iso-2022-jp*/
770         static const uint8_t sampleTxt_iso_2022_jp[]={
771             0x41,
772             0x1b,   0x24,   0x42,   0x3a, 0x1a, /*unassigned*/
773             0x1b,   0x28,   0x42,   0x42,
774 
775         };
776         static const UChar iso_2022_jptoUnicode[]={    0x41,0x42 };
777         static const int32_t from_iso_2022_jpOffs [] ={  0,9   };
778 
779         /*iso-2022-cn*/
780         static const uint8_t sampleTxt_iso_2022_cn[]={
781             0x0f,   0x41,   0x44,
782             0x1B,   0x24,   0x29,   0x47,
783             0x0E,   0x40,   0x6f, /*unassigned*/
784             0x0f,   0x42,
785 
786         };
787 
788         static const UChar iso_2022_cntoUnicode[]={    0x41, 0x44,0x42 };
789         static const int32_t from_iso_2022_cnOffs [] ={  1,   2,   11   };
790 
791         /*iso-2022-kr*/
792         static const uint8_t sampleTxt_iso_2022_kr[]={
793           0x1b, 0x24, 0x29,  0x43,
794           0x41,
795           0x0E, 0x7f, 0x1E,
796           0x0e, 0x25, 0x50,
797           0x0f, 0x51,
798           0x42, 0x43,
799 
800         };
801         static const UChar iso_2022_krtoUnicode[]={     0x41,0x03A0,0x51, 0x42,0x43};
802         static const int32_t from_iso_2022_krOffs [] ={  4,    9,    12,   13  , 14 };
803 
804         /*hz*/
805         static const uint8_t sampleTxt_hz[]={
806             0x41,
807             0x7e,   0x7b,   0x26,   0x30,
808             0x7f,   0x1E, /*unassigned*/
809             0x26,   0x30,
810             0x7e,   0x7d,   0x42,
811             0x7e,   0x7b,   0x7f,   0x1E,/*unassigned*/
812             0x7e,   0x7d,   0x42,
813         };
814         static const UChar hztoUnicode[]={
815             0x41,
816             0x03a0,
817             0x03A0,
818             0x42,
819             0x42,};
820 
821         static const int32_t from_hzOffs [] ={0,3,7,11,18,  };
822 
823         /*ISCII*/
824         static const uint8_t sampleTxt_iscii[]={
825             0x41,
826             0xa1,
827             0xEB,    /*unassigned*/
828             0x26,
829             0x30,
830             0xa2,
831             0xEC,    /*unassigned*/
832             0x42,
833         };
834         static const UChar isciitoUnicode[]={
835             0x41,
836             0x0901,
837             0x26,
838             0x30,
839             0x0902,
840             0x42,
841             };
842 
843         static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 };
844 
845         /*LMBCS*/
846         static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50,
847             0x12, 0x92, 0xa0, /*unassigned*/
848             0x12, 0x92, 0xA1,
849         };
850         static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4};
851         static const int32_t fromLMBCS[] = {0, 6};
852 
853         if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
854              EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
855             UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
856         log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
857 
858         if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
859              EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
860             UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
861         log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
862 
863         if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
864                  euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
865                 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0))
866             log_err("euc-jp->u with skip did not match.\n");
867 
868 
869 
870         if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
871                  euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
872                 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0))
873             log_err("euc-tw->u with skip did not match.\n");
874 
875 
876         if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
877                  iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
878                 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0))
879             log_err("iso-2022-jp->u with skip did not match.\n");
880 
881         if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn),
882                  iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn",
883                 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0))
884             log_err("iso-2022-cn->u with skip did not match.\n");
885 
886         if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr),
887                  iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr",
888                 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0))
889             log_err("iso-2022-kr->u with skip did not match.\n");
890 
891         if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz),
892                  hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ",
893                 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0))
894             log_err("HZ->u with skip did not match.\n");
895 
896         if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii),
897                  isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0",
898                 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0))
899             log_err("iscii->u with skip did not match.\n");
900 
901         if(!testConvertToUnicode(sampleTxtLMBCS, sizeof(sampleTxtLMBCS),
902                 LMBCSToUnicode, sizeof(LMBCSToUnicode)/sizeof(LMBCSToUnicode[0]),"LMBCS-1",
903                 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0))
904             log_err("LMBCS->u with skip did not match.\n");
905 
906     }
907 #endif
908 
909     log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n");
910     {
911         const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
912             0xe0, 0x80,  0x61,};
913         UChar    expected1[] = {  0x0031, 0x4e8c, 0x0061};
914         int32_t offsets1[] = {   0x0000, 0x0001, 0x0006};
915 
916         if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
917                  expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
918                 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
919             log_err("utf8->u with skip did not match.\n");;
920     }
921 
922     log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n");
923     {
924         const uint8_t sampleText1[] = {  0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
925         UChar    expected1[] = {  0x00ba,  0x008c,  0x00f8,  0x0061,0xfffe,0xfffe};
926         int32_t offsets1[] = {   0x0000, 0x0001,0x0002,0x0003,4,5};
927 
928         if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
929                  expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
930                 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
931             log_err("scsu->u with skip did not match.\n");
932     }
933 
934     log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
935     {
936         const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */
937             0xFB, 0xEE, 0x28,       /* single-code point sequence at offset 0 */
938             0x24, 0x1E, 0x52,       /* 3 */
939             0xB2,                   /* 6 */
940             0x20,                   /* 7 */
941             0x40, 0x07,             /* 8 - wrong trail byte */
942             0xB3,                   /* 10 */
943             0xB1,                   /* 11 */
944             0xD0, 0x20,             /* 12 - wrong trail byte */
945             0x0D,                   /* 14 */
946             0x0A,                   /* 15 */
947             0x20,                   /* 16 */
948             0x00,                   /* 17 */
949             0xD0, 0x6C,             /* 18 */
950             0xB6,                   /* 20 */
951             0xD8, 0xA5,             /* 21 */
952             0x20,                   /* 23 */
953             0x68,                   /* 24 */
954             0x59,                   /* 25 */
955             0xF9, 0x28,             /* 26 */
956             0x6D,                   /* 28 */
957             0x20,                   /* 29 */
958             0x73,                   /* 30 */
959             0xE0, 0x2D,             /* 31 */
960             0xDE, 0x43,             /* 33 */
961             0xD0, 0x33,             /* 35 */
962             0x20,                   /* 37 */
963             0xFA, 0x83,             /* 38 */
964             0x25, 0x01,             /* 40 */
965             0xFB, 0x16, 0x87,       /* 42 */
966             0x4B, 0x16,             /* 45 */
967             0x20,                   /* 47 */
968             0xE6, 0xBD,             /* 48 */
969             0xEB, 0x5B,             /* 50 */
970             0x4B, 0xCC,             /* 52 */
971             0xF9, 0xA2,             /* 54 */
972             0xFC, 0x10, 0x3E,       /* 56 */
973             0xFE, 0x16, 0x3A, 0x8C, /* 59 */
974             0x20,                   /* 63 */
975             0xFC, 0x03, 0xAC,       /* 64 */
976             0xFF,                   /* 67 - FF just resets the state without encoding anything */
977             0x01,                   /* 68 */
978             0xDE, 0x83,             /* 69 */
979             0x20,                   /* 71 */
980             0x09                    /* 72 */
981         };
982         UChar expected[]={
983             0xFEFF, 0x0061, 0x0062, 0x0020,
984             0x0063, 0x0061, 0x000D, 0x000A,
985             0x0020, 0x0000, 0x00DF, 0x00E6,
986             0x0930, 0x0020, 0x0918, 0x0909,
987             0x3086, 0x304D, 0x0020, 0x3053,
988             0x4000, 0x4E00, 0x7777, 0x0020,
989             0x9FA5, 0x4E00, 0xAC00, 0xBCDE,
990             0x0020, 0xD7A3, 0xDC00, 0xD800,
991             0xD800, 0xDC00, 0xD845, 0xDDDD,
992             0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
993             0xDFFF, 0x0001, 0x0E40, 0x0020,
994             0x0009
995         };
996         int32_t offsets[]={
997             0, 3, 6, 7, /* skip 8, */
998             10, 11, /* skip 12, */
999             14, 15, 16, 17, 18,
1000             20, 21, 23, 24, 25, 26, 28, 29,
1001             30, 31, 33, 35, 37, 38,
1002             40, 42, 45, 47, 48,
1003             50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59,
1004             63, 64, /* trail */ 64, /* reset only 67, */
1005             68, 69,
1006             71, 72
1007         };
1008 
1009         if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1010                                  expected, ARRAY_LENGTH(expected), "BOCU-1",
1011                                  UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1012         ) {
1013             log_err("BOCU-1->u with skip did not match.\n");
1014         }
1015     }
1016 
1017     log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
1018     {
1019         const uint8_t sampleText[]={
1020             0x61,                               /* 0  'a' */
1021             0xc0, 0x80,                         /* 1  non-shortest form */
1022             0xc4, 0xb5,                         /* 3  U+0135 */
1023             0xed, 0x80, 0xa0,                   /* 5  Hangul U+d020 */
1024             0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8  surrogate pair for U+10401 */
1025             0xee, 0x80, 0x80,                   /* 14 PUA U+e000 */
1026             0xed, 0xb0, 0x81,                   /* 17 unpaired trail surrogate U+dc01 */
1027             0xf0, 0x90, 0x80, 0x80,             /* 20 illegal 4-byte form for U+10000 */
1028             0x62,                               /* 24 'b' */
1029             0xed, 0xa0, 0x81,                   /* 25 unpaired lead surrogate U+d801 */
1030             0xed, 0xa0,                         /* 28 incomplete sequence */
1031             0xd0, 0x80                          /* 30 U+0400 */
1032         };
1033         UChar expected[]={
1034             0x0061,
1035             /* skip */
1036             0x0135,
1037             0xd020,
1038             0xd801, 0xdc01,
1039             0xe000,
1040             0xdc01,
1041             /* skip */
1042             0x0062,
1043             0xd801,
1044             0x0400
1045         };
1046         int32_t offsets[]={
1047             0,
1048             /* skip 1, */
1049             3,
1050             5,
1051             8, 11,
1052             14,
1053             17,
1054             /* skip 20, 20, */
1055             24,
1056             25,
1057             /* skip 28 */
1058             30
1059         };
1060 
1061         /* without offsets */
1062         if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1063                                  expected, ARRAY_LENGTH(expected), "CESU-8",
1064                                  UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0)
1065         ) {
1066             log_err("CESU-8->u with skip did not match.\n");
1067         }
1068 
1069         /* with offsets */
1070         if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1071                                  expected, ARRAY_LENGTH(expected), "CESU-8",
1072                                  UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1073         ) {
1074             log_err("CESU-8->u with skip did not match.\n");
1075         }
1076     }
1077 }
1078 
TestStop(int32_t inputsize,int32_t outputsize)1079 static void TestStop(int32_t inputsize, int32_t outputsize)
1080 {
1081     static const UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1082     static const UChar  sampleText2[] =  { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1083 
1084     static const uint8_t expstopIBM_949[]= {
1085         0x00, 0xb0, 0xa1, 0xb0, 0xa2};
1086 
1087     static const uint8_t expstopIBM_943[] = {
1088         0x9f, 0xaf, 0x9f, 0xb1};
1089 
1090     static const uint8_t expstopIBM_930[] = {
1091         0x0e, 0x5d, 0x5f, 0x5d, 0x63};
1092 
1093     static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01};
1094     static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64};
1095     static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64};
1096 
1097 
1098     static const int32_t  toIBM949Offsstop [] = { 0, 1, 1, 2, 2};
1099     static const int32_t  toIBM943Offsstop [] = { 0, 0, 1, 1};
1100     static const int32_t  toIBM930Offsstop [] = { 0, 0, 0, 1, 1};
1101 
1102     static const int32_t  fromIBM949Offs [] = { 0, 1, 3};
1103     static const int32_t  fromIBM943Offs [] = { 0, 2};
1104     static const int32_t  fromIBM930Offs [] = { 1, 3};
1105 
1106     gInBufferSize = inputsize;
1107     gOutBufferSize = outputsize;
1108 
1109     /*From Unicode*/
1110 
1111 #if !UCONFIG_NO_LEGACY_CONVERSION
1112     if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1113             expstopIBM_949, sizeof(expstopIBM_949), "ibm-949",
1114             UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 ))
1115         log_err("u-> ibm-949 with stop did not match.\n");
1116     if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1117             expstopIBM_943, sizeof(expstopIBM_943), "ibm-943",
1118             UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0))
1119         log_err("u-> ibm-943 with stop did not match.\n");
1120     if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1121             expstopIBM_930, sizeof(expstopIBM_930), "ibm-930",
1122             UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 ))
1123         log_err("u-> ibm-930 with stop did not match.\n");
1124 
1125     log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP  \n");
1126     {
1127         static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1128         static const uint8_t toIBM943[]= { 0x61,};
1129         static const int32_t offset[]= {0,} ;
1130 
1131          /*EUC_JP*/
1132         static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1133         static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,};
1134         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,};
1135 
1136         /*EUC_TW*/
1137         static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1138         static const uint8_t to_euc_tw[]={
1139             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,};
1140         static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,};
1141 
1142         /*ISO-2022-JP*/
1143         static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, };
1144         static const uint8_t to_iso_2022_jp[]={
1145              0x41,
1146 
1147         };
1148         static const int32_t from_iso_2022_jpOffs [] ={0,};
1149 
1150         /*ISO-2022-cn*/
1151         static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1152         static const uint8_t to_iso_2022_cn[]={
1153             0x41,
1154 
1155         };
1156         static const int32_t from_iso_2022_cnOffs [] ={
1157             0,0,
1158             2,2,
1159         };
1160 
1161         /*ISO-2022-kr*/
1162         static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
1163         static const uint8_t to_iso_2022_kr[]={
1164             0x1b,   0x24,   0x29,   0x43,
1165             0x41,
1166             0x0e,   0x25,   0x50,
1167         };
1168         static const int32_t from_iso_2022_krOffs [] ={
1169             -1,-1,-1,-1,
1170              0,
1171             1,1,1,
1172         };
1173 
1174         /* HZ encoding */
1175         static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1176 
1177         static const uint8_t to_hz[]={
1178             0x7e,   0x7d, 0x41,
1179             0x7e,   0x7b,   0x26,   0x30,
1180 
1181         };
1182         static const int32_t from_hzOffs [] ={
1183             0, 0,0,
1184             1,1,1,1,
1185         };
1186 
1187         /*ISCII*/
1188         static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, };
1189         static const uint8_t to_iscii[]={
1190             0x41,
1191         };
1192         static const int32_t from_isciiOffs [] ={
1193             0,
1194         };
1195 
1196         if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
1197                 toIBM943, sizeof(toIBM943), "ibm-943",
1198                 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 ))
1199             log_err("u-> ibm-943 with stop did not match.\n");
1200 
1201         if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
1202                 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
1203                 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 ))
1204             log_err("u-> euc-jp with stop did not match.\n");
1205 
1206         if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
1207                 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
1208                 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1209             log_err("u-> euc-tw with stop did not match.\n");
1210 
1211         if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
1212                 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
1213                 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1214             log_err("u-> iso-2022-jp with stop did not match.\n");
1215 
1216         if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
1217                 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
1218                 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1219             log_err("u-> iso-2022-jp with stop did not match.\n");
1220 
1221         if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
1222                 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
1223                 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 ))
1224             log_err("u-> iso-2022-cn with stop did not match.\n");
1225 
1226         if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
1227                 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
1228                 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 ))
1229             log_err("u-> iso-2022-kr with stop did not match.\n");
1230 
1231         if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
1232                 to_hz, sizeof(to_hz), "HZ",
1233                 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 ))
1234             log_err("u-> HZ with stop did not match.\n");\
1235 
1236         if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
1237                 to_iscii, sizeof(to_iscii), "ISCII,version=0",
1238                 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 ))
1239             log_err("u-> iscii with stop did not match.\n");
1240 
1241 
1242     }
1243 #endif
1244 
1245     log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n");
1246     {
1247         static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1248 
1249         static const uint8_t to_SCSU[]={
1250             0x41,
1251 
1252         };
1253         int32_t from_SCSUOffs [] ={
1254             0,
1255 
1256         };
1257         if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1258                 to_SCSU, sizeof(to_SCSU), "SCSU",
1259                 UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 ))
1260             log_err("u-> SCSU with skip did not match.\n");
1261 
1262     }
1263 
1264     /*to Unicode*/
1265 
1266 #if !UCONFIG_NO_LEGACY_CONVERSION
1267     if(!testConvertToUnicode(expstopIBM_949, sizeof(expstopIBM_949),
1268              IBM_949stoptoUnicode, sizeof(IBM_949stoptoUnicode)/sizeof(IBM_949stoptoUnicode[0]),"ibm-949",
1269             UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 ))
1270         log_err("ibm-949->u with stop did not match.\n");
1271     if(!testConvertToUnicode(expstopIBM_943, sizeof(expstopIBM_943),
1272              IBM_943stoptoUnicode, sizeof(IBM_943stoptoUnicode)/sizeof(IBM_943stoptoUnicode[0]),"ibm-943",
1273             UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 ))
1274         log_err("ibm-943->u with stop did not match.\n");
1275     if(!testConvertToUnicode(expstopIBM_930, sizeof(expstopIBM_930),
1276              IBM_930stoptoUnicode, sizeof(IBM_930stoptoUnicode)/sizeof(IBM_930stoptoUnicode[0]),"ibm-930",
1277             UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 ))
1278         log_err("ibm-930->u with stop did not match.\n");
1279 
1280     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n");
1281     {
1282 
1283         static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1284             0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1285         };
1286         static const UChar EBCIDIC_STATEFUL_toUnicode[] ={  0x6d63 };
1287         static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1};
1288 
1289 
1290          /*EUC-JP*/
1291         static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1292             0x8f, 0xda, 0xa1,  /*unassigned*/
1293            0x8e, 0xe0,
1294         };
1295         static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec};
1296         static const int32_t from_euc_jpOffs [] ={ 0, 1, 3};
1297 
1298           /*EUC_TW*/
1299         static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1300             0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1301            0xe6, 0xca, 0x8a,
1302         };
1303         UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2};
1304         int32_t from_euc_twOffs [] ={ 0, 1, 3};
1305 
1306 
1307 
1308          if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
1309              EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
1310             UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1311         log_err("EBCIDIC_STATEFUL->u with stop did not match.\n");
1312 
1313         if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
1314              euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
1315             UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0))
1316         log_err("euc-jp->u with stop did not match.\n");
1317 
1318         if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
1319                  euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
1320                 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1321             log_err("euc-tw->u with stop did not match.\n");
1322     }
1323 #endif
1324 
1325     log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n");
1326     {
1327         static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1328             0xe0, 0x80,  0x61,};
1329         static const UChar    expected1[] = {  0x0031, 0x4e8c,};
1330         static const int32_t offsets1[] = {   0x0000, 0x0001};
1331 
1332         if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1333                  expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
1334                 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1335             log_err("utf8->u with stop did not match.\n");;
1336     }
1337     log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n");
1338     {
1339         static const uint8_t sampleText1[] = {  0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04};
1340         static const UChar    expected1[] = {  0x00ba,  0x008c,  0x00f8,  0x0061};
1341         static const int32_t offsets1[] = {   0x0000, 0x0001,0x0002,0x0003};
1342 
1343         if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1344                  expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
1345                 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1346             log_err("scsu->u with stop did not match.\n");;
1347     }
1348 
1349 }
1350 
TestSub(int32_t inputsize,int32_t outputsize)1351 static void TestSub(int32_t inputsize, int32_t outputsize)
1352 {
1353     static const UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1354     static const UChar sampleText2[]=    { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1355 
1356     static const uint8_t expsubIBM_949[] =
1357      { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 };
1358 
1359     static const uint8_t expsubIBM_943[] = {
1360         0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 };
1361 
1362     static const uint8_t expsubIBM_930[] = {
1363         0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f };
1364 
1365     static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 };
1366     static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1367     static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1368 
1369     static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1370     static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 };
1371     static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 };
1372 
1373     static const int32_t  fromIBM949Offs [] = { 0, 1, 3, 5, 7 };
1374     static const int32_t  fromIBM943Offs [] = { 0, 2, 4, 6 };
1375     static const int32_t  fromIBM930Offs [] = { 1, 3, 5, 7 };
1376 
1377     gInBufferSize = inputsize;
1378     gOutBufferSize = outputsize;
1379 
1380     /*from unicode*/
1381 
1382 #if !UCONFIG_NO_LEGACY_CONVERSION
1383     if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1384             expsubIBM_949, sizeof(expsubIBM_949), "ibm-949",
1385             UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 ))
1386         log_err("u-> ibm-949 with subst did not match.\n");
1387     if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1388             expsubIBM_943, sizeof(expsubIBM_943), "ibm-943",
1389             UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0))
1390         log_err("u-> ibm-943 with subst did not match.\n");
1391     if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1392             expsubIBM_930, sizeof(expsubIBM_930), "ibm-930",
1393             UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 ))
1394         log_err("u-> ibm-930 with subst did not match.\n");
1395 
1396     log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE  \n");
1397     {
1398         static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1399         static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 };
1400         static const int32_t offset[]= {0, 1, 1, 3, 3, 4};
1401 
1402 
1403         /* EUC_JP*/
1404         static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1405         static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1406             0xf4, 0xfe, 0xf4, 0xfe,
1407             0x61, 0x8e, 0xe0,
1408         };
1409         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7};
1410 
1411         /*EUC_TW*/
1412         static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1413         static const uint8_t to_euc_tw[]={
1414             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1415             0xfd, 0xfe, 0xfd, 0xfe,
1416             0x61, 0xe6, 0xca, 0x8a,
1417         };
1418 
1419         static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,};
1420 
1421         if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
1422                 toIBM943, sizeof(toIBM943), "ibm-943",
1423                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 ))
1424             log_err("u-> ibm-943 with substitute did not match.\n");
1425 
1426         if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
1427                 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
1428                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 ))
1429             log_err("u-> euc-jp with substitute did not match.\n");
1430 
1431         if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
1432                 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
1433                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1434             log_err("u-> euc-tw with substitute did not match.\n");
1435     }
1436 #endif
1437 
1438     log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1439     {
1440         UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1441 
1442         const uint8_t to_SCSU[]={
1443             0x41,
1444             0x0e, 0xff,0xfd,
1445             0x42
1446 
1447 
1448         };
1449         int32_t from_SCSUOffs [] ={
1450             0,
1451             1,1,1,
1452             2,
1453 
1454         };
1455         const uint8_t to_SCSU_1[]={
1456             0x41,
1457 
1458         };
1459         int32_t from_SCSUOffs_1 [] ={
1460             0,
1461 
1462         };
1463         if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1464                 to_SCSU, sizeof(to_SCSU), "SCSU",
1465                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 ))
1466             log_err("u-> SCSU with substitute did not match.\n");
1467 
1468         if(!testConvertFromUnicodeWithContext(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1469                 to_SCSU_1, sizeof(to_SCSU_1), "SCSU",
1470                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
1471             log_err("u-> SCSU with substitute did not match.\n");
1472     }
1473 
1474     log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1475     {
1476         static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,};
1477         static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac,
1478                            0xf0, 0x90, 0x90, 0x81,
1479                            0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
1480                            0xef, 0xbf, 0xbf, 0x61,
1481 
1482         };
1483         static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 };
1484         if(!testConvertFromUnicode(testinput, sizeof(testinput)/sizeof(testinput[0]),
1485                 expectedUTF8, sizeof(expectedUTF8), "utf8",
1486                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) {
1487             log_err("u-> utf8 with stop did not match.\n");
1488         }
1489     }
1490 
1491     log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1492     {
1493         static const UChar in[]={ 0x0041, 0xfeff };
1494 
1495         static const uint8_t out[]={
1496 #if U_IS_BIG_ENDIAN
1497             0xfe, 0xff,
1498             0x00, 0x41,
1499             0xfe, 0xff
1500 #else
1501             0xff, 0xfe,
1502             0x41, 0x00,
1503             0xff, 0xfe
1504 #endif
1505         };
1506         static const int32_t offsets[]={
1507             -1, -1, 0, 0, 1, 1
1508         };
1509 
1510         if(!testConvertFromUnicode(in, ARRAY_LENGTH(in),
1511                                    out, sizeof(out), "UTF-16",
1512                                    UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1513         ) {
1514             log_err("u->UTF-16 with substitute did not match.\n");
1515         }
1516     }
1517 
1518     log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1519     {
1520         static const UChar in[]={ 0x0041, 0xfeff };
1521 
1522         static const uint8_t out[]={
1523 #if U_IS_BIG_ENDIAN
1524             0x00, 0x00, 0xfe, 0xff,
1525             0x00, 0x00, 0x00, 0x41,
1526             0x00, 0x00, 0xfe, 0xff
1527 #else
1528             0xff, 0xfe, 0x00, 0x00,
1529             0x41, 0x00, 0x00, 0x00,
1530             0xff, 0xfe, 0x00, 0x00
1531 #endif
1532         };
1533         static const int32_t offsets[]={
1534             -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1
1535         };
1536 
1537         if(!testConvertFromUnicode(in, ARRAY_LENGTH(in),
1538                                    out, sizeof(out), "UTF-32",
1539                                    UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1540         ) {
1541             log_err("u->UTF-32 with substitute did not match.\n");
1542         }
1543     }
1544 
1545     /*to unicode*/
1546 
1547 #if !UCONFIG_NO_LEGACY_CONVERSION
1548     if(!testConvertToUnicode(expsubIBM_949, sizeof(expsubIBM_949),
1549              IBM_949subtoUnicode, sizeof(IBM_949subtoUnicode)/sizeof(IBM_949subtoUnicode[0]),"ibm-949",
1550             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 ))
1551         log_err("ibm-949->u with substitute did not match.\n");
1552     if(!testConvertToUnicode(expsubIBM_943, sizeof(expsubIBM_943),
1553              IBM_943subtoUnicode, sizeof(IBM_943subtoUnicode)/sizeof(IBM_943subtoUnicode[0]),"ibm-943",
1554             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 ))
1555         log_err("ibm-943->u with substitute did not match.\n");
1556     if(!testConvertToUnicode(expsubIBM_930, sizeof(expsubIBM_930),
1557              IBM_930subtoUnicode, sizeof(IBM_930subtoUnicode)/sizeof(IBM_930subtoUnicode[0]),"ibm-930",
1558             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 ))
1559         log_err("ibm-930->u with substitute did not match.\n");
1560 
1561     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1562     {
1563 
1564         const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1565             0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1566         };
1567         UChar EBCIDIC_STATEFUL_toUnicode[] ={  0x6d63, 0xfffd, 0x03b4
1568         };
1569         int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5};
1570 
1571 
1572         /* EUC_JP*/
1573         const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1574             0x8f, 0xda, 0xa1,  /*unassigned*/
1575            0x8e, 0xe0, 0x8a
1576         };
1577         UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a };
1578         int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6,  9, 11 };
1579 
1580         /*EUC_TW*/
1581         const uint8_t sampleTxt_euc_tw[]={
1582             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1583             0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1584             0xe6, 0xca, 0x8a,
1585         };
1586         UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, };
1587         int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13};
1588 
1589 
1590         if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
1591            EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
1592           UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1593             log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n");
1594 
1595 
1596         if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
1597            euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
1598           UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ))
1599             log_err("euc-jp->u with substitute did not match.\n");
1600 
1601 
1602         if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
1603            euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
1604           UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1605             log_err("euc-tw->u with substitute  did not match.\n");
1606 
1607 
1608         if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
1609            euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
1610           UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND))
1611             log_err("euc-jp->u with substitute did not match.\n");
1612     }
1613 #endif
1614 
1615     log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1616     {
1617         const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1618             0xe0, 0x80,  0x61,};
1619         UChar    expected1[] = {  0x0031, 0x4e8c, 0xfffd, 0x0061};
1620         int32_t offsets1[] = {   0x0000, 0x0001, 0x0004, 0x0006};
1621 
1622         if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1623                  expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
1624                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1625             log_err("utf8->u with substitute did not match.\n");;
1626     }
1627     log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1628     {
1629         const uint8_t sampleText1[] = {  0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
1630         UChar    expected1[] = {  0x00ba,  0x008c,  0x00f8,  0x0061,0xfffd,0xfffd};
1631         int32_t offsets1[] = {   0x0000, 0x0001,0x0002,0x0003,4,5};
1632 
1633         if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1634                  expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
1635                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1636             log_err("scsu->u with stop did not match.\n");;
1637     }
1638 
1639 #if !UCONFIG_NO_LEGACY_CONVERSION
1640     log_verbose("Testing ibm-930 subchar/subchar1\n");
1641     {
1642         static const UChar u1[]={         0x6d63,           0x6d64,     0x6d65,     0x6d66,     0xdf };
1643         static const uint8_t s1[]={       0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f };
1644         static const int32_t offsets1[]={ 0,    0,    0,    1,    1,    2,    2,    3,    3,    4,    4 };
1645 
1646         static const UChar u2[]={         0x6d63,           0x6d64,     0xfffd,     0x6d66,     0x1a };
1647         static const uint8_t s2[]={       0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 };
1648         static const int32_t offsets2[]={ 1,                3,          5,          7,          10 };
1649 
1650         if(!testConvertFromUnicode(u1, ARRAY_LENGTH(u1), s1, ARRAY_LENGTH(s1), "ibm-930",
1651                                    UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1652         ) {
1653             log_err("u->ibm-930 subchar/subchar1 did not match.\n");
1654         }
1655 
1656         if(!testConvertToUnicode(s2, ARRAY_LENGTH(s2), u2, ARRAY_LENGTH(u2), "ibm-930",
1657                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1658         ) {
1659             log_err("ibm-930->u subchar/subchar1 did not match.\n");
1660         }
1661     }
1662 
1663     log_verbose("Testing GB 18030 with substitute callbacks\n");
1664     {
1665         static const UChar u2[]={
1666             0x24, 0x7f, 0x80,                   0x1f9,      0x20ac,     0x4e00,     0x9fa6,                 0xffff,                 0xd800, 0xdc00,         0xfffd,                 0xdbff, 0xdfff };
1667         static const uint8_t gb2[]={
1668             0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 };
1669         static const int32_t offsets2[]={
1670             0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 };
1671 
1672         if(!testConvertToUnicode(gb2, ARRAY_LENGTH(gb2), u2, ARRAY_LENGTH(u2), "gb18030",
1673                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1674         ) {
1675             log_err("gb18030->u with substitute did not match.\n");
1676         }
1677     }
1678 #endif
1679 
1680     log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n");
1681     {
1682         static const uint8_t utf7[]={
1683          /* a~            a+AB~                           a+AB\x0c                        a+AB-                         a+AB.                         a+. */
1684             0x61, 0x7e,   0x61, 0x2b, 0x41, 0x42, 0x7e,   0x61, 0x2b, 0x41, 0x42, 0x0c,   0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b,   0x2e
1685         };
1686         static const UChar unicode[]={
1687             0x61, 0xfffd, 0x61,       0xfffd,     0xfffd, 0x61,       0xfffd,     0xfffd, 0x61,       0xfffd,           0x61,       0xfffd,     0x2e, 0x61, 0xfffd, 0x2e
1688         };
1689         static const int32_t offsets[]={
1690             0,    1,      2,          4,          6,      7,          9,          11,     12,         14,               17,         19,         21,   22,   23,     24
1691         };
1692 
1693         if(!testConvertToUnicode(utf7, ARRAY_LENGTH(utf7), unicode, ARRAY_LENGTH(unicode), "UTF-7",
1694                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1695         ) {
1696             log_err("UTF-7->u with substitute did not match.\n");
1697         }
1698     }
1699 
1700     log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n");
1701     {
1702         static const uint8_t
1703             in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff },
1704             in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff },
1705             in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff };
1706 
1707         static const UChar
1708             out1[]={ 0x4e00, 0xfeff },
1709             out2[]={ 0x004e, 0xfffe },
1710             out3[]={ 0xfefd, 0x4e00, 0xfeff };
1711 
1712         static const int32_t
1713             offsets1[]={ 2, 4 },
1714             offsets2[]={ 2, 4 },
1715             offsets3[]={ 0, 2, 4 };
1716 
1717         if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-16",
1718                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1719         ) {
1720             log_err("UTF-16 (BE BOM)->u with substitute did not match.\n");
1721         }
1722 
1723         if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-16",
1724                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1725         ) {
1726             log_err("UTF-16 (LE BOM)->u with substitute did not match.\n");
1727         }
1728 
1729         if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-16",
1730                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1731         ) {
1732             log_err("UTF-16 (no BOM)->u with substitute did not match.\n");
1733         }
1734     }
1735 
1736     log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n");
1737     {
1738         static const uint8_t
1739             in1[]={ 0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff },
1740             in2[]={ 0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00 },
1741             in3[]={ 0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01 },
1742             in4[]={ 0x00, 0x01, 0x02, 0x03,   0x00, 0x11, 0x12, 0x00,   0x00, 0x00, 0x4e, 0x00 };
1743 
1744         static const UChar
1745             out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff },
1746             out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe },
1747             out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd },
1748             out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 };
1749 
1750         static const int32_t
1751             offsets1[]={ 4, 4, 8 },
1752             offsets2[]={ 4, 4, 8 },
1753             offsets3[]={ 0, 4, 4, 8, 12 },
1754             offsets4[]={ 0, 0, 4, 8 };
1755 
1756         if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-32",
1757                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1758         ) {
1759             log_err("UTF-32 (BE BOM)->u with substitute did not match.\n");
1760         }
1761 
1762         if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-32",
1763                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1764         ) {
1765             log_err("UTF-32 (LE BOM)->u with substitute did not match.\n");
1766         }
1767 
1768         if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-32",
1769                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1770         ) {
1771             log_err("UTF-32 (no BOM)->u with substitute did not match.\n");
1772         }
1773 
1774         if(!testConvertToUnicode(in4, ARRAY_LENGTH(in4), out4, ARRAY_LENGTH(out4), "UTF-32",
1775                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0)
1776         ) {
1777             log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n");
1778         }
1779     }
1780 }
1781 
TestSubWithValue(int32_t inputsize,int32_t outputsize)1782 static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
1783 {
1784     UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1785     UChar  sampleText2[] =  { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1786 
1787     const uint8_t expsubwvalIBM_949[]= {
1788         0x00, 0xb0, 0xa1, 0xb0, 0xa2,
1789         0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 };
1790 
1791     const uint8_t expsubwvalIBM_943[]= {
1792         0x9f, 0xaf, 0x9f, 0xb1,
1793         0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 };
1794 
1795     const uint8_t expsubwvalIBM_930[] = {
1796         0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f };
1797 
1798     int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 };
1799     int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 };
1800     int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */
1801 
1802     gInBufferSize = inputsize;
1803     gOutBufferSize = outputsize;
1804 
1805     /*from Unicode*/
1806 
1807 #if !UCONFIG_NO_LEGACY_CONVERSION
1808     if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1809             expsubwvalIBM_949, sizeof(expsubwvalIBM_949), "ibm-949",
1810             UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 ))
1811         log_err("u-> ibm-949 with subst with value did not match.\n");
1812 
1813     if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1814             expsubwvalIBM_943, sizeof(expsubwvalIBM_943), "ibm-943",
1815             UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 ))
1816         log_err("u-> ibm-943 with sub with value did not match.\n");
1817 
1818     if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1819             expsubwvalIBM_930, sizeof(expsubwvalIBM_930), "ibm-930",
1820             UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 ))
1821         log_err("u-> ibm-930 with subst with value did not match.\n");
1822 
1823 
1824     log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE  \n");
1825     {
1826         static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1827         static const uint8_t toIBM943[]= { 0x61,
1828             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1829             0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1830             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1831             0x61 };
1832         static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
1833 
1834 
1835          /* EUC_JP*/
1836         static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, };
1837         static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1838             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1839             0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1840             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1841             0x61, 0x8e, 0xe0,
1842         };
1843         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,
1844             3, 3, 3, 3, 3, 3,
1845             3, 3, 3, 3, 3, 3,
1846             5, 5, 5, 5, 5, 5,
1847             6, 7, 7,
1848         };
1849 
1850         /*EUC_TW*/
1851         static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1852         static const uint8_t to_euc_tw[]={
1853             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1854             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1855             0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1856             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1857             0x61, 0xe6, 0xca, 0x8a,
1858         };
1859         static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,
1860              3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5,
1861              6, 7, 7, 8,
1862         };
1863         /*ISO-2022-JP*/
1864         static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ;
1865         static const uint8_t to_iso_2022_jp1[]={
1866             0x1b,   0x24,   0x42,   0x21, 0x21,
1867             0x1b,   0x28,   0x42,   0x25, 0x55,   0x30,   0x30,   0x45,   0x39,
1868             0x1b,   0x24,   0x42,   0x21, 0x22,
1869             0x1b,   0x28,   0x42,   0x25, 0x55,   0x30,   0x30,   0x45,   0x39,
1870             0x42,
1871         };
1872 
1873         static const int32_t from_iso_2022_jpOffs1 [] ={
1874             0,0,0,0,0,
1875             1,1,1,1,1,1,1,1,1,
1876             2,2,2,2,2,
1877             3,3,3,3,3,3,3,3,3,
1878             4,
1879         };
1880         /* surrogate pair*/
1881         static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ;
1882         static const uint8_t to_iso_2022_jp2[]={
1883                                 0x1b,   0x24,   0x42,   0x21,   0x21,
1884                                 0x1b,   0x28,   0x42,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1885                                 0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1886                                 0x1b,   0x24,   0x42,   0x21,   0x22,
1887                                 0x1b,   0x28,   0x42,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1888                                 0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1889                                 0x42,
1890                                 };
1891         static const int32_t from_iso_2022_jpOffs2 [] ={
1892             0,0,0,0,0,
1893             1,1,1,1,1,1,1,1,1,
1894             1,1,1,1,1,1,
1895             3,3,3,3,3,
1896             4,4,4,4,4,4,4,4,4,
1897             4,4,4,4,4,4,
1898             6,
1899         };
1900 
1901         /*ISO-2022-cn*/
1902         static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1903         static const uint8_t to_iso_2022_cn[]={
1904             0x41,
1905             0x25, 0x55,   0x33,   0x37,   0x31,   0x32,
1906             0x42,
1907         };
1908         static const int32_t from_iso_2022_cnOffs [] ={
1909             0,
1910             1,1,1,1,1,1,
1911             2,
1912         };
1913 
1914         static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042};
1915 
1916         static const uint8_t to_iso_2022_cn4[]={
1917                              0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
1918                              0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1919                              0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1920                              0x0e,   0x21,   0x22,
1921                              0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1922                              0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1923                              0x42,
1924                              };
1925         static const int32_t from_iso_2022_cnOffs4 [] ={
1926             0,0,0,0,0,0,0,
1927             1,1,1,1,1,1,1,
1928             1,1,1,1,1,1,
1929             3,3,3,
1930             4,4,4,4,4,4,4,
1931             4,4,4,4,4,4,
1932             6
1933 
1934         };
1935 
1936         /*ISO-2022-kr*/
1937         static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
1938         static const uint8_t to_iso_2022_kr2[]={
1939             0x1b,   0x24,   0x29,   0x43,
1940             0x41,
1941             0x0e,   0x25,   0x50,
1942             0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1943             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1944             0x0e,   0x25,   0x50,
1945             0x0f,   0x42,
1946             0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1947             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1948             0x43
1949         };
1950         static const int32_t from_iso_2022_krOffs2 [] ={
1951             -1,-1,-1,-1,
1952              0,
1953             1,1,1,
1954             2,2,2,2,2,2,2,
1955             2,2,2,2,2,2,
1956             4,4,4,
1957             5,5,
1958             6,6,6,6,6,6,
1959             6,6,6,6,6,6,
1960             8,
1961         };
1962 
1963         static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 };
1964         static const uint8_t to_iso_2022_kr[]={
1965             0x1b,   0x24,   0x29,   0x43,
1966             0x41,
1967             0x0e,   0x25,   0x50,
1968             0x0f,   0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
1969             0x0e,   0x25,   0x50,
1970             0x0f,   0x42,
1971             0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
1972             0x43
1973         };
1974 
1975 
1976         static const int32_t from_iso_2022_krOffs [] ={
1977             -1,-1,-1,-1,
1978              0,
1979             1,1,1,
1980             2,2,2,2,2,2,2,
1981             3,3,3,
1982             4,4,
1983             5,5,5,5,5,5,
1984             6,
1985         };
1986         /* HZ encoding */
1987         static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1988 
1989         static const uint8_t to_hz[]={
1990             0x7e,   0x7d,   0x41,
1991             0x7e,   0x7b,   0x26,   0x30,
1992             0x7e,   0x7d,   0x25,   0x55,   0x30,   0x36,   0x36,   0x32,  /*unassigned*/
1993             0x7e,   0x7b,   0x26,   0x30,
1994             0x7e,   0x7d,   0x42,
1995 
1996         };
1997         static const int32_t from_hzOffs [] ={
1998             0,0,0,
1999             1,1,1,1,
2000             2,2,2,2,2,2,2,2,
2001             3,3,3,3,
2002             4,4,4
2003         };
2004 
2005         static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
2006         static const uint8_t to_hz2[]={
2007             0x7e,   0x7d,   0x41,
2008             0x7e,   0x7b,   0x26,   0x30,
2009             0x7e,   0x7d,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
2010             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2011             0x7e,   0x7b,   0x26,   0x30,
2012             0x7e,   0x7d,   0x42,
2013             0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
2014             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2015             0x43
2016         };
2017         static const int32_t from_hzOffs2 [] ={
2018             0,0,0,
2019             1,1,1,1,
2020             2,2,2,2,2,2,2,2,
2021             2,2,2,2,2,2,
2022             4,4,4,4,
2023             5,5,5,
2024             6,6,6,6,6,6,
2025             6,6,6,6,6,6,
2026             8,
2027         };
2028 
2029                 /*ISCII*/
2030         static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 };
2031         static const uint8_t to_iscii[]={
2032             0x41,
2033             0xef,   0x42,   0xa1,
2034             0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
2035             0xa2,
2036             0x42,
2037             0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
2038             0x43
2039         };
2040 
2041 
2042         static const int32_t from_isciiOffs [] ={
2043             0,
2044             1,1,1,
2045             2,2,2,2,2,2,
2046             3,
2047             4,
2048             5,5,5,5,5,5,
2049             6,
2050         };
2051 
2052         if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
2053                 toIBM943, sizeof(toIBM943), "ibm-943",
2054                 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 ))
2055             log_err("u-> ibm-943 with subst with value did not match.\n");
2056 
2057         if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
2058                 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
2059                 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 ))
2060             log_err("u-> euc-jp with subst with value did not match.\n");
2061 
2062         if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
2063                 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
2064                 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 ))
2065             log_err("u-> euc-tw with subst with value did not match.\n");
2066 
2067         if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]),
2068                 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp",
2069                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2070             log_err("u-> iso_2022_jp with subst with value did not match.\n");
2071 
2072         if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]),
2073                 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp",
2074                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2075             log_err("u-> iso_2022_jp with subst with value did not match.\n");
2076 
2077         if(!testConvertFromUnicode(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]),
2078                 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp",
2079                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 ))
2080             log_err("u-> iso_2022_jp with subst with value did not match.\n");
2081         /*ESCAPE OPTIONS*/
2082         {
2083             /* surrogate pair*/
2084             static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ;
2085             static const uint8_t to_iso_2022_jp3_v2[]={
2086                     0x1b,   0x24,   0x42,   0x21,   0x21,
2087                     0x1b,   0x28,   0x42,   0x26,   0x23,   0x31,  0x34,   0x34,   0x34,   0x37, 0x30, 0x3b,
2088 
2089                     0x1b,   0x24,   0x42,   0x21,   0x22,
2090                     0x1b,   0x28,   0x42,   0x26,   0x23,  0x31,  0x34,   0x34,   0x34,   0x37, 0x30, 0x3b,
2091 
2092                     0x42,
2093                     0x26,   0x23,   0x33,   0x36,   0x38,   0x39,   0x32,   0x3b,
2094                     };
2095 
2096             static const int32_t from_iso_2022_jpOffs3_v2 [] ={
2097                 0,0,0,0,0,
2098                 1,1,1,1,1,1,1,1,1,1,1,1,
2099 
2100                 3,3,3,3,3,
2101                 4,4,4,4,4,4,4,4,4,4,4,4,
2102 
2103                 6,
2104                 7,7,7,7,7,7,7,7,7
2105             };
2106 
2107             if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, sizeof(iso_2022_jp_inputText3)/sizeof(iso_2022_jp_inputText3[0]),
2108                     to_iso_2022_jp3_v2, sizeof(to_iso_2022_jp3_v2), "iso-2022-jp",
2109                     UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2110                 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n");
2111         }
2112         {
2113             static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2114             static const uint8_t to_iso_2022_cn5_v2[]={
2115                              0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2116                              0x0f,   0x5c,   0x75,   0x44,   0x38,   0x34,   0x44,
2117                              0x5c,   0x75,   0x44,   0x43,   0x35,   0x36,
2118                              0x0e,   0x21,   0x22,
2119                              0x0f,   0x5c,   0x75,   0x44,   0x38,   0x34,   0x44,
2120                              0x5c,   0x75,   0x44,   0x43,   0x35,   0x36,
2121                              0x42,
2122                              0x5c,   0x75,   0x30,   0x39,   0x30,   0x32,
2123                              };
2124             static const int32_t from_iso_2022_cnOffs5_v2 [] ={
2125                 0,0,0,0,0,0,0,
2126                 1,1,1,1,1,1,1,
2127                 1,1,1,1,1,1,
2128                 3,3,3,
2129                 4,4,4,4,4,4,4,
2130                 4,4,4,4,4,4,
2131                 6,
2132                 7,7,7,7,7,7
2133             };
2134             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, sizeof(iso_2022_cn_inputText5)/sizeof(iso_2022_cn_inputText5[0]),
2135                 to_iso_2022_cn5_v2, sizeof(to_iso_2022_cn5_v2), "iso-2022-cn",
2136                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR ))
2137                 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n");
2138 
2139         }
2140         {
2141             static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2142             static const uint8_t to_iso_2022_cn6_v2[]={
2143                                 0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2144                                 0x0f,   0x7b,   0x55,   0x2b,   0x32,   0x33,   0x34,   0x35,   0x36,   0x7d,
2145                                 0x0e,   0x21,   0x22,
2146                                 0x0f,   0x7b,   0x55,   0x2b,   0x32,   0x33,   0x34,   0x35,   0x36,   0x7d,
2147                                 0x42,
2148                                 0x7b,   0x55,   0x2b,   0x30,   0x39,   0x30,   0x32,   0x7d
2149                              };
2150             static const int32_t from_iso_2022_cnOffs6_v2 [] ={
2151                     0,  0,  0,  0,  0,  0,  0,
2152                     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
2153                     3,  3,  3,
2154                     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
2155                     6,
2156                     7,  7,  7,  7,  7,  7,  7,  7,
2157             };
2158             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, sizeof(iso_2022_cn_inputText6)/sizeof(iso_2022_cn_inputText6[0]),
2159                 to_iso_2022_cn6_v2, sizeof(to_iso_2022_cn6_v2), "iso-2022-cn",
2160                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR ))
2161                 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n");
2162 
2163         }
2164         {
2165             static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2166             static const uint8_t to_iso_2022_cn7_v2[]={
2167                                 0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2168                                 0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,   0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2169                                 0x0e,   0x21,   0x22,
2170                                 0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,   0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2171                                 0x42,   0x25,   0x55,   0x30,   0x39,   0x30,   0x32,
2172                             };
2173             static const int32_t from_iso_2022_cnOffs7_v2 [] ={
2174                                 0,  0,  0,  0,  0,  0,  0,
2175                                 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
2176                                 3,  3,  3,
2177                                 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
2178                                 6,
2179                                 7,  7,  7,  7,  7,  7,
2180             };
2181             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, sizeof(iso_2022_cn_inputText7)/sizeof(iso_2022_cn_inputText7[0]),
2182                 to_iso_2022_cn7_v2, sizeof(to_iso_2022_cn7_v2), "iso-2022-cn",
2183                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR ))
2184                 log_err("u-> iso-2022-cn with sub & K did not match.\n");
2185 
2186         }
2187         {
2188             static const UChar iso_2022_cn_inputText8[]={
2189                                 0x3000,
2190                                 0xD84D, 0xDC56,
2191                                 0x3001,
2192                                 0xD84D, 0xDC56,
2193                                 0xDBFF, 0xDFFF,
2194                                 0x0042,
2195                                 0x0902};
2196             static const uint8_t to_iso_2022_cn8_v2[]={
2197                                 0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2198                                 0x0f,   0x5c,   0x32,   0x33,   0x34,   0x35,   0x36,   0x20,
2199                                 0x0e,   0x21,   0x22,
2200                                 0x0f,   0x5c,   0x32,   0x33,   0x34,   0x35,   0x36,   0x20,
2201                                 0x5c,   0x31,   0x30,   0x46,   0x46,   0x46,   0x46,   0x20,
2202                                 0x42,
2203                                 0x5c,   0x39,   0x30,   0x32,   0x20
2204                              };
2205             static const int32_t from_iso_2022_cnOffs8_v2 [] ={
2206                     0,  0,  0,  0,  0,  0,  0,
2207                     1,  1,  1,  1,  1,  1,  1,  1,
2208                     3,  3,  3,
2209                     4,  4,  4,  4,  4,  4,  4,  4,
2210                     6,  6,  6,  6,  6,  6,  6,  6,
2211                     8,
2212                     9,  9,  9,  9,  9
2213             };
2214             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, sizeof(iso_2022_cn_inputText8)/sizeof(iso_2022_cn_inputText8[0]),
2215                 to_iso_2022_cn8_v2, sizeof(to_iso_2022_cn8_v2), "iso-2022-cn",
2216                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR ))
2217                 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n");
2218 
2219         }
2220         {
2221             static const uint8_t to_iso_2022_cn4_v3[]={
2222                             0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2223                             0x0f,   0x5c,   0x55,   0x30,   0x30,   0x30,   0x32,   0x33,   0x34,   0x35,   0x36,
2224                             0x0e,   0x21,   0x22,
2225                             0x0f,   0x5c,   0x55,   0x30,   0x30,   0x30,   0x32,   0x33,   0x34,   0x35,   0x36,
2226                             0x42
2227                              };
2228 
2229 
2230             static const int32_t from_iso_2022_cnOffs4_v3 [] ={
2231                 0,0,0,0,0,0,0,
2232                 1,1,1,1,1,1,1,1,1,1,1,
2233 
2234                 3,3,3,
2235                 4,4,4,4,4,4,4,4,4,4,4,
2236 
2237                 6
2238 
2239             };
2240             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]),
2241                 to_iso_2022_cn4_v3, sizeof(to_iso_2022_cn4_v3), "iso-2022-cn",
2242                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2243             {
2244                 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n");
2245             }
2246         }
2247         if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
2248                 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
2249                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 ))
2250             log_err("u-> iso_2022_cn with subst with value did not match.\n");
2251 
2252         if(!testConvertFromUnicode(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]),
2253                 to_iso_2022_cn4, sizeof(to_iso_2022_cn4), "iso-2022-cn",
2254                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 ))
2255             log_err("u-> iso_2022_cn with subst with value did not match.\n");
2256         if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
2257                 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
2258                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 ))
2259             log_err("u-> iso_2022_kr with subst with value did not match.\n");
2260         if(!testConvertFromUnicode(iso_2022_kr_inputText2, sizeof(iso_2022_kr_inputText2)/sizeof(iso_2022_kr_inputText2[0]),
2261                 to_iso_2022_kr2, sizeof(to_iso_2022_kr2), "iso-2022-kr",
2262                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 ))
2263             log_err("u-> iso_2022_kr2 with subst with value did not match.\n");
2264         if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
2265                 to_hz, sizeof(to_hz), "HZ",
2266                 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 ))
2267             log_err("u-> hz with subst with value did not match.\n");
2268         if(!testConvertFromUnicode(hz_inputText2, sizeof(hz_inputText2)/sizeof(hz_inputText2[0]),
2269                 to_hz2, sizeof(to_hz2), "HZ",
2270                 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 ))
2271             log_err("u-> hz with subst with value did not match.\n");
2272 
2273         if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
2274                 to_iscii, sizeof(to_iscii), "ISCII,version=0",
2275                 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 ))
2276             log_err("u-> iscii with subst with value did not match.\n");
2277     }
2278 #endif
2279 
2280     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
2281     /*to Unicode*/
2282     {
2283 #if !UCONFIG_NO_LEGACY_CONVERSION
2284         static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
2285             0x81, 0xad, /*unassigned*/
2286             0x89, 0xd3 };
2287         static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
2288             0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
2289             0x7B87};
2290         static const int32_t  fromIBM943Offs [] =    { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
2291 
2292         /* EUC_JP*/
2293         static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
2294             0x8f, 0xda, 0xa1,  /*unassigned*/
2295            0x8e, 0xe0,
2296         };
2297         static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec,
2298             0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31,
2299             0x00a2 };
2300         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3,
2301             6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2302             9,
2303         };
2304 
2305         /*EUC_TW*/
2306         static const uint8_t sampleTxt_euc_tw[]={
2307             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
2308             0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
2309             0xe6, 0xca, 0x8a,
2310         };
2311         static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2,
2312              0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43,
2313              0x8706, 0x8a, };
2314         static const int32_t from_euc_twOffs [] ={ 0, 1, 3,
2315              7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2316              11, 13};
2317 
2318         /*iso-2022-jp*/
2319         static const uint8_t sampleTxt_iso_2022_jp[]={
2320             0x1b,   0x28,   0x42,   0x41,
2321             0x1b,   0x24,   0x42,   0x3a, 0x1a, /*unassigned*/
2322             0x1b,   0x28,   0x42,   0x42,
2323 
2324         };
2325                                                    /*     A    %    X    3    A    %    X    1    A     B    */
2326         static const UChar iso_2022_jptoUnicode[]={    0x41,0x25,0x58,0x33,0x41,0x25,0x58,0x31,0x41, 0x42 };
2327         static const int32_t from_iso_2022_jpOffs [] ={  3,   7,   7,   7,   7,   7,   7,   7,   7,    12   };
2328 
2329         /*iso-2022-cn*/
2330         static const uint8_t sampleTxt_iso_2022_cn[]={
2331             0x0f,   0x41,   0x44,
2332             0x1B,   0x24,   0x29,   0x47,
2333             0x0E,   0x40,   0x6c, /*unassigned*/
2334             0x0f,   0x42,
2335 
2336         };
2337         static const UChar iso_2022_cntoUnicode[]={    0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 };
2338         static const int32_t from_iso_2022_cnOffs [] ={  1,   2,   8,   8,   8,   8,   8,   8,   8,  8,    11   };
2339 
2340         /*iso-2022-kr*/
2341         static const uint8_t sampleTxt_iso_2022_kr[]={
2342           0x1b, 0x24, 0x29,  0x43,
2343           0x41,
2344           0x0E, 0x7f, 0x1E,
2345           0x0e, 0x25, 0x50,
2346           0x0f, 0x51,
2347           0x42, 0x43,
2348 
2349         };
2350         static const UChar iso_2022_krtoUnicode[]={     0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43};
2351         static const int32_t from_iso_2022_krOffs [] ={  4,   6,   6,   6,   6,   6,   6,   6,   6,    9,    12,   13  , 14 };
2352 
2353         /*hz*/
2354         static const uint8_t sampleTxt_hz[]={
2355             0x41,
2356             0x7e,   0x7b,   0x26,   0x30,
2357             0x7f,   0x1E, /*unassigned*/
2358             0x26,   0x30,
2359             0x7e,   0x7d,   0x42,
2360             0x7e,   0x7b,   0x7f,   0x1E,/*unassigned*/
2361             0x7e,   0x7d,   0x42,
2362         };
2363         static const UChar hztoUnicode[]={
2364             0x41,
2365             0x03a0,
2366             0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2367             0x03A0,
2368             0x42,
2369             0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2370             0x42,};
2371 
2372         static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18,  };
2373 
2374 
2375         /*iscii*/
2376         static const uint8_t sampleTxt_iscii[]={
2377             0x41,
2378             0x30,
2379             0xEB, /*unassigned*/
2380             0xa3,
2381             0x42,
2382             0xEC, /*unassigned*/
2383             0x42,
2384         };
2385         static const UChar isciitoUnicode[]={
2386             0x41,
2387             0x30,
2388             0x25,  0x58,  0x45, 0x42,
2389             0x0903,
2390             0x42,
2391             0x25,  0x58,  0x45, 0x43,
2392             0x42,};
2393 
2394         static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6  };
2395 #endif
2396 
2397         /*UTF8*/
2398         static const uint8_t sampleTxtUTF8[]={
2399             0x20, 0x64, 0x50,
2400             0xC2, 0x7E, /* truncated char */
2401             0x20,
2402             0xE0, 0xB5, 0x7E, /* truncated char */
2403             0x40,
2404         };
2405         static const UChar UTF8ToUnicode[]={
2406             0x0020, 0x0064, 0x0050,
2407             0x0025, 0x0058, 0x0043, 0x0032, 0x007E,  /* \xC2~ */
2408             0x0020,
2409             0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E,
2410             0x0040
2411         };
2412         static const int32_t fromUTF8[] = {
2413             0, 1, 2,
2414             3, 3, 3, 3, 4,
2415             5,
2416             6, 6, 6, 6, 6, 6, 6, 6, 8,
2417             9
2418         };
2419         static const UChar UTF8ToUnicodeXML_DEC[]={
2420             0x0020, 0x0064, 0x0050,
2421             0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E,  /* &#194;~ */
2422             0x0020,
2423             0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E,
2424             0x0040
2425         };
2426         static const int32_t fromUTF8XML_DEC[] = {
2427             0, 1, 2,
2428             3, 3, 3, 3, 3, 3, 4,
2429             5,
2430             6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8,
2431             9
2432         };
2433 
2434 
2435 #if !UCONFIG_NO_LEGACY_CONVERSION
2436         if(!testConvertToUnicode(sampleTxtToU, sizeof(sampleTxtToU),
2437                  IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943",
2438                 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 ))
2439             log_err("ibm-943->u with substitute with value did not match.\n");
2440 
2441         if(!testConvertToUnicode(sampleTxt_EUC_JP, sizeof(sampleTxt_EUC_JP),
2442                  EUC_JPtoUnicode, sizeof(EUC_JPtoUnicode)/sizeof(EUC_JPtoUnicode[0]),"IBM-eucJP",
2443                 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0))
2444             log_err("euc-jp->u with substitute with value did not match.\n");
2445 
2446         if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
2447                  euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
2448                 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0))
2449             log_err("euc-tw->u with substitute with value did not match.\n");
2450 
2451         if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2452                  iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2453                 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0))
2454             log_err("iso-2022-jp->u with substitute with value did not match.\n");
2455 
2456         if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2457                  iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2458                 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR))
2459             log_err("iso-2022-jp->u with substitute with value did not match.\n");
2460 
2461         {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */
2462             {
2463                 static const UChar iso_2022_jptoUnicodeDec[]={
2464                                                   0x0041,
2465                                                   /*   &         #         5         8         ;   */
2466                                                   0x0026,   0x0023,   0x0035,   0x0038,   0x003b,
2467                                                   0x0026,   0x0023,   0x0032,   0x0036,   0x003b,
2468                                                   0x0042 };
2469                 static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12,  };
2470                 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2471                      iso_2022_jptoUnicodeDec, sizeof(iso_2022_jptoUnicodeDec)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2472                     UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2473                 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n");
2474             }
2475             {
2476                 static const UChar iso_2022_jptoUnicodeHex[]={
2477                                                   0x0041,
2478                                                   /*   &       #       x       3       A       ;  */
2479                                                   0x0026, 0x0023, 0x0078, 0x0033, 0x0041, 0x003b,
2480                                                   0x0026, 0x0023, 0x0078, 0x0031, 0x0041, 0x003b,
2481                                                   0x0042 };
2482                 static const int32_t from_iso_2022_jpOffsHex [] ={  3,7,7,7,7,7,7,7,7,7,7,7,7,12   };
2483                 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2484                      iso_2022_jptoUnicodeHex, sizeof(iso_2022_jptoUnicodeHex)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2485                     UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR ))
2486                 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n");
2487             }
2488             {
2489                 static const UChar iso_2022_jptoUnicodeC[]={
2490                                                 0x0041,
2491                                                 0x005C, 0x0078, 0x0033, 0x0041,   /*  \x3A */
2492                                                 0x005C, 0x0078, 0x0031, 0x0041,   /*  \x1A */
2493                                                 0x0042 };
2494                 int32_t from_iso_2022_jpOffsC [] ={  3,7,7,7,7,7,7,7,7,12   };
2495                 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2496                      iso_2022_jptoUnicodeC, sizeof(iso_2022_jptoUnicodeC)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2497                     UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2498                 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n");
2499             }
2500         }
2501         if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn),
2502                  iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn",
2503                 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0))
2504             log_err("iso-2022-cn->u with substitute with value did not match.\n");
2505 
2506         if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr),
2507                  iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr",
2508                 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0))
2509             log_err("iso-2022-kr->u with substitute with value did not match.\n");
2510 
2511          if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz),
2512                  hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ",
2513                 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0))
2514             log_err("hz->u with substitute with value did not match.\n");
2515 
2516          if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii),
2517                  isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0",
2518                 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0))
2519             log_err("ISCII ->u with substitute with value did not match.\n");
2520 #endif
2521 
2522         if(!testConvertToUnicode(sampleTxtUTF8, sizeof(sampleTxtUTF8),
2523                 UTF8ToUnicode, sizeof(UTF8ToUnicode)/sizeof(UTF8ToUnicode[0]),"UTF-8",
2524                 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0))
2525             log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2526         if(!testConvertToUnicodeWithContext(sampleTxtUTF8, sizeof(sampleTxtUTF8),
2527                 UTF8ToUnicodeXML_DEC, sizeof(UTF8ToUnicodeXML_DEC)/sizeof(UTF8ToUnicodeXML_DEC[0]),"UTF-8",
2528                 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR))
2529             log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2530     }
2531 }
2532 
2533 #if !UCONFIG_NO_LEGACY_CONVERSION
TestLegalAndOthers(int32_t inputsize,int32_t outputsize)2534 static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize)
2535 {
2536     static const UChar    legalText[] =  { 0x0000, 0xAC00, 0xAC01, 0xD700 };
2537     static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
2538     static const int32_t  to949legal[] = {0, 1, 1, 2, 2, 3, 3};
2539 
2540 
2541     static const uint8_t text943[] = {
2542         0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
2543     static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22,  0x5b57 };
2544     static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22,  0x5b57 };
2545     static const UChar toUnicode943stop[]= { 0x304b};
2546 
2547     static const int32_t  fromIBM943Offssub[]  = { 0, 2, 3, 4, 5, 7 };
2548     static const int32_t  fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 };
2549     static const int32_t  fromIBM943Offsstop[] = { 0};
2550 
2551     gInBufferSize = inputsize;
2552     gOutBufferSize = outputsize;
2553     /*checking with a legal value*/
2554     if(!testConvertFromUnicode(legalText, sizeof(legalText)/sizeof(legalText[0]),
2555             templegal949, sizeof(templegal949), "ibm-949",
2556             UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 ))
2557         log_err("u-> ibm-949 with skip did not match.\n");
2558 
2559     /*checking illegal value for ibm-943 with substitute*/
2560     if(!testConvertToUnicode(text943, sizeof(text943),
2561              toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943",
2562             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2563         log_err("ibm-943->u with subst did not match.\n");
2564     /*checking illegal value for ibm-943 with skip */
2565     if(!testConvertToUnicode(text943, sizeof(text943),
2566              toUnicode943skip, sizeof(toUnicode943skip)/sizeof(toUnicode943skip[0]),"ibm-943",
2567             UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 ))
2568         log_err("ibm-943->u with skip did not match.\n");
2569 
2570     /*checking illegal value for ibm-943 with stop */
2571     if(!testConvertToUnicode(text943, sizeof(text943),
2572              toUnicode943stop, sizeof(toUnicode943stop)/sizeof(toUnicode943stop[0]),"ibm-943",
2573             UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 ))
2574         log_err("ibm-943->u with stop did not match.\n");
2575 
2576 }
2577 
TestSingleByte(int32_t inputsize,int32_t outputsize)2578 static void TestSingleByte(int32_t inputsize, int32_t outputsize)
2579 {
2580     static const uint8_t sampleText[] = {
2581         0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
2582         0xff, 0x32, 0x33};
2583     static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 };
2584     static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 };
2585     /*checking illegal value for ibm-943 with substitute*/
2586     gInBufferSize = inputsize;
2587     gOutBufferSize = outputsize;
2588 
2589     if(!testConvertToUnicode(sampleText, sizeof(sampleText),
2590              toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943",
2591             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2592         log_err("ibm-943->u with subst did not match.\n");
2593 }
2594 
TestEBCDIC_STATEFUL_Sub(int32_t inputsize,int32_t outputsize)2595 static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize)
2596 {
2597     /*EBCDIC_STATEFUL*/
2598     static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 };
2599     static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 };
2600     static const int32_t offset_930[]=     { 0,    1,    1,    1,    2,    2,    3,    4,    4,    4,    5,    5    };
2601 /*                              s     SO    doubl       SI    sng   s     SO    fe    fe    SI    s    */
2602 
2603     /*EBCDIC_STATEFUL with subChar=3f*/
2604     static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 };
2605     static const int32_t offset_930_subvaried[]=     { 0,    1,    1,    1,    2,    2,    3,    4,    5    };
2606     static const char mySubChar[]={ 0x3f};
2607 
2608     gInBufferSize = inputsize;
2609     gOutBufferSize = outputsize;
2610 
2611     if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]),
2612         toIBM930, sizeof(toIBM930), "ibm-930",
2613         UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 ))
2614             log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n");
2615 
2616     if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]),
2617         toIBM930_subvaried, sizeof(toIBM930_subvaried), "ibm-930",
2618         UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 ))
2619             log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n");
2620 }
2621 #endif
2622 
testConvertFromUnicode(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UConverterFromUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len)2623 UBool testConvertFromUnicode(const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
2624                 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
2625                 const char *mySubChar, int8_t len)
2626 {
2627 
2628 
2629     UErrorCode status = U_ZERO_ERROR;
2630     UConverter *conv = 0;
2631     char junkout[NEW_MAX_BUFFER]; /* FIX */
2632     int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2633     const UChar *src;
2634     char *end;
2635     char *targ;
2636     int32_t *offs;
2637     int i;
2638     int32_t  realBufferSize;
2639     char *realBufferEnd;
2640     const UChar *realSourceEnd;
2641     const UChar *sourceLimit;
2642     UBool checkOffsets = TRUE;
2643     UBool doFlush;
2644     char junk[9999];
2645     char offset_str[9999];
2646     char *p;
2647     UConverterFromUCallback oldAction = NULL;
2648     const void* oldContext = NULL;
2649 
2650 
2651     for(i=0;i<NEW_MAX_BUFFER;i++)
2652         junkout[i] = (char)0xF0;
2653     for(i=0;i<NEW_MAX_BUFFER;i++)
2654         junokout[i] = 0xFF;
2655     setNuConvTestName(codepage, "FROM");
2656 
2657     log_verbose("\nTesting========= %s  FROM \n  inputbuffer= %d   outputbuffer= %d\n", codepage, gInBufferSize,
2658             gOutBufferSize);
2659 
2660     conv = ucnv_open(codepage, &status);
2661     if(U_FAILURE(status))
2662     {
2663         log_data_err("Couldn't open converter %s\n",codepage);
2664         return TRUE;
2665     }
2666 
2667     log_verbose("Converter opened..\n");
2668 
2669     /*----setting the callback routine----*/
2670     ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2671     if (U_FAILURE(status))
2672     {
2673         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2674     }
2675     /*------------------------*/
2676     /*setting the subChar*/
2677     if(mySubChar != NULL){
2678         ucnv_setSubstChars(conv, mySubChar, len, &status);
2679         if (U_FAILURE(status))  {
2680             log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2681         }
2682     }
2683     /*------------*/
2684 
2685     src = source;
2686     targ = junkout;
2687     offs = junokout;
2688 
2689     realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
2690     realBufferEnd = junkout + realBufferSize;
2691     realSourceEnd = source + sourceLen;
2692 
2693     if ( gOutBufferSize != realBufferSize )
2694       checkOffsets = FALSE;
2695 
2696     if( gInBufferSize != NEW_MAX_BUFFER )
2697       checkOffsets = FALSE;
2698 
2699     do
2700     {
2701         end = nct_min(targ + gOutBufferSize, realBufferEnd);
2702         sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
2703 
2704         doFlush = (UBool)(sourceLimit == realSourceEnd);
2705 
2706         if(targ == realBufferEnd)
2707         {
2708             log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
2709             return FALSE;
2710         }
2711         log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
2712 
2713 
2714         status = U_ZERO_ERROR;
2715 
2716         ucnv_fromUnicode (conv,
2717                   (char **)&targ,
2718                   (const char *)end,
2719                   &src,
2720                   sourceLimit,
2721                   checkOffsets ? offs : NULL,
2722                   doFlush, /* flush if we're at the end of the input data */
2723                   &status);
2724     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
2725 
2726 
2727     if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2728         UChar errChars[50]; /* should be sufficient */
2729         int8_t errLen = 50;
2730         UErrorCode err = U_ZERO_ERROR;
2731         const UChar* start= NULL;
2732         ucnv_getInvalidUChars(conv,errChars, &errLen, &err);
2733         if(U_FAILURE(err)){
2734             log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err));
2735         }
2736         /* length of in invalid chars should be equal to returned length*/
2737         start = src - errLen;
2738         if(u_strncmp(errChars,start,errLen)!=0){
2739             log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2740         }
2741     }
2742     /* allow failure codes for the stop callback */
2743     if(U_FAILURE(status) &&
2744        (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND)))
2745     {
2746         log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2747         return FALSE;
2748     }
2749 
2750     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
2751         sourceLen, targ-junkout);
2752     if(getTestOption(VERBOSITY_OPTION))
2753     {
2754 
2755         junk[0] = 0;
2756         offset_str[0] = 0;
2757         for(p = junkout;p<targ;p++)
2758         {
2759             sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
2760             sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
2761         }
2762 
2763         log_verbose(junk);
2764         printSeq(expect, expectLen);
2765         if ( checkOffsets )
2766         {
2767             log_verbose("\nOffsets:");
2768             log_verbose(offset_str);
2769         }
2770         log_verbose("\n");
2771     }
2772     ucnv_close(conv);
2773 
2774 
2775     if(expectLen != targ-junkout)
2776     {
2777         log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2778         log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2779         printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2780         printSeqErr(expect, expectLen);
2781         return FALSE;
2782     }
2783 
2784     if (checkOffsets && (expectOffsets != 0) )
2785     {
2786         log_verbose("comparing %d offsets..\n", targ-junkout);
2787         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
2788             log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2789             log_err("Got Output : ");
2790             printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2791             log_err("Got Offsets:      ");
2792             for(p=junkout;p<targ;p++)
2793                 log_err("%d,", junokout[p-junkout]);
2794             log_err("\n");
2795             log_err("Expected Offsets: ");
2796             for(i=0; i<(targ-junkout); i++)
2797                 log_err("%d,", expectOffsets[i]);
2798             log_err("\n");
2799             return FALSE;
2800         }
2801     }
2802 
2803     if(!memcmp(junkout, expect, expectLen))
2804     {
2805         log_verbose("String matches! %s\n", gNuConvTestName);
2806         return TRUE;
2807     }
2808     else
2809     {
2810         log_err("String does not match. %s\n", gNuConvTestName);
2811         log_err("source: ");
2812         printUSeqErr(source, sourceLen);
2813         log_err("Got:      ");
2814         printSeqErr((const uint8_t *)junkout, expectLen);
2815         log_err("Expected: ");
2816         printSeqErr(expect, expectLen);
2817         return FALSE;
2818     }
2819 }
2820 
testConvertToUnicode(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UConverterToUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len)2821 UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
2822                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
2823                const char *mySubChar, int8_t len)
2824 {
2825     UErrorCode status = U_ZERO_ERROR;
2826     UConverter *conv = 0;
2827     UChar   junkout[NEW_MAX_BUFFER]; /* FIX */
2828     int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2829     const char *src;
2830     const char *realSourceEnd;
2831     const char *srcLimit;
2832     UChar *targ;
2833     UChar *end;
2834     int32_t *offs;
2835     int i;
2836     UBool   checkOffsets = TRUE;
2837     char junk[9999];
2838     char offset_str[9999];
2839     UChar *p;
2840     UConverterToUCallback oldAction = NULL;
2841     const void* oldContext = NULL;
2842 
2843     int32_t   realBufferSize;
2844     UChar *realBufferEnd;
2845 
2846 
2847     for(i=0;i<NEW_MAX_BUFFER;i++)
2848         junkout[i] = 0xFFFE;
2849 
2850     for(i=0;i<NEW_MAX_BUFFER;i++)
2851         junokout[i] = -1;
2852 
2853     setNuConvTestName(codepage, "TO");
2854 
2855     log_verbose("\n=========  %s\n", gNuConvTestName);
2856 
2857     conv = ucnv_open(codepage, &status);
2858     if(U_FAILURE(status))
2859     {
2860         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
2861         return TRUE;
2862     }
2863 
2864     log_verbose("Converter opened..\n");
2865 
2866     src = (const char *)source;
2867     targ = junkout;
2868     offs = junokout;
2869 
2870     realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
2871     realBufferEnd = junkout + realBufferSize;
2872     realSourceEnd = src + sourcelen;
2873     /*----setting the callback routine----*/
2874     ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2875     if (U_FAILURE(status))
2876     {
2877         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2878     }
2879     /*-------------------------------------*/
2880     /*setting the subChar*/
2881     if(mySubChar != NULL){
2882         ucnv_setSubstChars(conv, mySubChar, len, &status);
2883         if (U_FAILURE(status))  {
2884             log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2885         }
2886     }
2887     /*------------*/
2888 
2889 
2890     if ( gOutBufferSize != realBufferSize )
2891         checkOffsets = FALSE;
2892 
2893     if( gInBufferSize != NEW_MAX_BUFFER )
2894         checkOffsets = FALSE;
2895 
2896     do
2897     {
2898         end = nct_min( targ + gOutBufferSize, realBufferEnd);
2899         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
2900 
2901         if(targ == realBufferEnd)
2902         {
2903             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
2904             return FALSE;
2905         }
2906         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
2907 
2908 
2909 
2910         status = U_ZERO_ERROR;
2911 
2912         ucnv_toUnicode (conv,
2913                 &targ,
2914                 end,
2915                 (const char **)&src,
2916                 (const char *)srcLimit,
2917                 checkOffsets ? offs : NULL,
2918                 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
2919                 &status);
2920     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
2921 
2922     if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2923         char errChars[50]; /* should be sufficient */
2924         int8_t errLen = 50;
2925         UErrorCode err = U_ZERO_ERROR;
2926         const char* start= NULL;
2927         ucnv_getInvalidChars(conv,errChars, &errLen, &err);
2928         if(U_FAILURE(err)){
2929             log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err));
2930         }
2931         /* length of in invalid chars should be equal to returned length*/
2932         start = src - errLen;
2933         if(uprv_strncmp(errChars,start,errLen)!=0){
2934             log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2935         }
2936     }
2937     /* allow failure codes for the stop callback */
2938     if(U_FAILURE(status) &&
2939        (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND)))
2940     {
2941         log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2942         return FALSE;
2943     }
2944 
2945     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
2946         sourcelen, targ-junkout);
2947     if(getTestOption(VERBOSITY_OPTION))
2948     {
2949 
2950         junk[0] = 0;
2951         offset_str[0] = 0;
2952 
2953         for(p = junkout;p<targ;p++)
2954         {
2955             sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
2956             sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
2957         }
2958 
2959         log_verbose(junk);
2960         printUSeq(expect, expectlen);
2961         if ( checkOffsets )
2962         {
2963             log_verbose("\nOffsets:");
2964             log_verbose(offset_str);
2965         }
2966         log_verbose("\n");
2967     }
2968     ucnv_close(conv);
2969 
2970     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
2971 
2972     if (checkOffsets && (expectOffsets != 0))
2973     {
2974         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
2975         {
2976             log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2977             log_err("Got offsets:      ");
2978             for(p=junkout;p<targ;p++)
2979                 log_err("  %2d,", junokout[p-junkout]);
2980             log_err("\n");
2981             log_err("Expected offsets: ");
2982             for(i=0; i<(targ-junkout); i++)
2983                 log_err("  %2d,", expectOffsets[i]);
2984             log_err("\n");
2985             log_err("Got output:       ");
2986             for(i=0; i<(targ-junkout); i++)
2987                 log_err("0x%04x,", junkout[i]);
2988             log_err("\n");
2989             log_err("From source:      ");
2990             for(i=0; i<(src-(const char *)source); i++)
2991                 log_err("  0x%02x,", (unsigned char)source[i]);
2992             log_err("\n");
2993         }
2994     }
2995 
2996     if(!memcmp(junkout, expect, expectlen*2))
2997     {
2998         log_verbose("Matches!\n");
2999         return TRUE;
3000     }
3001     else
3002     {
3003         log_err("String does not match. %s\n", gNuConvTestName);
3004         log_verbose("String does not match. %s\n", gNuConvTestName);
3005         log_err("Got:      ");
3006         printUSeqErr(junkout, expectlen);
3007         log_err("Expected: ");
3008         printUSeqErr(expect, expectlen);
3009         log_err("\n");
3010         return FALSE;
3011     }
3012 }
3013 
testConvertFromUnicodeWithContext(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UConverterFromUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len,const void * context,UErrorCode expectedError)3014 UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
3015                 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
3016                 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3017 {
3018 
3019 
3020     UErrorCode status = U_ZERO_ERROR;
3021     UConverter *conv = 0;
3022     char junkout[NEW_MAX_BUFFER]; /* FIX */
3023     int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3024     const UChar *src;
3025     char *end;
3026     char *targ;
3027     int32_t *offs;
3028     int i;
3029     int32_t  realBufferSize;
3030     char *realBufferEnd;
3031     const UChar *realSourceEnd;
3032     const UChar *sourceLimit;
3033     UBool checkOffsets = TRUE;
3034     UBool doFlush;
3035     char junk[9999];
3036     char offset_str[9999];
3037     char *p;
3038     UConverterFromUCallback oldAction = NULL;
3039     const void* oldContext = NULL;
3040 
3041 
3042     for(i=0;i<NEW_MAX_BUFFER;i++)
3043         junkout[i] = (char)0xF0;
3044     for(i=0;i<NEW_MAX_BUFFER;i++)
3045         junokout[i] = 0xFF;
3046     setNuConvTestName(codepage, "FROM");
3047 
3048     log_verbose("\nTesting========= %s  FROM \n  inputbuffer= %d   outputbuffer= %d\n", codepage, gInBufferSize,
3049             gOutBufferSize);
3050 
3051     conv = ucnv_open(codepage, &status);
3052     if(U_FAILURE(status))
3053     {
3054         log_data_err("Couldn't open converter %s\n",codepage);
3055         return TRUE; /* Because the err has already been logged. */
3056     }
3057 
3058     log_verbose("Converter opened..\n");
3059 
3060     /*----setting the callback routine----*/
3061     ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3062     if (U_FAILURE(status))
3063     {
3064         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3065     }
3066     /*------------------------*/
3067     /*setting the subChar*/
3068     if(mySubChar != NULL){
3069         ucnv_setSubstChars(conv, mySubChar, len, &status);
3070         if (U_FAILURE(status))  {
3071             log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status));
3072         }
3073     }
3074     /*------------*/
3075 
3076     src = source;
3077     targ = junkout;
3078     offs = junokout;
3079 
3080     realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
3081     realBufferEnd = junkout + realBufferSize;
3082     realSourceEnd = source + sourceLen;
3083 
3084     if ( gOutBufferSize != realBufferSize )
3085       checkOffsets = FALSE;
3086 
3087     if( gInBufferSize != NEW_MAX_BUFFER )
3088       checkOffsets = FALSE;
3089 
3090     do
3091     {
3092         end = nct_min(targ + gOutBufferSize, realBufferEnd);
3093         sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
3094 
3095         doFlush = (UBool)(sourceLimit == realSourceEnd);
3096 
3097         if(targ == realBufferEnd)
3098         {
3099             log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
3100             return FALSE;
3101         }
3102         log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
3103 
3104 
3105         status = U_ZERO_ERROR;
3106 
3107         ucnv_fromUnicode (conv,
3108                   (char **)&targ,
3109                   (const char *)end,
3110                   &src,
3111                   sourceLimit,
3112                   checkOffsets ? offs : NULL,
3113                   doFlush, /* flush if we're at the end of the input data */
3114                   &status);
3115     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
3116 
3117     /* allow failure codes for the stop callback */
3118     if(U_FAILURE(status) && status != expectedError)
3119     {
3120         log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3121         return FALSE;
3122     }
3123 
3124     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
3125         sourceLen, targ-junkout);
3126     if(getTestOption(VERBOSITY_OPTION))
3127     {
3128 
3129         junk[0] = 0;
3130         offset_str[0] = 0;
3131         for(p = junkout;p<targ;p++)
3132         {
3133             sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
3134             sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
3135         }
3136 
3137         log_verbose(junk);
3138         printSeq(expect, expectLen);
3139         if ( checkOffsets )
3140         {
3141             log_verbose("\nOffsets:");
3142             log_verbose(offset_str);
3143         }
3144         log_verbose("\n");
3145     }
3146     ucnv_close(conv);
3147 
3148 
3149     if(expectLen != targ-junkout)
3150     {
3151         log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3152         log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3153         printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3154         printSeqErr(expect, expectLen);
3155         return FALSE;
3156     }
3157 
3158     if (checkOffsets && (expectOffsets != 0) )
3159     {
3160         log_verbose("comparing %d offsets..\n", targ-junkout);
3161         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
3162             log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3163             log_err("Got Output : ");
3164             printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3165             log_err("Got Offsets:      ");
3166             for(p=junkout;p<targ;p++)
3167                 log_err("%d,", junokout[p-junkout]);
3168             log_err("\n");
3169             log_err("Expected Offsets: ");
3170             for(i=0; i<(targ-junkout); i++)
3171                 log_err("%d,", expectOffsets[i]);
3172             log_err("\n");
3173             return FALSE;
3174         }
3175     }
3176 
3177     if(!memcmp(junkout, expect, expectLen))
3178     {
3179         log_verbose("String matches! %s\n", gNuConvTestName);
3180         return TRUE;
3181     }
3182     else
3183     {
3184         log_err("String does not match. %s\n", gNuConvTestName);
3185         log_err("source: ");
3186         printUSeqErr(source, sourceLen);
3187         log_err("Got:      ");
3188         printSeqErr((const uint8_t *)junkout, expectLen);
3189         log_err("Expected: ");
3190         printSeqErr(expect, expectLen);
3191         return FALSE;
3192     }
3193 }
testConvertToUnicodeWithContext(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UConverterToUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len,const void * context,UErrorCode expectedError)3194 UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
3195                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
3196                const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3197 {
3198     UErrorCode status = U_ZERO_ERROR;
3199     UConverter *conv = 0;
3200     UChar   junkout[NEW_MAX_BUFFER]; /* FIX */
3201     int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3202     const char *src;
3203     const char *realSourceEnd;
3204     const char *srcLimit;
3205     UChar *targ;
3206     UChar *end;
3207     int32_t *offs;
3208     int i;
3209     UBool   checkOffsets = TRUE;
3210     char junk[9999];
3211     char offset_str[9999];
3212     UChar *p;
3213     UConverterToUCallback oldAction = NULL;
3214     const void* oldContext = NULL;
3215 
3216     int32_t   realBufferSize;
3217     UChar *realBufferEnd;
3218 
3219 
3220     for(i=0;i<NEW_MAX_BUFFER;i++)
3221         junkout[i] = 0xFFFE;
3222 
3223     for(i=0;i<NEW_MAX_BUFFER;i++)
3224         junokout[i] = -1;
3225 
3226     setNuConvTestName(codepage, "TO");
3227 
3228     log_verbose("\n=========  %s\n", gNuConvTestName);
3229 
3230     conv = ucnv_open(codepage, &status);
3231     if(U_FAILURE(status))
3232     {
3233         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
3234         return TRUE;
3235     }
3236 
3237     log_verbose("Converter opened..\n");
3238 
3239     src = (const char *)source;
3240     targ = junkout;
3241     offs = junokout;
3242 
3243     realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
3244     realBufferEnd = junkout + realBufferSize;
3245     realSourceEnd = src + sourcelen;
3246     /*----setting the callback routine----*/
3247     ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3248     if (U_FAILURE(status))
3249     {
3250         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3251     }
3252     /*-------------------------------------*/
3253     /*setting the subChar*/
3254     if(mySubChar != NULL){
3255         ucnv_setSubstChars(conv, mySubChar, len, &status);
3256         if (U_FAILURE(status))  {
3257             log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3258         }
3259     }
3260     /*------------*/
3261 
3262 
3263     if ( gOutBufferSize != realBufferSize )
3264         checkOffsets = FALSE;
3265 
3266     if( gInBufferSize != NEW_MAX_BUFFER )
3267         checkOffsets = FALSE;
3268 
3269     do
3270     {
3271         end = nct_min( targ + gOutBufferSize, realBufferEnd);
3272         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
3273 
3274         if(targ == realBufferEnd)
3275         {
3276             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
3277             return FALSE;
3278         }
3279         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
3280 
3281 
3282 
3283         status = U_ZERO_ERROR;
3284 
3285         ucnv_toUnicode (conv,
3286                 &targ,
3287                 end,
3288                 (const char **)&src,
3289                 (const char *)srcLimit,
3290                 checkOffsets ? offs : NULL,
3291                 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
3292                 &status);
3293     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
3294 
3295     /* allow failure codes for the stop callback */
3296     if(U_FAILURE(status) && status!=expectedError)
3297     {
3298         log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3299         return FALSE;
3300     }
3301 
3302     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
3303         sourcelen, targ-junkout);
3304     if(getTestOption(VERBOSITY_OPTION))
3305     {
3306 
3307         junk[0] = 0;
3308         offset_str[0] = 0;
3309 
3310         for(p = junkout;p<targ;p++)
3311         {
3312             sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
3313             sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
3314         }
3315 
3316         log_verbose(junk);
3317         printUSeq(expect, expectlen);
3318         if ( checkOffsets )
3319         {
3320             log_verbose("\nOffsets:");
3321             log_verbose(offset_str);
3322         }
3323         log_verbose("\n");
3324     }
3325     ucnv_close(conv);
3326 
3327     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
3328 
3329     if (checkOffsets && (expectOffsets != 0))
3330     {
3331         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
3332         {
3333             log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3334             log_err("Got offsets:      ");
3335             for(p=junkout;p<targ;p++)
3336                 log_err("  %2d,", junokout[p-junkout]);
3337             log_err("\n");
3338             log_err("Expected offsets: ");
3339             for(i=0; i<(targ-junkout); i++)
3340                 log_err("  %2d,", expectOffsets[i]);
3341             log_err("\n");
3342             log_err("Got output:       ");
3343             for(i=0; i<(targ-junkout); i++)
3344                 log_err("0x%04x,", junkout[i]);
3345             log_err("\n");
3346             log_err("From source:      ");
3347             for(i=0; i<(src-(const char *)source); i++)
3348                 log_err("  0x%02x,", (unsigned char)source[i]);
3349             log_err("\n");
3350         }
3351     }
3352 
3353     if(!memcmp(junkout, expect, expectlen*2))
3354     {
3355         log_verbose("Matches!\n");
3356         return TRUE;
3357     }
3358     else
3359     {
3360         log_err("String does not match. %s\n", gNuConvTestName);
3361         log_verbose("String does not match. %s\n", gNuConvTestName);
3362         log_err("Got:      ");
3363         printUSeqErr(junkout, expectlen);
3364         log_err("Expected: ");
3365         printUSeqErr(expect, expectlen);
3366         log_err("\n");
3367         return FALSE;
3368     }
3369 }
3370 
TestCallBackFailure(void)3371 static void TestCallBackFailure(void) {
3372     UErrorCode status = U_USELESS_COLLATOR_ERROR;
3373     ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status);
3374     if (status != U_USELESS_COLLATOR_ERROR) {
3375         log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n");
3376     }
3377     ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status);
3378     if (status != U_USELESS_COLLATOR_ERROR) {
3379         log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n");
3380     }
3381     ucnv_cbFromUWriteSub(NULL, -1, &status);
3382     if (status != U_USELESS_COLLATOR_ERROR) {
3383         log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n");
3384     }
3385     ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status);
3386     if (status != U_USELESS_COLLATOR_ERROR) {
3387         log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n");
3388     }
3389 }
3390