• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*
9 ********************************************************************************
10 * File NCCBTST.C
11 *
12 * Modification History:
13 *        Name                            Description
14 *    Madhu Katragadda     7/21/1999      Testing error callback routines
15 ********************************************************************************
16 */
17 #include <ctype.h>
18 #include <stdbool.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include "cmemory.h"
23 #include "cstring.h"
24 #include "unicode/uloc.h"
25 #include "unicode/ucnv.h"
26 #include "unicode/ucnv_err.h"
27 #include "cintltst.h"
28 #include "unicode/utypes.h"
29 #include "unicode/ustring.h"
30 #include "nccbtst.h"
31 #include "unicode/ucnv_cb.h"
32 #include "unicode/utf16.h"
33 
/* Large buffer size that the Test*CallBack drivers pass, alongside 1, to the test routines. */
#define NEW_MAX_BUFFER 999

/*
 * Evaluates to the smaller of x and y.
 *
 * Every use of an argument is parenthesized so that compound expressions
 * (for example "a & b" or "c ? d : e") keep their meaning after expansion.
 * The selected argument is still evaluated twice, so do not pass expressions
 * with side effects.
 */
#define nct_min(x,y)  (((x) < (y)) ? (x) : (y))

/*
 * Buffer sizes for the test currently running. TestSkip() stores its
 * inputsize/outputsize arguments here and setNuConvTestName() reads them.
 */
static int32_t  gInBufferSize = 0;
static int32_t  gOutBufferSize = 0;
/* Label describing the current test case; written by setNuConvTestName(). */
static char     gNuConvTestName[1024];
41 
/*
 * Writes the first len bytes of a through log_verbose() as a brace-enclosed
 * list of upper-case, two-digit hex values, preceded by a newline.
 */
static void printSeq(const uint8_t* a, int len)
{
    int idx;

    log_verbose("\n{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02X, ", a[idx]);
    }
    log_verbose("}\n");
}
50 
/*
 * Writes the first len code units of a through log_verbose() as a
 * brace-enclosed list of four-digit hex values.
 */
static void printUSeq(const UChar* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("  0x%04x, ", a[idx]);
    }
    log_verbose("}\n");
}
59 
/*
 * Writes the first len bytes of a to stderr as a brace-enclosed list of
 * two-digit hex values.
 */
static void printSeqErr(const uint8_t* a, int len)
{
    const uint8_t* cur = a;
    const uint8_t* const stop = a + (len > 0 ? len : 0);

    fprintf(stderr, "{");
    for (; cur < stop; ++cur) {
        fprintf(stderr, "  0x%02x, ", *cur);
    }
    fprintf(stderr, "}\n");
}
68 
/*
 * Writes the first len code units of a to stderr as a brace-enclosed list of
 * four-digit hex values.
 */
static void printUSeqErr(const UChar* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%04x, ", a[idx]);
    }
    fprintf(stderr,"}\n");
}
77 
setNuConvTestName(const char * codepage,const char * direction)78 static void setNuConvTestName(const char *codepage, const char *direction)
79 {
80     snprintf(gNuConvTestName, sizeof(gNuConvTestName), "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
81             codepage,
82             direction,
83             (int)gInBufferSize,
84             (int)gOutBufferSize);
85 }
86 
87 
88 static void TestCallBackFailure(void);
89 
90 void addTestConvertErrorCallBack(TestNode** root);
91 
/*
 * Registers this file's callback tests under "tsconv/nccbtst/" in the test
 * tree rooted at *root. The two legacy-converter tests are registered only
 * when UCONFIG_NO_LEGACY_CONVERSION is off.
 */
void addTestConvertErrorCallBack(TestNode** root)
{
    addTest(root, TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack");
    addTest(root, TestStopCallBack, "tsconv/nccbtst/TestStopCallBack");
    addTest(root, TestSubCallBack,  "tsconv/nccbtst/TestSubCallBack");
    /* BEGIN android-removed
       To save space, Android does not build complete CJK conversion tables.
       We skip the test here.
    addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack");
       END android-removed */

#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOtherCallBack");
    addTest(root, TestSingleByteCallBack,    "tsconv/nccbtst/TestSingleByteCallBack");
#endif

    addTest(root, TestCallBackFailure, "tsconv/nccbtst/TestCallBackFailure");
}
110 
TestSkipCallBack(void)111 static void TestSkipCallBack(void)
112 {
113     TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
114     TestSkip(1,NEW_MAX_BUFFER);
115     TestSkip(1,1);
116     TestSkip(NEW_MAX_BUFFER, 1);
117 }
118 
TestStopCallBack(void)119 static void TestStopCallBack(void)
120 {
121     TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
122     TestStop(1,NEW_MAX_BUFFER);
123     TestStop(1,1);
124     TestStop(NEW_MAX_BUFFER, 1);
125 }
126 
TestSubCallBack(void)127 static void TestSubCallBack(void)
128 {
129     TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
130     TestSub(1,NEW_MAX_BUFFER);
131     TestSub(1,1);
132     TestSub(NEW_MAX_BUFFER, 1);
133 
134 #if !UCONFIG_NO_LEGACY_CONVERSION
135     TestEBCDIC_STATEFUL_Sub(1, 1);
136     TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER);
137     TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1);
138     TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
139 #endif
140 }
141 
TestSubWithValueCallBack(void)142 static void TestSubWithValueCallBack(void)
143 {
144     TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
145     TestSubWithValue(1,NEW_MAX_BUFFER);
146     TestSubWithValue(1,1);
147     TestSubWithValue(NEW_MAX_BUFFER, 1);
148 }
149 
150 #if !UCONFIG_NO_LEGACY_CONVERSION
TestLegalAndOtherCallBack(void)151 static void TestLegalAndOtherCallBack(void)
152 {
153     TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
154     TestLegalAndOthers(1,NEW_MAX_BUFFER);
155     TestLegalAndOthers(1,1);
156     TestLegalAndOthers(NEW_MAX_BUFFER, 1);
157 }
158 
TestSingleByteCallBack(void)159 static void TestSingleByteCallBack(void)
160 {
161     TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
162     TestSingleByte(1,NEW_MAX_BUFFER);
163     TestSingleByte(1,1);
164     TestSingleByte(NEW_MAX_BUFFER, 1);
165 }
166 #endif
167 
TestSkip(int32_t inputsize,int32_t outputsize)168 static void TestSkip(int32_t inputsize, int32_t outputsize)
169 {
170     static const uint8_t expskipIBM_949[]= {
171         0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
172 
173     static const uint8_t expskipIBM_943[] = {
174         0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 };
175 
176     static const uint8_t expskipIBM_930[] = {
177         0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f };
178 
179     gInBufferSize = inputsize;
180     gOutBufferSize = outputsize;
181 
182     /*From Unicode*/
183     log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP  \n");
184 
185 #if !UCONFIG_NO_LEGACY_CONVERSION
186     {
187         static const UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
188         static const UChar  sampleText2[] =  { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
189 
190         static const int32_t  toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 };
191         static const int32_t  toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 };
192 
193         if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
194                 expskipIBM_949, UPRV_LENGTHOF(expskipIBM_949), "ibm-949",
195                 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 ))
196             log_err("u-> ibm-949 with skip did not match.\n");
197         if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
198                 expskipIBM_943, UPRV_LENGTHOF(expskipIBM_943), "ibm-943",
199                 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 ))
200             log_err("u-> ibm-943 with skip did not match.\n");
201     }
202 
203     {
204         static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 };
205         static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f };
206         static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 };
207 
208         /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */
209         if(!testConvertFromUnicode(fromU, UPRV_LENGTHOF(fromU),
210                                    fromUBytes, UPRV_LENGTHOF(fromUBytes),
211                                    "ibm-930",
212                                    UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets,
213                                    NULL, 0)
214         ) {
215             log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n");
216         }
217     }
218 #endif
219 
220     {
221         static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
222         static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 };
223         static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 };
224 
225         static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
226         static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 };
227         static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 };
228 
229         /* US-ASCII */
230         if(!testConvertFromUnicode(usasciiFromU, UPRV_LENGTHOF(usasciiFromU),
231                                    usasciiFromUBytes, UPRV_LENGTHOF(usasciiFromUBytes),
232                                    "US-ASCII",
233                                    UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
234                                    NULL, 0)
235         ) {
236             log_err("u->US-ASCII with skip did not match.\n");
237         }
238 
239 #if !UCONFIG_NO_LEGACY_CONVERSION
240         /* SBCS NLTC codepage 367 for US-ASCII */
241         if(!testConvertFromUnicode(usasciiFromU, UPRV_LENGTHOF(usasciiFromU),
242                                    usasciiFromUBytes, UPRV_LENGTHOF(usasciiFromUBytes),
243                                    "ibm-367",
244                                    UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
245                                    NULL, 0)
246         ) {
247             log_err("u->ibm-367 with skip did not match.\n");
248         }
249 #endif
250 
251         /* ISO-Latin-1 */
252         if(!testConvertFromUnicode(latin1FromU, UPRV_LENGTHOF(latin1FromU),
253                                    latin1FromUBytes, UPRV_LENGTHOF(latin1FromUBytes),
254                                    "LATIN_1",
255                                    UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
256                                    NULL, 0)
257         ) {
258             log_err("u->LATIN_1 with skip did not match.\n");
259         }
260 
261 #if !UCONFIG_NO_LEGACY_CONVERSION
262         /* windows-1252 */
263         if(!testConvertFromUnicode(latin1FromU, UPRV_LENGTHOF(latin1FromU),
264                                    latin1FromUBytes, UPRV_LENGTHOF(latin1FromUBytes),
265                                    "windows-1252",
266                                    UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
267                                    NULL, 0)
268         ) {
269             log_err("u->windows-1252 with skip did not match.\n");
270         }
271     }
272 
273     {
274         static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
275         static const uint8_t toIBM943[]= { 0x61, 0x61 };
276         static const int32_t offset[]= {0, 4};
277 
278          /* EUC_JP*/
279         static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
280         static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
281             0x61, 0x8e, 0xe0,
282         };
283         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7};
284 
285         /*EUC_TW*/
286         static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
287         static const uint8_t to_euc_tw[]={
288             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
289             0x61, 0xe6, 0xca, 0x8a,
290         };
291         static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,};
292 
293         /*ISO-2022-JP*/
294         static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, };
295         static const uint8_t to_iso_2022_jp[]={
296             0x41,
297             0x42,
298 
299         };
300         static const int32_t from_iso_2022_jpOffs [] ={0,2};
301 
302         /*ISO-2022-JP*/
303         UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
304         static const uint8_t to_iso_2022_jp2[]={
305             0x41,
306             0x43,
307 
308         };
309         static const int32_t from_iso_2022_jpOffs2 [] ={0,2};
310 
311         /*ISO-2022-cn*/
312         static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
313         static const uint8_t to_iso_2022_cn[]={
314             0x41, 0x42
315         };
316         static const int32_t from_iso_2022_cnOffs [] ={
317             0, 2
318         };
319 
320         /*ISO-2022-CN*/
321         static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
322         static const uint8_t to_iso_2022_cn1[]={
323             0x41, 0x43
324 
325         };
326         static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 };
327 
328         /*ISO-2022-kr*/
329         static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
330         static const uint8_t to_iso_2022_kr[]={
331             0x1b,   0x24,   0x29,   0x43,
332             0x41,
333             0x0e,   0x25,   0x50,
334             0x25,   0x50,
335             0x0f,   0x42,
336         };
337         static const int32_t from_iso_2022_krOffs [] ={
338             -1,-1,-1,-1,
339             0,
340             1,1,1,
341             3,3,
342             4,4
343         };
344 
345         /*ISO-2022-kr*/
346         static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
347         static const uint8_t to_iso_2022_kr1[]={
348             0x1b,   0x24,   0x29,   0x43,
349             0x41,
350             0x0e,   0x25,   0x50,
351             0x25,   0x50,
352 
353         };
354         static const int32_t from_iso_2022_krOffs1 [] ={
355             -1,-1,-1,-1,
356             0,
357             1,1,1,
358             3,3,
359 
360         };
361         /* HZ encoding */
362         static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
363 
364         static const uint8_t to_hz[]={
365             0x7e,   0x7d,   0x41,
366             0x7e,   0x7b,   0x26,   0x30,
367             0x26,   0x30,
368             0x7e,   0x7d,   0x42,
369 
370         };
371         static const int32_t from_hzOffs [] ={
372             0,0,0,
373             1,1,1,1,
374             3,3,
375             4,4,4,4
376         };
377 
378         static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
379 
380         static const uint8_t to_hz1[]={
381             0x7e,   0x7d,   0x41,
382             0x7e,   0x7b,   0x26,   0x30,
383             0x26,   0x30,
384 
385 
386         };
387         static const int32_t from_hzOffs1 [] ={
388             0,0,0,
389             1,1,1,1,
390             3,3,
391 
392         };
393 
394 #endif
395 
396         static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
397 
398         static const uint8_t to_SCSU[]={
399             0x41,
400             0x42
401 
402 
403         };
404         static const int32_t from_SCSUOffs [] ={
405             0,
406             2,
407 
408         };
409 
410 #if !UCONFIG_NO_LEGACY_CONVERSION
411         /* ISCII */
412         static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
413         static const uint8_t to_iscii[]={
414             0x41,
415             0x42,
416         };
417         static const int32_t from_isciiOffs [] ={
418             0,2,
419 
420         };
421         /*ISCII*/
422         static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
423         static const uint8_t to_iscii1[]={
424             0x44,
425             0x43,
426 
427         };
428         static const int32_t from_isciiOffs1 [] ={0,2};
429 
430         if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
431                 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
432                 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 ))
433             log_err("u-> ibm-943 with skip did not match.\n");
434 
435         if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
436                 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
437                 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 ))
438             log_err("u-> euc-jp with skip did not match.\n");
439 
440         if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
441                 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
442                 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 ))
443             log_err("u-> euc-tw with skip did not match.\n");
444 
445         /*iso_2022_jp*/
446         if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
447                 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
448                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 ))
449             log_err("u-> iso-2022-jp with skip did not match.\n");
450 
451         /* with context */
452         if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, UPRV_LENGTHOF(iso_2022_jp_inputText2),
453                 to_iso_2022_jp2, UPRV_LENGTHOF(to_iso_2022_jp2), "iso-2022-jp",
454                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
455             log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
456 
457         /*iso_2022_cn*/
458         if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
459                 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
460                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 ))
461             log_err("u-> iso-2022-cn with skip did not match.\n");
462         /*with context*/
463         if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, UPRV_LENGTHOF(iso_2022_cn_inputText1),
464                 to_iso_2022_cn1, UPRV_LENGTHOF(to_iso_2022_cn1), "iso-2022-cn",
465                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
466             log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
467 
468         /*iso_2022_kr*/
469         if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
470                 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
471                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 ))
472             log_err("u-> iso-2022-kr with skip did not match.\n");
473           /*with context*/
474         if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, UPRV_LENGTHOF(iso_2022_kr_inputText1),
475                 to_iso_2022_kr1, UPRV_LENGTHOF(to_iso_2022_kr1), "iso-2022-kr",
476                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
477             log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
478 
479         /*hz*/
480         if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
481                 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
482                 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 ))
483             log_err("u-> HZ with skip did not match.\n");
484           /*with context*/
485         if(!testConvertFromUnicodeWithContext(hz_inputText1, UPRV_LENGTHOF(hz_inputText1),
486                 to_hz1, UPRV_LENGTHOF(to_hz1), "hz",
487                 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
488             log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
489 #endif
490 
491         /*SCSU*/
492         if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
493                 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
494                 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 ))
495             log_err("u-> SCSU with skip did not match.\n");
496 
497 #if !UCONFIG_NO_LEGACY_CONVERSION
498         /*ISCII*/
499         if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
500                 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
501                 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 ))
502             log_err("u-> iscii with skip did not match.\n");
503         /*with context*/
504         if(!testConvertFromUnicodeWithContext(iscii_inputText1, UPRV_LENGTHOF(iscii_inputText1),
505                 to_iscii1, UPRV_LENGTHOF(to_iscii1), "ISCII,version=0",
506                 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
507             log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
508 #endif
509     }
510 
511     log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
512     {
513         static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */
514             0xFB, 0xEE, 0x28,       /* from source offset 0 */
515             0x24, 0x1E, 0x52,
516             0xB2,
517             0x20,
518             0xB3,
519             0xB1,
520             0x0D,
521             0x0A,
522 
523             0x20,                   /* from 8 */
524             0x00,
525             0xD0, 0x6C,
526             0xB6,
527             0xD8, 0xA5,
528             0x20,
529             0x68,
530             0x59,
531 
532             0xF9, 0x28,             /* from 16 */
533             0x6D,
534             0x20,
535             0x73,
536             0xE0, 0x2D,
537             0xDE, 0x43,
538             0xD0, 0x33,
539             0x20,
540 
541             0xFA, 0x83,             /* from 24 */
542             0x25, 0x01,
543             0xFB, 0x16, 0x87,
544             0x4B, 0x16,
545             0x20,
546             0xE6, 0xBD,
547             0xEB, 0x5B,
548             0x4B, 0xCC,
549 
550             0xF9, 0xA2,             /* from 32 */
551             0xFC, 0x10, 0x3E,
552             0xFE, 0x16, 0x3A, 0x8C,
553             0x20,
554             0xFC, 0x03, 0xAC,
555 
556             0x01,                   /* from 41 */
557             0xDE, 0x83,
558             0x20,
559             0x09
560         };
561         static const UChar expected[]={
562             0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */
563             0x0063, 0x0061, 0x000D, 0x000A,
564 
565             0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */
566             0x0930, 0x0020, 0x0918, 0x0909,
567 
568             0x3086, 0x304D, 0x0020, 0x3053, /* 16 */
569             0x4000, 0x4E00, 0x7777, 0x0020,
570 
571             0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */
572             0x0020, 0xD7A3, 0xDC00, 0xD800,
573 
574             0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */
575             0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
576 
577             0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */
578             0x0009
579         };
580         static const int32_t offsets[]={
581             0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7,
582             8, 9, 10, 10, 11, 12, 12, 13, 14, 15,
583             16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23,
584             24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31,
585             32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39,
586             41, 42, 42, 43, 44
587         };
588 
589         /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */
590         if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
591                                  sampleText, UPRV_LENGTHOF(sampleText),
592                                  "BOCU-1",
593                                  UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
594         ) {
595             log_err("u->BOCU-1 with skip did not match.\n");
596         }
597     }
598 
599     log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
600     {
601         const uint8_t sampleText[]={
602             0x61,                               /* 'a' */
603             0xc4, 0xb5,                         /* U+0135 */
604             0xed, 0x80, 0xa0,                   /* Hangul U+d020 */
605             0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */
606             0xee, 0x80, 0x80,                   /* PUA U+e000 */
607             0xed, 0xb0, 0x81,                   /* unpaired trail surrogate U+dc01 */
608             0x62,                               /* 'b' */
609             0xed, 0xa0, 0x81,                   /* unpaired lead surrogate U+d801 */
610             0xd0, 0x80                          /* U+0400 */
611         };
612         UChar expected[]={
613             0x0061,
614             0x0135,
615             0xd020,
616             0xd801, 0xdc01,
617             0xe000,
618             0xdc01,
619             0x0062,
620             0xd801,
621             0x0400
622         };
623         int32_t offsets[]={
624             0,
625             1, 1,
626             2, 2, 2,
627             3, 3, 3, 4, 4, 4,
628             5, 5, 5,
629             6, 6, 6,
630             7,
631             8, 8, 8,
632             9, 9
633         };
634 
635         /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */
636 
637         /* without offsets */
638         if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
639                                  sampleText, UPRV_LENGTHOF(sampleText),
640                                  "CESU-8",
641                                  UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0)
642         ) {
643             log_err("u->CESU-8 with skip did not match.\n");
644         }
645 
646         /* with offsets */
647         if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
648                                  sampleText, UPRV_LENGTHOF(sampleText),
649                                  "CESU-8",
650                                  UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
651         ) {
652             log_err("u->CESU-8 with skip did not match.\n");
653         }
654     }
655 
656     /*to Unicode*/
657     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP  \n");
658 
659 #if !UCONFIG_NO_LEGACY_CONVERSION
660     {
661 
662         static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 };
663         static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
664         static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
665 
666         static const int32_t  fromIBM949Offs [] = { 0, 1, 3, 5};
667         static const int32_t  fromIBM943Offs [] = { 0, 2, 4};
668         static const int32_t  fromIBM930Offs [] = { 1, 3, 5};
669 
670         if(!testConvertToUnicode(expskipIBM_949, UPRV_LENGTHOF(expskipIBM_949),
671                  IBM_949skiptoUnicode, UPRV_LENGTHOF(IBM_949skiptoUnicode),"ibm-949",
672                 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 ))
673             log_err("ibm-949->u with skip did not match.\n");
674         if(!testConvertToUnicode(expskipIBM_943, UPRV_LENGTHOF(expskipIBM_943),
675                  IBM_943skiptoUnicode, UPRV_LENGTHOF(IBM_943skiptoUnicode),"ibm-943",
676                 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 ))
677             log_err("ibm-943->u with skip did not match.\n");
678 
679 
680         if(!testConvertToUnicode(expskipIBM_930, UPRV_LENGTHOF(expskipIBM_930),
681                  IBM_930skiptoUnicode, UPRV_LENGTHOF(IBM_930skiptoUnicode),"ibm-930",
682                 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 ))
683             log_err("ibm-930->u with skip did not match.\n");
684 
685 
686         if(!testConvertToUnicodeWithContext(expskipIBM_930, UPRV_LENGTHOF(expskipIBM_930),
687                  IBM_930skiptoUnicode, UPRV_LENGTHOF(IBM_930skiptoUnicode),"ibm-930",
688                 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
689             log_err("ibm-930->u with skip did not match.\n");
690     }
691 #endif
692 
693     {
694         static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 };
695         static const UChar usasciiToU[] = { 0x61, 0x31 };
696         static const int32_t usasciiToUOffsets[] = { 0, 2 };
697 
698         static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 };
699         static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 };
700         static const int32_t latin1ToUOffsets[] = { 0, 1, 2 };
701 
702         /* US-ASCII */
703         if(!testConvertToUnicode(usasciiToUBytes, UPRV_LENGTHOF(usasciiToUBytes),
704                                  usasciiToU, UPRV_LENGTHOF(usasciiToU),
705                                  "US-ASCII",
706                                  UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
707                                  NULL, 0)
708         ) {
709             log_err("US-ASCII->u with skip did not match.\n");
710         }
711 
712 #if !UCONFIG_NO_LEGACY_CONVERSION
713         /* SBCS NLTC codepage 367 for US-ASCII */
714         if(!testConvertToUnicode(usasciiToUBytes, UPRV_LENGTHOF(usasciiToUBytes),
715                                  usasciiToU, UPRV_LENGTHOF(usasciiToU),
716                                  "ibm-367",
717                                  UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
718                                  NULL, 0)
719         ) {
720             log_err("ibm-367->u with skip did not match.\n");
721         }
722 #endif
723 
724         /* ISO-Latin-1 */
725         if(!testConvertToUnicode(latin1ToUBytes, UPRV_LENGTHOF(latin1ToUBytes),
726                                  latin1ToU, UPRV_LENGTHOF(latin1ToU),
727                                  "LATIN_1",
728                                  UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
729                                  NULL, 0)
730         ) {
731             log_err("LATIN_1->u with skip did not match.\n");
732         }
733 
734 #if !UCONFIG_NO_LEGACY_CONVERSION
735         /* windows-1252 */
736         if(!testConvertToUnicode(latin1ToUBytes, UPRV_LENGTHOF(latin1ToUBytes),
737                                  latin1ToU, UPRV_LENGTHOF(latin1ToU),
738                                  "windows-1252",
739                                  UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
740                                  NULL, 0)
741         ) {
742             log_err("windows-1252->u with skip did not match.\n");
743         }
744 #endif
745     }
746 
747 #if !UCONFIG_NO_LEGACY_CONVERSION
748     {
749         static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
750             0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
751         };
752         static const UChar EBCIDIC_STATEFUL_toUnicode[] ={  0x6d63, 0x03b4
753         };
754         static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5};
755 
756 
757          /* euc-jp*/
758         static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
759             0x8f, 0xda, 0xa1,  /*unassigned*/
760            0x8e, 0xe0,
761         };
762         static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2};
763         static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9};
764 
765          /*EUC_TW*/
766         static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
767             0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
768            0xe6, 0xca, 0x8a,
769         };
770         static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, };
771         static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13};
772                 /*iso-2022-jp*/
773         static const uint8_t sampleTxt_iso_2022_jp[]={
774             0x41,
775             0x1b,   0x24,   0x42,   0x3a, 0x1a, /*unassigned*/
776             0x1b,   0x28,   0x42,   0x42,
777 
778         };
779         static const UChar iso_2022_jptoUnicode[]={    0x41,0x42 };
780         static const int32_t from_iso_2022_jpOffs [] ={  0,9   };
781 
782         /*iso-2022-cn*/
783         static const uint8_t sampleTxt_iso_2022_cn[]={
784             0x0f,   0x41,   0x44,
785             0x1B,   0x24,   0x29,   0x47,
786             0x0E,   0x40,   0x6f, /*unassigned*/
787             0x0f,   0x42,
788 
789         };
790 
791         static const UChar iso_2022_cntoUnicode[]={    0x41, 0x44,0x42 };
792         static const int32_t from_iso_2022_cnOffs [] ={  1,   2,   11   };
793 
794         /*iso-2022-kr*/
795         static const uint8_t sampleTxt_iso_2022_kr[]={
796           0x1b, 0x24, 0x29,  0x43,
797           0x41,
798           0x0E, 0x7f, 0x1E,
799           0x0e, 0x25, 0x50,
800           0x0f, 0x51,
801           0x42, 0x43,
802 
803         };
804         static const UChar iso_2022_krtoUnicode[]={     0x41,0x03A0,0x51, 0x42,0x43};
805         static const int32_t from_iso_2022_krOffs [] ={  4,    9,    12,   13  , 14 };
806 
807         /*hz*/
808         static const uint8_t sampleTxt_hz[]={
809             0x41,
810             0x7e,   0x7b,   0x26,   0x30,
811             0x7f,   0x1E, /*unassigned*/
812             0x26,   0x30,
813             0x7e,   0x7d,   0x42,
814             0x7e,   0x7b,   0x7f,   0x1E,/*unassigned*/
815             0x7e,   0x7d,   0x42,
816         };
817         static const UChar hztoUnicode[]={
818             0x41,
819             0x03a0,
820             0x03A0,
821             0x42,
822             0x42,};
823 
824         static const int32_t from_hzOffs [] ={0,3,7,11,18,  };
825 
826         /*ISCII*/
827         static const uint8_t sampleTxt_iscii[]={
828             0x41,
829             0xa1,
830             0xEB,    /*unassigned*/
831             0x26,
832             0x30,
833             0xa2,
834             0xEC,    /*unassigned*/
835             0x42,
836         };
837         static const UChar isciitoUnicode[]={
838             0x41,
839             0x0901,
840             0x26,
841             0x30,
842             0x0902,
843             0x42,
844             };
845 
846         static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 };
847 
848         /*LMBCS*/
849         static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50,
850             0x12, 0x92, 0xa0, /*unassigned*/
851             0x12, 0x92, 0xA1,
852         };
853         static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4};
854         static const int32_t fromLMBCS[] = {0, 6};
855 
856         if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
857              EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
858             UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
859         log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
860 
861         if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
862              EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
863             UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
864         log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
865 
866         if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
867                  euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
868                 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0))
869             log_err("euc-jp->u with skip did not match.\n");
870 
871 
872 
873         if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
874                  euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
875                 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0))
876             log_err("euc-tw->u with skip did not match.\n");
877 
878 
879         if(!testConvertToUnicode(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
880                  iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
881                 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0))
882             log_err("iso-2022-jp->u with skip did not match.\n");
883 
884         if(!testConvertToUnicode(sampleTxt_iso_2022_cn, UPRV_LENGTHOF(sampleTxt_iso_2022_cn),
885                  iso_2022_cntoUnicode, UPRV_LENGTHOF(iso_2022_cntoUnicode),"iso-2022-cn",
886                 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0))
887             log_err("iso-2022-cn->u with skip did not match.\n");
888 
889         if(!testConvertToUnicode(sampleTxt_iso_2022_kr, UPRV_LENGTHOF(sampleTxt_iso_2022_kr),
890                  iso_2022_krtoUnicode, UPRV_LENGTHOF(iso_2022_krtoUnicode),"iso-2022-kr",
891                 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0))
892             log_err("iso-2022-kr->u with skip did not match.\n");
893 
894         if(!testConvertToUnicode(sampleTxt_hz, UPRV_LENGTHOF(sampleTxt_hz),
895                  hztoUnicode, UPRV_LENGTHOF(hztoUnicode),"HZ",
896                 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0))
897             log_err("HZ->u with skip did not match.\n");
898 
899         if(!testConvertToUnicode(sampleTxt_iscii, UPRV_LENGTHOF(sampleTxt_iscii),
900                  isciitoUnicode, UPRV_LENGTHOF(isciitoUnicode),"ISCII,version=0",
901                 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0))
902             log_err("iscii->u with skip did not match.\n");
903 
904         if(!testConvertToUnicode(sampleTxtLMBCS, UPRV_LENGTHOF(sampleTxtLMBCS),
905                 LMBCSToUnicode, UPRV_LENGTHOF(LMBCSToUnicode),"LMBCS-1",
906                 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0))
907             log_err("LMBCS->u with skip did not match.\n");
908 
909     }
910 #endif
911 
912     log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n");
913     {
914         const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
915             0xe0, 0x80,  0x61,};
916         UChar    expected1[] = {  0x0031, 0x4e8c, 0x0061};
917         int32_t offsets1[] = {   0x0000, 0x0001, 0x0006};
918 
919         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
920                  expected1, UPRV_LENGTHOF(expected1),"utf8",
921                 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
922             log_err("utf8->u with skip did not match.\n");
923     }
924 
925     log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n");
926     {
927         const uint8_t sampleText1[] = {  0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
928         UChar    expected1[] = {  0x00ba,  0x008c,  0x00f8,  0x0061,0xfffe,0xfffe};
929         int32_t offsets1[] = {   0x0000, 0x0001,0x0002,0x0003,4,5};
930 
931         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
932                  expected1, UPRV_LENGTHOF(expected1),"SCSU",
933                 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
934             log_err("scsu->u with skip did not match.\n");
935     }
936 
937     log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
938     {
939         const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */
940             0xFB, 0xEE, 0x28,       /* single-code point sequence at offset 0 */
941             0x24, 0x1E, 0x52,       /* 3 */
942             0xB2,                   /* 6 */
943             0x20,                   /* 7 */
944             0x40, 0x07,             /* 8 - wrong trail byte */
945             0xB3,                   /* 10 */
946             0xB1,                   /* 11 */
947             0xD0, 0x20,             /* 12 - wrong trail byte */
948             0x0D,                   /* 14 */
949             0x0A,                   /* 15 */
950             0x20,                   /* 16 */
951             0x00,                   /* 17 */
952             0xD0, 0x6C,             /* 18 */
953             0xB6,                   /* 20 */
954             0xD8, 0xA5,             /* 21 */
955             0x20,                   /* 23 */
956             0x68,                   /* 24 */
957             0x59,                   /* 25 */
958             0xF9, 0x28,             /* 26 */
959             0x6D,                   /* 28 */
960             0x20,                   /* 29 */
961             0x73,                   /* 30 */
962             0xE0, 0x2D,             /* 31 */
963             0xDE, 0x43,             /* 33 */
964             0xD0, 0x33,             /* 35 */
965             0x20,                   /* 37 */
966             0xFA, 0x83,             /* 38 */
967             0x25, 0x01,             /* 40 */
968             0xFB, 0x16, 0x87,       /* 42 */
969             0x4B, 0x16,             /* 45 */
970             0x20,                   /* 47 */
971             0xE6, 0xBD,             /* 48 */
972             0xEB, 0x5B,             /* 50 */
973             0x4B, 0xCC,             /* 52 */
974             0xF9, 0xA2,             /* 54 */
975             0xFC, 0x10, 0x3E,       /* 56 */
976             0xFE, 0x16, 0x3A, 0x8C, /* 59 */
977             0x20,                   /* 63 */
978             0xFC, 0x03, 0xAC,       /* 64 */
979             0xFF,                   /* 67 - FF just resets the state without encoding anything */
980             0x01,                   /* 68 */
981             0xDE, 0x83,             /* 69 */
982             0x20,                   /* 71 */
983             0x09                    /* 72 */
984         };
985         UChar expected[]={
986             0xFEFF, 0x0061, 0x0062, 0x0020,
987             0x0063, 0x0061, 0x000D, 0x000A,
988             0x0020, 0x0000, 0x00DF, 0x00E6,
989             0x0930, 0x0020, 0x0918, 0x0909,
990             0x3086, 0x304D, 0x0020, 0x3053,
991             0x4000, 0x4E00, 0x7777, 0x0020,
992             0x9FA5, 0x4E00, 0xAC00, 0xBCDE,
993             0x0020, 0xD7A3, 0xDC00, 0xD800,
994             0xD800, 0xDC00, 0xD845, 0xDDDD,
995             0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
996             0xDFFF, 0x0001, 0x0E40, 0x0020,
997             0x0009
998         };
999         int32_t offsets[]={
1000             0, 3, 6, 7, /* skip 8, */
1001             10, 11, /* skip 12, */
1002             14, 15, 16, 17, 18,
1003             20, 21, 23, 24, 25, 26, 28, 29,
1004             30, 31, 33, 35, 37, 38,
1005             40, 42, 45, 47, 48,
1006             50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59,
1007             63, 64, /* trail */ 64, /* reset only 67, */
1008             68, 69,
1009             71, 72
1010         };
1011 
1012         if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1013                                  expected, UPRV_LENGTHOF(expected), "BOCU-1",
1014                                  UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1015         ) {
1016             log_err("BOCU-1->u with skip did not match.\n");
1017         }
1018     }
1019 
1020     log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
1021     {
1022         const uint8_t sampleText[]={
1023             0x61,                               /* 0  'a' */
1024             0xc0, 0x80,                         /* 1  non-shortest form */
1025             0xc4, 0xb5,                         /* 3  U+0135 */
1026             0xed, 0x80, 0xa0,                   /* 5  Hangul U+d020 */
1027             0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8  surrogate pair for U+10401 */
1028             0xee, 0x80, 0x80,                   /* 14 PUA U+e000 */
1029             0xed, 0xb0, 0x81,                   /* 17 unpaired trail surrogate U+dc01 */
1030             0xf0, 0x90, 0x80, 0x80,             /* 20 illegal 4-byte form for U+10000 */
1031             0x62,                               /* 24 'b' */
1032             0xed, 0xa0, 0x81,                   /* 25 unpaired lead surrogate U+d801 */
1033             0xed, 0xa0,                         /* 28 incomplete sequence */
1034             0xd0, 0x80                          /* 30 U+0400 */
1035         };
1036         UChar expected[]={
1037             0x0061,
1038             /* skip */
1039             0x0135,
1040             0xd020,
1041             0xd801, 0xdc01,
1042             0xe000,
1043             0xdc01,
1044             /* skip */
1045             0x0062,
1046             0xd801,
1047             0x0400
1048         };
1049         int32_t offsets[]={
1050             0,
1051             /* skip 1, */
1052             3,
1053             5,
1054             8, 11,
1055             14,
1056             17,
1057             /* skip 20, 20, */
1058             24,
1059             25,
1060             /* skip 28 */
1061             30
1062         };
1063 
1064         /* without offsets */
1065         if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1066                                  expected, UPRV_LENGTHOF(expected), "CESU-8",
1067                                  UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0)
1068         ) {
1069             log_err("CESU-8->u with skip did not match.\n");
1070         }
1071 
1072         /* with offsets */
1073         if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1074                                  expected, UPRV_LENGTHOF(expected), "CESU-8",
1075                                  UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1076         ) {
1077             log_err("CESU-8->u with skip did not match.\n");
1078         }
1079     }
1080 }
1081 
TestStop(int32_t inputsize,int32_t outputsize)1082 static void TestStop(int32_t inputsize, int32_t outputsize)
1083 {
1084     static const UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1085     static const UChar  sampleText2[] =  { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1086 
1087     static const uint8_t expstopIBM_949[]= {
1088         0x00, 0xb0, 0xa1, 0xb0, 0xa2};
1089 
1090     static const uint8_t expstopIBM_943[] = {
1091         0x9f, 0xaf, 0x9f, 0xb1};
1092 
1093     static const uint8_t expstopIBM_930[] = {
1094         0x0e, 0x5d, 0x5f, 0x5d, 0x63};
1095 
1096     static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01};
1097     static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64};
1098     static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64};
1099 
1100 
1101     static const int32_t  toIBM949Offsstop [] = { 0, 1, 1, 2, 2};
1102     static const int32_t  toIBM943Offsstop [] = { 0, 0, 1, 1};
1103     static const int32_t  toIBM930Offsstop [] = { 0, 0, 0, 1, 1};
1104 
1105     static const int32_t  fromIBM949Offs [] = { 0, 1, 3};
1106     static const int32_t  fromIBM943Offs [] = { 0, 2};
1107     static const int32_t  fromIBM930Offs [] = { 1, 3};
1108 
1109     gInBufferSize = inputsize;
1110     gOutBufferSize = outputsize;
1111 
1112     /*From Unicode*/
1113 
1114 #if !UCONFIG_NO_LEGACY_CONVERSION
1115     if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1116             expstopIBM_949, UPRV_LENGTHOF(expstopIBM_949), "ibm-949",
1117             UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 ))
1118         log_err("u-> ibm-949 with stop did not match.\n");
1119     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1120             expstopIBM_943, UPRV_LENGTHOF(expstopIBM_943), "ibm-943",
1121             UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0))
1122         log_err("u-> ibm-943 with stop did not match.\n");
1123     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1124             expstopIBM_930, UPRV_LENGTHOF(expstopIBM_930), "ibm-930",
1125             UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 ))
1126         log_err("u-> ibm-930 with stop did not match.\n");
1127 
1128     log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP  \n");
1129     {
1130         static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1131         static const uint8_t toIBM943[]= { 0x61,};
1132         static const int32_t offset[]= {0,} ;
1133 
1134          /*EUC_JP*/
1135         static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1136         static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,};
1137         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,};
1138 
1139         /*EUC_TW*/
1140         static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1141         static const uint8_t to_euc_tw[]={
1142             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,};
1143         static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,};
1144 
1145         /*ISO-2022-JP*/
1146         static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, };
1147         static const uint8_t to_iso_2022_jp[]={
1148              0x41,
1149 
1150         };
1151         static const int32_t from_iso_2022_jpOffs [] ={0,};
1152 
1153         /*ISO-2022-cn*/
1154         static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1155         static const uint8_t to_iso_2022_cn[]={
1156             0x41,
1157 
1158         };
1159         static const int32_t from_iso_2022_cnOffs [] ={
1160             0,0,
1161             2,2,
1162         };
1163 
1164         /*ISO-2022-kr*/
1165         static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
1166         static const uint8_t to_iso_2022_kr[]={
1167             0x1b,   0x24,   0x29,   0x43,
1168             0x41,
1169             0x0e,   0x25,   0x50,
1170         };
1171         static const int32_t from_iso_2022_krOffs [] ={
1172             -1,-1,-1,-1,
1173              0,
1174             1,1,1,
1175         };
1176 
1177         /* HZ encoding */
1178         static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1179 
1180         static const uint8_t to_hz[]={
1181             0x7e,   0x7d, 0x41,
1182             0x7e,   0x7b,   0x26,   0x30,
1183 
1184         };
1185         static const int32_t from_hzOffs [] ={
1186             0, 0,0,
1187             1,1,1,1,
1188         };
1189 
1190         /*ISCII*/
1191         static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, };
1192         static const uint8_t to_iscii[]={
1193             0x41,
1194         };
1195         static const int32_t from_isciiOffs [] ={
1196             0,
1197         };
1198 
1199         if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
1200                 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
1201                 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 ))
1202             log_err("u-> ibm-943 with stop did not match.\n");
1203 
1204         if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
1205                 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
1206                 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 ))
1207             log_err("u-> euc-jp with stop did not match.\n");
1208 
1209         if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
1210                 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
1211                 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1212             log_err("u-> euc-tw with stop did not match.\n");
1213 
1214         if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
1215                 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
1216                 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1217             log_err("u-> iso-2022-jp with stop did not match.\n");
1218 
1219         if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
1220                 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
1221                 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1222             log_err("u-> iso-2022-jp with stop did not match.\n");
1223 
1224         if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
1225                 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
1226                 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 ))
1227             log_err("u-> iso-2022-cn with stop did not match.\n");
1228 
1229         if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
1230                 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
1231                 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 ))
1232             log_err("u-> iso-2022-kr with stop did not match.\n");
1233 
1234         if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
1235                 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
1236                 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 ))
1237             log_err("u-> HZ with stop did not match.\n");\
1238 
1239         if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
1240                 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
1241                 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 ))
1242             log_err("u-> iscii with stop did not match.\n");
1243 
1244 
1245     }
1246 #endif
1247 
1248     log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n");
1249     {
1250         static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1251 
1252         static const uint8_t to_SCSU[]={
1253             0x41,
1254 
1255         };
1256         int32_t from_SCSUOffs [] ={
1257             0,
1258 
1259         };
1260         if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1261                 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
1262                 UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 ))
1263             log_err("u-> SCSU with skip did not match.\n");
1264 
1265     }
1266 
1267     /*to Unicode*/
1268 
1269 #if !UCONFIG_NO_LEGACY_CONVERSION
1270     if(!testConvertToUnicode(expstopIBM_949, UPRV_LENGTHOF(expstopIBM_949),
1271              IBM_949stoptoUnicode, UPRV_LENGTHOF(IBM_949stoptoUnicode),"ibm-949",
1272             UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 ))
1273         log_err("ibm-949->u with stop did not match.\n");
1274     if(!testConvertToUnicode(expstopIBM_943, UPRV_LENGTHOF(expstopIBM_943),
1275              IBM_943stoptoUnicode, UPRV_LENGTHOF(IBM_943stoptoUnicode),"ibm-943",
1276             UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 ))
1277         log_err("ibm-943->u with stop did not match.\n");
1278     if(!testConvertToUnicode(expstopIBM_930, UPRV_LENGTHOF(expstopIBM_930),
1279              IBM_930stoptoUnicode, UPRV_LENGTHOF(IBM_930stoptoUnicode),"ibm-930",
1280             UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 ))
1281         log_err("ibm-930->u with stop did not match.\n");
1282 
1283     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n");
1284     {
1285 
1286         static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1287             0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1288         };
1289         static const UChar EBCIDIC_STATEFUL_toUnicode[] ={  0x6d63 };
1290         static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1};
1291 
1292 
1293          /*EUC-JP*/
1294         static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1295             0x8f, 0xda, 0xa1,  /*unassigned*/
1296            0x8e, 0xe0,
1297         };
1298         static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec};
1299         static const int32_t from_euc_jpOffs [] ={ 0, 1, 3};
1300 
1301           /*EUC_TW*/
1302         static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1303             0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1304            0xe6, 0xca, 0x8a,
1305         };
1306         UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2};
1307         int32_t from_euc_twOffs [] ={ 0, 1, 3};
1308 
1309 
1310 
1311          if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
1312              EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
1313             UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1314         log_err("EBCIDIC_STATEFUL->u with stop did not match.\n");
1315 
1316         if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1317              euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1318             UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0))
1319         log_err("euc-jp->u with stop did not match.\n");
1320 
1321         if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
1322                  euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
1323                 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1324             log_err("euc-tw->u with stop did not match.\n");
1325     }
1326 #endif
1327 
1328     log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n");
1329     {
1330         static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1331             0xe0, 0x80,  0x61,};
1332         static const UChar    expected1[] = {  0x0031, 0x4e8c,};
1333         static const int32_t offsets1[] = {   0x0000, 0x0001};
1334 
1335         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1336                  expected1, UPRV_LENGTHOF(expected1),"utf8",
1337                 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1338             log_err("utf8->u with stop did not match.\n");
1339     }
1340     log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n");
1341     {
1342         static const uint8_t sampleText1[] = {  0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04};
1343         static const UChar    expected1[] = {  0x00ba,  0x008c,  0x00f8,  0x0061};
1344         static const int32_t offsets1[] = {   0x0000, 0x0001,0x0002,0x0003};
1345 
1346         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1347                  expected1, UPRV_LENGTHOF(expected1),"SCSU",
1348                 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1349             log_err("scsu->u with stop did not match.\n");
1350     }
1351 
1352 }
1353 
TestSub(int32_t inputsize,int32_t outputsize)1354 static void TestSub(int32_t inputsize, int32_t outputsize)
1355 {
1356     static const UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1357     static const UChar sampleText2[]=    { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1358 
1359     static const uint8_t expsubIBM_949[] =
1360      { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 };
1361 
1362     static const uint8_t expsubIBM_943[] = {
1363         0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 };
1364 
1365     static const uint8_t expsubIBM_930[] = {
1366         0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f };
1367 
1368     static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 };
1369     static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1370     static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1371 
1372     static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1373     static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 };
1374     static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 };
1375 
1376     static const int32_t  fromIBM949Offs [] = { 0, 1, 3, 5, 7 };
1377     static const int32_t  fromIBM943Offs [] = { 0, 2, 4, 6 };
1378     static const int32_t  fromIBM930Offs [] = { 1, 3, 5, 7 };
1379 
1380     gInBufferSize = inputsize;
1381     gOutBufferSize = outputsize;
1382 
1383     /*from unicode*/
1384 
1385 #if !UCONFIG_NO_LEGACY_CONVERSION
1386     if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1387             expsubIBM_949, UPRV_LENGTHOF(expsubIBM_949), "ibm-949",
1388             UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 ))
1389         log_err("u-> ibm-949 with subst did not match.\n");
1390     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1391             expsubIBM_943, UPRV_LENGTHOF(expsubIBM_943), "ibm-943",
1392             UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0))
1393         log_err("u-> ibm-943 with subst did not match.\n");
1394     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1395             expsubIBM_930, UPRV_LENGTHOF(expsubIBM_930), "ibm-930",
1396             UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 ))
1397         log_err("u-> ibm-930 with subst did not match.\n");
1398 
1399     log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE  \n");
1400     {
1401         static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1402         static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 };
1403         static const int32_t offset[]= {0, 1, 1, 3, 3, 4};
1404 
1405 
1406         /* EUC_JP*/
1407         static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1408         static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1409             0xf4, 0xfe, 0xf4, 0xfe,
1410             0x61, 0x8e, 0xe0,
1411         };
1412         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7};
1413 
1414         /*EUC_TW*/
1415         static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1416         static const uint8_t to_euc_tw[]={
1417             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1418             0xfd, 0xfe, 0xfd, 0xfe,
1419             0x61, 0xe6, 0xca, 0x8a,
1420         };
1421 
1422         static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,};
1423 
1424         if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
1425                 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
1426                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 ))
1427             log_err("u-> ibm-943 with substitute did not match.\n");
1428 
1429         if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
1430                 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
1431                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 ))
1432             log_err("u-> euc-jp with substitute did not match.\n");
1433 
1434         if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
1435                 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
1436                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1437             log_err("u-> euc-tw with substitute did not match.\n");
1438     }
1439 #endif
1440 
1441     log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1442     {
1443         UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1444 
1445         const uint8_t to_SCSU[]={
1446             0x41,
1447             0x0e, 0xff,0xfd,
1448             0x42
1449 
1450 
1451         };
1452         int32_t from_SCSUOffs [] ={
1453             0,
1454             1,1,1,
1455             2,
1456 
1457         };
1458         const uint8_t to_SCSU_1[]={
1459             0x41,
1460 
1461         };
1462         int32_t from_SCSUOffs_1 [] ={
1463             0,
1464 
1465         };
1466         if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1467                 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
1468                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 ))
1469             log_err("u-> SCSU with substitute did not match.\n");
1470 
1471         if(!testConvertFromUnicodeWithContext(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1472                 to_SCSU_1, UPRV_LENGTHOF(to_SCSU_1), "SCSU",
1473                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
1474             log_err("u-> SCSU with substitute did not match.\n");
1475     }
1476 
1477     log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1478     {
1479         static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,};
1480         static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac,
1481                            0xf0, 0x90, 0x90, 0x81,
1482                            0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
1483                            0xef, 0xbf, 0xbf, 0x61,
1484 
1485         };
1486         static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 };
1487         if(!testConvertFromUnicode(testinput, UPRV_LENGTHOF(testinput),
1488                 expectedUTF8, UPRV_LENGTHOF(expectedUTF8), "utf8",
1489                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) {
1490             log_err("u-> utf8 with substitute did not match.\n");
1491         }
1492     }
1493 
1494     log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1495     {
1496         static const UChar in[]={ 0x0041, 0xfeff };
1497 
1498         static const uint8_t out[]={
1499 #if U_IS_BIG_ENDIAN
1500             0xfe, 0xff,
1501             0x00, 0x41,
1502             0xfe, 0xff
1503 #else
1504             0xff, 0xfe,
1505             0x41, 0x00,
1506             0xff, 0xfe
1507 #endif
1508         };
1509         static const int32_t offsets[]={
1510             -1, -1, 0, 0, 1, 1
1511         };
1512 
1513         if(!testConvertFromUnicode(in, UPRV_LENGTHOF(in),
1514                                    out, UPRV_LENGTHOF(out), "UTF-16",
1515                                    UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1516         ) {
1517             log_err("u->UTF-16 with substitute did not match.\n");
1518         }
1519     }
1520 
1521     log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1522     {
1523         static const UChar in[]={ 0x0041, 0xfeff };
1524 
1525         static const uint8_t out[]={
1526 #if U_IS_BIG_ENDIAN
1527             0x00, 0x00, 0xfe, 0xff,
1528             0x00, 0x00, 0x00, 0x41,
1529             0x00, 0x00, 0xfe, 0xff
1530 #else
1531             0xff, 0xfe, 0x00, 0x00,
1532             0x41, 0x00, 0x00, 0x00,
1533             0xff, 0xfe, 0x00, 0x00
1534 #endif
1535         };
1536         static const int32_t offsets[]={
1537             -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1
1538         };
1539 
1540         if(!testConvertFromUnicode(in, UPRV_LENGTHOF(in),
1541                                    out, UPRV_LENGTHOF(out), "UTF-32",
1542                                    UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1543         ) {
1544             log_err("u->UTF-32 with substitute did not match.\n");
1545         }
1546     }
1547 
1548     /*to unicode*/
1549 
1550 #if !UCONFIG_NO_LEGACY_CONVERSION
1551     if(!testConvertToUnicode(expsubIBM_949, UPRV_LENGTHOF(expsubIBM_949),
1552              IBM_949subtoUnicode, UPRV_LENGTHOF(IBM_949subtoUnicode),"ibm-949",
1553             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 ))
1554         log_err("ibm-949->u with substitute did not match.\n");
1555     if(!testConvertToUnicode(expsubIBM_943, UPRV_LENGTHOF(expsubIBM_943),
1556              IBM_943subtoUnicode, UPRV_LENGTHOF(IBM_943subtoUnicode),"ibm-943",
1557             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 ))
1558         log_err("ibm-943->u with substitute did not match.\n");
1559     if(!testConvertToUnicode(expsubIBM_930, UPRV_LENGTHOF(expsubIBM_930),
1560              IBM_930subtoUnicode, UPRV_LENGTHOF(IBM_930subtoUnicode),"ibm-930",
1561             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 ))
1562         log_err("ibm-930->u with substitute did not match.\n");
1563 
1564     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1565     {
1566 
1567         const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1568             0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1569         };
1570         UChar EBCIDIC_STATEFUL_toUnicode[] ={  0x6d63, 0xfffd, 0x03b4
1571         };
1572         int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5};
1573 
1574 
1575         /* EUC_JP*/
1576         const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1577             0x8f, 0xda, 0xa1,  /*unassigned*/
1578            0x8e, 0xe0, 0x8a
1579         };
1580         UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a };
1581         int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6,  9, 11 };
1582 
1583         /*EUC_TW*/
1584         const uint8_t sampleTxt_euc_tw[]={
1585             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1586             0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1587             0xe6, 0xca, 0x8a,
1588         };
1589         UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, };
1590         int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13};
1591 
1592 
1593         if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
1594            EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
1595           UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1596             log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n");
1597 
1598 
1599         if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1600            euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1601           UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ))
1602             log_err("euc-jp->u with substitute did not match.\n");
1603 
1604 
1605         if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
1606            euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
1607           UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1608             log_err("euc-tw->u with substitute  did not match.\n");
1609 
1610 
1611         if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1612            euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1613           UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND))
1614             log_err("euc-jp->u with substitute did not match.\n");
1615     }
1616 #endif
1617 
1618     log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1619     {
1620         const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1621             0xe0, 0x80,  0x61,};
1622         UChar    expected1[] = {  0x0031, 0x4e8c, 0xfffd, 0xfffd, 0x0061};
1623         int32_t offsets1[] = {   0x0000, 0x0001, 0x0004, 0x0005, 0x0006};
1624 
1625         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1626                  expected1, UPRV_LENGTHOF(expected1),"utf8",
1627                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1628             log_err("utf8->u with substitute did not match.\n");
1629     }
1630     log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1631     {
1632         const uint8_t sampleText1[] = {  0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
1633         UChar    expected1[] = {  0x00ba,  0x008c,  0x00f8,  0x0061,0xfffd,0xfffd};
1634         int32_t offsets1[] = {   0x0000, 0x0001,0x0002,0x0003,4,5};
1635 
1636         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1637                  expected1, UPRV_LENGTHOF(expected1),"SCSU",
1638                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1639             log_err("scsu->u with stop did not match.\n");
1640     }
1641 
1642 #if !UCONFIG_NO_LEGACY_CONVERSION
1643     log_verbose("Testing ibm-930 subchar/subchar1\n");
1644     {
1645         static const UChar u1[]={         0x6d63,           0x6d64,     0x6d65,     0x6d66,     0xdf };
1646         static const uint8_t s1[]={       0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f };
1647         static const int32_t offsets1[]={ 0,    0,    0,    1,    1,    2,    2,    3,    3,    4,    4 };
1648 
1649         static const UChar u2[]={         0x6d63,           0x6d64,     0xfffd,     0x6d66,     0x1a };
1650         static const uint8_t s2[]={       0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 };
1651         static const int32_t offsets2[]={ 1,                3,          5,          7,          10 };
1652 
1653         if(!testConvertFromUnicode(u1, UPRV_LENGTHOF(u1), s1, UPRV_LENGTHOF(s1), "ibm-930",
1654                                    UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1655         ) {
1656             log_err("u->ibm-930 subchar/subchar1 did not match.\n");
1657         }
1658 
1659         if(!testConvertToUnicode(s2, UPRV_LENGTHOF(s2), u2, UPRV_LENGTHOF(u2), "ibm-930",
1660                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1661         ) {
1662             log_err("ibm-930->u subchar/subchar1 did not match.\n");
1663         }
1664     }
1665 
1666     log_verbose("Testing GB 18030 with substitute callbacks\n");
1667     {
1668         static const UChar u2[]={
1669             0x24, 0x7f, 0x80,                   0x1f9,      0x20ac,     0x4e00,     0x9fa6,                 0xffff,                 0xd800, 0xdc00,         0xfffd,                 0xdbff, 0xdfff };
1670         static const uint8_t gb2[]={
1671             0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 };
1672         static const int32_t offsets2[]={
1673             0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 };
1674 
1675         if(!testConvertToUnicode(gb2, UPRV_LENGTHOF(gb2), u2, UPRV_LENGTHOF(u2), "gb18030",
1676                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1677         ) {
1678             log_err("gb18030->u with substitute did not match.\n");
1679         }
1680     }
1681 #endif
1682 
1683     log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n");
1684     {
1685         static const uint8_t utf7[]={
1686          /* a~            a+AB~                           a+AB\x0c                        a+AB-                         a+AB.                         a+. */
1687             0x61, 0x7e,   0x61, 0x2b, 0x41, 0x42, 0x7e,   0x61, 0x2b, 0x41, 0x42, 0x0c,   0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b,   0x2e
1688         };
1689         static const UChar unicode[]={
1690             0x61, 0xfffd, 0x61,       0xfffd,     0xfffd, 0x61,       0xfffd,     0xfffd, 0x61,       0xfffd,           0x61,       0xfffd,     0x2e, 0x61, 0xfffd, 0x2e
1691         };
1692         static const int32_t offsets[]={
1693             0,    1,      2,          4,          6,      7,          9,          11,     12,         14,               17,         19,         21,   22,   23,     24
1694         };
1695 
1696         if(!testConvertToUnicode(utf7, UPRV_LENGTHOF(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7",
1697                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1698         ) {
1699             log_err("UTF-7->u with substitute did not match.\n");
1700         }
1701     }
1702 
1703     log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n");
1704     {
1705         static const uint8_t
1706             in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff },
1707             in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff },
1708             in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff };
1709 
1710         static const UChar
1711             out1[]={ 0x4e00, 0xfeff },
1712             out2[]={ 0x004e, 0xfffe },
1713             out3[]={ 0xfefd, 0x4e00, 0xfeff };
1714 
1715         static const int32_t
1716             offsets1[]={ 2, 4 },
1717             offsets2[]={ 2, 4 },
1718             offsets3[]={ 0, 2, 4 };
1719 
1720         if(!testConvertToUnicode(in1, UPRV_LENGTHOF(in1), out1, UPRV_LENGTHOF(out1), "UTF-16",
1721                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1722         ) {
1723             log_err("UTF-16 (BE BOM)->u with substitute did not match.\n");
1724         }
1725 
1726         if(!testConvertToUnicode(in2, UPRV_LENGTHOF(in2), out2, UPRV_LENGTHOF(out2), "UTF-16",
1727                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1728         ) {
1729             log_err("UTF-16 (LE BOM)->u with substitute did not match.\n");
1730         }
1731 
1732         if(!testConvertToUnicode(in3, UPRV_LENGTHOF(in3), out3, UPRV_LENGTHOF(out3), "UTF-16",
1733                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1734         ) {
1735             log_err("UTF-16 (no BOM)->u with substitute did not match.\n");
1736         }
1737     }
1738 
1739     log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n");
1740     {
1741         static const uint8_t
1742             in1[]={ 0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff },
1743             in2[]={ 0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00 },
1744             in3[]={ 0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01 },
1745             in4[]={ 0x00, 0x01, 0x02, 0x03,   0x00, 0x11, 0x12, 0x00,   0x00, 0x00, 0x4e, 0x00 };
1746 
1747         static const UChar
1748             out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff },
1749             out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe },
1750             out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd },
1751             out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 };
1752 
1753         static const int32_t
1754             offsets1[]={ 4, 4, 8 },
1755             offsets2[]={ 4, 4, 8 },
1756             offsets3[]={ 0, 4, 4, 8, 12 },
1757             offsets4[]={ 0, 0, 4, 8 };
1758 
1759         if(!testConvertToUnicode(in1, UPRV_LENGTHOF(in1), out1, UPRV_LENGTHOF(out1), "UTF-32",
1760                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1761         ) {
1762             log_err("UTF-32 (BE BOM)->u with substitute did not match.\n");
1763         }
1764 
1765         if(!testConvertToUnicode(in2, UPRV_LENGTHOF(in2), out2, UPRV_LENGTHOF(out2), "UTF-32",
1766                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1767         ) {
1768             log_err("UTF-32 (LE BOM)->u with substitute did not match.\n");
1769         }
1770 
1771         if(!testConvertToUnicode(in3, UPRV_LENGTHOF(in3), out3, UPRV_LENGTHOF(out3), "UTF-32",
1772                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1773         ) {
1774             log_err("UTF-32 (no BOM)->u with substitute did not match.\n");
1775         }
1776 
1777         if(!testConvertToUnicode(in4, UPRV_LENGTHOF(in4), out4, UPRV_LENGTHOF(out4), "UTF-32",
1778                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0)
1779         ) {
1780             log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n");
1781         }
1782     }
1783 }
1784 
TestSubWithValue(int32_t inputsize,int32_t outputsize)1785 static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
1786 {
1787     UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1788     UChar  sampleText2[] =  { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1789 
1790     const uint8_t expsubwvalIBM_949[]= {
1791         0x00, 0xb0, 0xa1, 0xb0, 0xa2,
1792         0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 };
1793 
1794     const uint8_t expsubwvalIBM_943[]= {
1795         0x9f, 0xaf, 0x9f, 0xb1,
1796         0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 };
1797 
1798     const uint8_t expsubwvalIBM_930[] = {
1799         0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f };
1800 
1801     int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 };
1802     int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 };
1803     int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */
1804 
1805     gInBufferSize = inputsize;
1806     gOutBufferSize = outputsize;
1807 
1808     /*from Unicode*/
1809 
1810 #if !UCONFIG_NO_LEGACY_CONVERSION
1811     if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1812             expsubwvalIBM_949, UPRV_LENGTHOF(expsubwvalIBM_949), "ibm-949",
1813             UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 ))
1814         log_err("u-> ibm-949 with subst with value did not match.\n");
1815 
1816     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1817             expsubwvalIBM_943, UPRV_LENGTHOF(expsubwvalIBM_943), "ibm-943",
1818             UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 ))
1819         log_err("u-> ibm-943 with sub with value did not match.\n");
1820 
1821     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1822             expsubwvalIBM_930, UPRV_LENGTHOF(expsubwvalIBM_930), "ibm-930",
1823             UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 ))
1824         log_err("u-> ibm-930 with subst with value did not match.\n");
1825 
1826 
1827     log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE  \n");
1828     {
1829         static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1830         static const uint8_t toIBM943[]= { 0x61,
1831             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1832             0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1833             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1834             0x61 };
1835         static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
1836 
1837 
1838          /* EUC_JP*/
1839         static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, };
1840         static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1841             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1842             0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1843             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1844             0x61, 0x8e, 0xe0,
1845         };
1846         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,
1847             3, 3, 3, 3, 3, 3,
1848             3, 3, 3, 3, 3, 3,
1849             5, 5, 5, 5, 5, 5,
1850             6, 7, 7,
1851         };
1852 
1853         /*EUC_TW*/
1854         static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1855         static const uint8_t to_euc_tw[]={
1856             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1857             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1858             0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1859             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1860             0x61, 0xe6, 0xca, 0x8a,
1861         };
1862         static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,
1863              3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5,
1864              6, 7, 7, 8,
1865         };
1866         /*ISO-2022-JP*/
1867         static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ;
1868         static const uint8_t to_iso_2022_jp1[]={
1869             0x1b,   0x24,   0x42,   0x21, 0x21,
1870             0x1b,   0x28,   0x42,   0x25, 0x55,   0x30,   0x30,   0x45,   0x39,
1871             0x1b,   0x24,   0x42,   0x21, 0x22,
1872             0x1b,   0x28,   0x42,   0x25, 0x55,   0x30,   0x30,   0x45,   0x39,
1873             0x42,
1874         };
1875 
1876         static const int32_t from_iso_2022_jpOffs1 [] ={
1877             0,0,0,0,0,
1878             1,1,1,1,1,1,1,1,1,
1879             2,2,2,2,2,
1880             3,3,3,3,3,3,3,3,3,
1881             4,
1882         };
1883         /* surrogate pair*/
1884         static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ;
1885         static const uint8_t to_iso_2022_jp2[]={
1886                                 0x1b,   0x24,   0x42,   0x21,   0x21,
1887                                 0x1b,   0x28,   0x42,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1888                                 0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1889                                 0x1b,   0x24,   0x42,   0x21,   0x22,
1890                                 0x1b,   0x28,   0x42,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1891                                 0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1892                                 0x42,
1893                                 };
1894         static const int32_t from_iso_2022_jpOffs2 [] ={
1895             0,0,0,0,0,
1896             1,1,1,1,1,1,1,1,1,
1897             1,1,1,1,1,1,
1898             3,3,3,3,3,
1899             4,4,4,4,4,4,4,4,4,
1900             4,4,4,4,4,4,
1901             6,
1902         };
1903 
1904         /*ISO-2022-cn*/
1905         static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1906         static const uint8_t to_iso_2022_cn[]={
1907             0x41,
1908             0x25, 0x55,   0x33,   0x37,   0x31,   0x32,
1909             0x42,
1910         };
1911         static const int32_t from_iso_2022_cnOffs [] ={
1912             0,
1913             1,1,1,1,1,1,
1914             2,
1915         };
1916 
1917         static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042};
1918 
1919         static const uint8_t to_iso_2022_cn4[]={
1920                              0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
1921                              0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1922                              0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1923                              0x0e,   0x21,   0x22,
1924                              0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1925                              0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1926                              0x42,
1927                              };
1928         static const int32_t from_iso_2022_cnOffs4 [] ={
1929             0,0,0,0,0,0,0,
1930             1,1,1,1,1,1,1,
1931             1,1,1,1,1,1,
1932             3,3,3,
1933             4,4,4,4,4,4,4,
1934             4,4,4,4,4,4,
1935             6
1936 
1937         };
1938 
1939         /*ISO-2022-kr*/
1940         static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
1941         static const uint8_t to_iso_2022_kr2[]={
1942             0x1b,   0x24,   0x29,   0x43,
1943             0x41,
1944             0x0e,   0x25,   0x50,
1945             0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1946             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1947             0x0e,   0x25,   0x50,
1948             0x0f,   0x42,
1949             0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1950             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1951             0x43
1952         };
1953         static const int32_t from_iso_2022_krOffs2 [] ={
1954             -1,-1,-1,-1,
1955              0,
1956             1,1,1,
1957             2,2,2,2,2,2,2,
1958             2,2,2,2,2,2,
1959             4,4,4,
1960             5,5,
1961             6,6,6,6,6,6,
1962             6,6,6,6,6,6,
1963             8,
1964         };
1965 
1966         static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 };
1967         static const uint8_t to_iso_2022_kr[]={
1968             0x1b,   0x24,   0x29,   0x43,
1969             0x41,
1970             0x0e,   0x25,   0x50,
1971             0x0f,   0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
1972             0x0e,   0x25,   0x50,
1973             0x0f,   0x42,
1974             0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
1975             0x43
1976         };
1977 
1978 
1979         static const int32_t from_iso_2022_krOffs [] ={
1980             -1,-1,-1,-1,
1981              0,
1982             1,1,1,
1983             2,2,2,2,2,2,2,
1984             3,3,3,
1985             4,4,
1986             5,5,5,5,5,5,
1987             6,
1988         };
1989         /* HZ encoding */
1990         static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1991 
1992         static const uint8_t to_hz[]={
1993             0x7e,   0x7d,   0x41,
1994             0x7e,   0x7b,   0x26,   0x30,
1995             0x7e,   0x7d,   0x25,   0x55,   0x30,   0x36,   0x36,   0x32,  /*unassigned*/
1996             0x7e,   0x7b,   0x26,   0x30,
1997             0x7e,   0x7d,   0x42,
1998 
1999         };
2000         static const int32_t from_hzOffs [] ={
2001             0,0,0,
2002             1,1,1,1,
2003             2,2,2,2,2,2,2,2,
2004             3,3,3,3,
2005             4,4,4
2006         };
2007 
2008         static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
2009         static const uint8_t to_hz2[]={
2010             0x7e,   0x7d,   0x41,
2011             0x7e,   0x7b,   0x26,   0x30,
2012             0x7e,   0x7d,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
2013             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2014             0x7e,   0x7b,   0x26,   0x30,
2015             0x7e,   0x7d,   0x42,
2016             0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
2017             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2018             0x43
2019         };
2020         static const int32_t from_hzOffs2 [] ={
2021             0,0,0,
2022             1,1,1,1,
2023             2,2,2,2,2,2,2,2,
2024             2,2,2,2,2,2,
2025             4,4,4,4,
2026             5,5,5,
2027             6,6,6,6,6,6,
2028             6,6,6,6,6,6,
2029             8,
2030         };
2031 
2032                 /*ISCII*/
2033         static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 };
2034         static const uint8_t to_iscii[]={
2035             0x41,
2036             0xef,   0x42,   0xa1,
2037             0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
2038             0xa2,
2039             0x42,
2040             0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
2041             0x43
2042         };
2043 
2044 
2045         static const int32_t from_isciiOffs [] ={
2046             0,
2047             1,1,1,
2048             2,2,2,2,2,2,
2049             3,
2050             4,
2051             5,5,5,5,5,5,
2052             6,
2053         };
2054 
2055         if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
2056                 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
2057                 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 ))
2058             log_err("u-> ibm-943 with subst with value did not match.\n");
2059 
2060         if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
2061                 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
2062                 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 ))
2063             log_err("u-> euc-jp with subst with value did not match.\n");
2064 
2065         if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
2066                 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
2067                 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 ))
2068             log_err("u-> euc-tw with subst with value did not match.\n");
2069 
2070         if(!testConvertFromUnicode(iso_2022_jp_inputText1, UPRV_LENGTHOF(iso_2022_jp_inputText1),
2071                 to_iso_2022_jp1, UPRV_LENGTHOF(to_iso_2022_jp1), "iso-2022-jp",
2072                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2073             log_err("u-> iso_2022_jp with subst with value did not match.\n");
2074 
2075         if(!testConvertFromUnicode(iso_2022_jp_inputText1, UPRV_LENGTHOF(iso_2022_jp_inputText1),
2076                 to_iso_2022_jp1, UPRV_LENGTHOF(to_iso_2022_jp1), "iso-2022-jp",
2077                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2078             log_err("u-> iso_2022_jp with subst with value did not match.\n");
2079 
2080         if(!testConvertFromUnicode(iso_2022_jp_inputText2, UPRV_LENGTHOF(iso_2022_jp_inputText2),
2081                 to_iso_2022_jp2, UPRV_LENGTHOF(to_iso_2022_jp2), "iso-2022-jp",
2082                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 ))
2083             log_err("u-> iso_2022_jp with subst with value did not match.\n");
2084         /*ESCAPE OPTIONS*/
2085         {
2086             /* surrogate pair*/
2087             static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ;
2088             static const uint8_t to_iso_2022_jp3_v2[]={
2089                     0x1b,   0x24,   0x42,   0x21,   0x21,
2090                     0x1b,   0x28,   0x42,   0x26,   0x23,   0x31,  0x34,   0x34,   0x34,   0x37, 0x30, 0x3b,
2091 
2092                     0x1b,   0x24,   0x42,   0x21,   0x22,
2093                     0x1b,   0x28,   0x42,   0x26,   0x23,  0x31,  0x34,   0x34,   0x34,   0x37, 0x30, 0x3b,
2094 
2095                     0x42,
2096                     0x26,   0x23,   0x33,   0x36,   0x38,   0x39,   0x32,   0x3b,
2097                     };
2098 
2099             static const int32_t from_iso_2022_jpOffs3_v2 [] ={
2100                 0,0,0,0,0,
2101                 1,1,1,1,1,1,1,1,1,1,1,1,
2102 
2103                 3,3,3,3,3,
2104                 4,4,4,4,4,4,4,4,4,4,4,4,
2105 
2106                 6,
2107                 7,7,7,7,7,7,7,7,7
2108             };
2109 
2110             if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, UPRV_LENGTHOF(iso_2022_jp_inputText3),
2111                     to_iso_2022_jp3_v2, UPRV_LENGTHOF(to_iso_2022_jp3_v2), "iso-2022-jp",
2112                     UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2113                 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n");
2114         }
2115         {
2116             static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2117             static const uint8_t to_iso_2022_cn5_v2[]={
2118                              0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2119                              0x0f,   0x5c,   0x75,   0x44,   0x38,   0x34,   0x44,
2120                              0x5c,   0x75,   0x44,   0x43,   0x35,   0x36,
2121                              0x0e,   0x21,   0x22,
2122                              0x0f,   0x5c,   0x75,   0x44,   0x38,   0x34,   0x44,
2123                              0x5c,   0x75,   0x44,   0x43,   0x35,   0x36,
2124                              0x42,
2125                              0x5c,   0x75,   0x30,   0x39,   0x30,   0x32,
2126                              };
2127             static const int32_t from_iso_2022_cnOffs5_v2 [] ={
2128                 0,0,0,0,0,0,0,
2129                 1,1,1,1,1,1,1,
2130                 1,1,1,1,1,1,
2131                 3,3,3,
2132                 4,4,4,4,4,4,4,
2133                 4,4,4,4,4,4,
2134                 6,
2135                 7,7,7,7,7,7
2136             };
2137             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, UPRV_LENGTHOF(iso_2022_cn_inputText5),
2138                 to_iso_2022_cn5_v2, UPRV_LENGTHOF(to_iso_2022_cn5_v2), "iso-2022-cn",
2139                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR ))
2140                 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n");
2141 
2142         }
2143         {
2144             static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2145             static const uint8_t to_iso_2022_cn6_v2[]={
2146                                 0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2147                                 0x0f,   0x7b,   0x55,   0x2b,   0x32,   0x33,   0x34,   0x35,   0x36,   0x7d,
2148                                 0x0e,   0x21,   0x22,
2149                                 0x0f,   0x7b,   0x55,   0x2b,   0x32,   0x33,   0x34,   0x35,   0x36,   0x7d,
2150                                 0x42,
2151                                 0x7b,   0x55,   0x2b,   0x30,   0x39,   0x30,   0x32,   0x7d
2152                              };
2153             static const int32_t from_iso_2022_cnOffs6_v2 [] ={
2154                     0,  0,  0,  0,  0,  0,  0,
2155                     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
2156                     3,  3,  3,
2157                     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
2158                     6,
2159                     7,  7,  7,  7,  7,  7,  7,  7,
2160             };
2161             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, UPRV_LENGTHOF(iso_2022_cn_inputText6),
2162                 to_iso_2022_cn6_v2, UPRV_LENGTHOF(to_iso_2022_cn6_v2), "iso-2022-cn",
2163                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR ))
2164                 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n");
2165 
2166         }
2167         {
2168             static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2169             static const uint8_t to_iso_2022_cn7_v2[]={
2170                                 0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2171                                 0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,   0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2172                                 0x0e,   0x21,   0x22,
2173                                 0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,   0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2174                                 0x42,   0x25,   0x55,   0x30,   0x39,   0x30,   0x32,
2175                             };
2176             static const int32_t from_iso_2022_cnOffs7_v2 [] ={
2177                                 0,  0,  0,  0,  0,  0,  0,
2178                                 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
2179                                 3,  3,  3,
2180                                 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
2181                                 6,
2182                                 7,  7,  7,  7,  7,  7,
2183             };
2184             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, UPRV_LENGTHOF(iso_2022_cn_inputText7),
2185                 to_iso_2022_cn7_v2, UPRV_LENGTHOF(to_iso_2022_cn7_v2), "iso-2022-cn",
2186                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR ))
2187                 log_err("u-> iso-2022-cn with sub & K did not match.\n");
2188 
2189         }
2190         {
2191             static const UChar iso_2022_cn_inputText8[]={
2192                                 0x3000,
2193                                 0xD84D, 0xDC56,
2194                                 0x3001,
2195                                 0xD84D, 0xDC56,
2196                                 0xDBFF, 0xDFFF,
2197                                 0x0042,
2198                                 0x0902};
2199             static const uint8_t to_iso_2022_cn8_v2[]={
2200                                 0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2201                                 0x0f,   0x5c,   0x32,   0x33,   0x34,   0x35,   0x36,   0x20,
2202                                 0x0e,   0x21,   0x22,
2203                                 0x0f,   0x5c,   0x32,   0x33,   0x34,   0x35,   0x36,   0x20,
2204                                 0x5c,   0x31,   0x30,   0x46,   0x46,   0x46,   0x46,   0x20,
2205                                 0x42,
2206                                 0x5c,   0x39,   0x30,   0x32,   0x20
2207                              };
2208             static const int32_t from_iso_2022_cnOffs8_v2 [] ={
2209                     0,  0,  0,  0,  0,  0,  0,
2210                     1,  1,  1,  1,  1,  1,  1,  1,
2211                     3,  3,  3,
2212                     4,  4,  4,  4,  4,  4,  4,  4,
2213                     6,  6,  6,  6,  6,  6,  6,  6,
2214                     8,
2215                     9,  9,  9,  9,  9
2216             };
2217             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, UPRV_LENGTHOF(iso_2022_cn_inputText8),
2218                 to_iso_2022_cn8_v2, UPRV_LENGTHOF(to_iso_2022_cn8_v2), "iso-2022-cn",
2219                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR ))
2220                 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n");
2221 
2222         }
2223         {
2224             static const uint8_t to_iso_2022_cn4_v3[]={
2225                             0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2226                             0x0f,   0x5c,   0x55,   0x30,   0x30,   0x30,   0x32,   0x33,   0x34,   0x35,   0x36,
2227                             0x0e,   0x21,   0x22,
2228                             0x0f,   0x5c,   0x55,   0x30,   0x30,   0x30,   0x32,   0x33,   0x34,   0x35,   0x36,
2229                             0x42
2230                              };
2231 
2232 
2233             static const int32_t from_iso_2022_cnOffs4_v3 [] ={
2234                 0,0,0,0,0,0,0,
2235                 1,1,1,1,1,1,1,1,1,1,1,
2236 
2237                 3,3,3,
2238                 4,4,4,4,4,4,4,4,4,4,4,
2239 
2240                 6
2241 
2242             };
2243             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, UPRV_LENGTHOF(iso_2022_cn_inputText4),
2244                 to_iso_2022_cn4_v3, UPRV_LENGTHOF(to_iso_2022_cn4_v3), "iso-2022-cn",
2245                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2246             {
2247                 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n");
2248             }
2249         }
2250         if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
2251                 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
2252                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 ))
2253             log_err("u-> iso_2022_cn with subst with value did not match.\n");
2254 
2255         if(!testConvertFromUnicode(iso_2022_cn_inputText4, UPRV_LENGTHOF(iso_2022_cn_inputText4),
2256                 to_iso_2022_cn4, UPRV_LENGTHOF(to_iso_2022_cn4), "iso-2022-cn",
2257                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 ))
2258             log_err("u-> iso_2022_cn with subst with value did not match.\n");
2259         if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
2260                 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
2261                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 ))
2262             log_err("u-> iso_2022_kr with subst with value did not match.\n");
2263         if(!testConvertFromUnicode(iso_2022_kr_inputText2, UPRV_LENGTHOF(iso_2022_kr_inputText2),
2264                 to_iso_2022_kr2, UPRV_LENGTHOF(to_iso_2022_kr2), "iso-2022-kr",
2265                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 ))
2266             log_err("u-> iso_2022_kr2 with subst with value did not match.\n");
2267         if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
2268                 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
2269                 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 ))
2270             log_err("u-> hz with subst with value did not match.\n");
2271         if(!testConvertFromUnicode(hz_inputText2, UPRV_LENGTHOF(hz_inputText2),
2272                 to_hz2, UPRV_LENGTHOF(to_hz2), "HZ",
2273                 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 ))
2274             log_err("u-> hz with subst with value did not match.\n");
2275 
2276         if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
2277                 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
2278                 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 ))
2279             log_err("u-> iscii with subst with value did not match.\n");
2280     }
2281 #endif
2282 
2283     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
2284     /*to Unicode*/
2285     {
2286 #if !UCONFIG_NO_LEGACY_CONVERSION
2287         static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
2288             0x81, 0xad, /*unassigned*/
2289             0x89, 0xd3 };
2290         static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
2291             0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
2292             0x7B87};
2293         static const int32_t  fromIBM943Offs [] =    { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
2294 
2295         /* EUC_JP*/
2296         static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
2297             0x8f, 0xda, 0xa1,  /*unassigned*/
2298            0x8e, 0xe0,
2299         };
2300         static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec,
2301             0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31,
2302             0x00a2 };
2303         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3,
2304             6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2305             9,
2306         };
2307 
2308         /*EUC_TW*/
2309         static const uint8_t sampleTxt_euc_tw[]={
2310             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
2311             0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
2312             0xe6, 0xca, 0x8a,
2313         };
2314         static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2,
2315              0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43,
2316              0x8706, 0x8a, };
2317         static const int32_t from_euc_twOffs [] ={ 0, 1, 3,
2318              7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2319              11, 13};
2320 
2321         /*iso-2022-jp*/
2322         static const uint8_t sampleTxt_iso_2022_jp[]={
2323             0x1b,   0x28,   0x42,   0x41,
2324             0x1b,   0x24,   0x42,   0x3a, 0x1a, /*unassigned*/
2325             0x1b,   0x28,   0x42,   0x42,
2326 
2327         };
2328                                                    /*     A    %    X    3    A    %    X    1    A     B    */
2329         static const UChar iso_2022_jptoUnicode[]={    0x41,0x25,0x58,0x33,0x41,0x25,0x58,0x31,0x41, 0x42 };
2330         static const int32_t from_iso_2022_jpOffs [] ={  3,   7,   7,   7,   7,   7,   7,   7,   7,    12   };
2331 
2332         /*iso-2022-cn*/
2333         static const uint8_t sampleTxt_iso_2022_cn[]={
2334             0x0f,   0x41,   0x44,
2335             0x1B,   0x24,   0x29,   0x47,
2336             0x0E,   0x40,   0x6c, /*unassigned*/
2337             0x0f,   0x42,
2338 
2339         };
2340         static const UChar iso_2022_cntoUnicode[]={    0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 };
2341         static const int32_t from_iso_2022_cnOffs [] ={  1,   2,   8,   8,   8,   8,   8,   8,   8,  8,    11   };
2342 
2343         /*iso-2022-kr*/
2344         static const uint8_t sampleTxt_iso_2022_kr[]={
2345           0x1b, 0x24, 0x29,  0x43,
2346           0x41,
2347           0x0E, 0x7f, 0x1E,
2348           0x0e, 0x25, 0x50,
2349           0x0f, 0x51,
2350           0x42, 0x43,
2351 
2352         };
2353         static const UChar iso_2022_krtoUnicode[]={     0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43};
2354         static const int32_t from_iso_2022_krOffs [] ={  4,   6,   6,   6,   6,   6,   6,   6,   6,    9,    12,   13  , 14 };
2355 
2356         /*hz*/
2357         static const uint8_t sampleTxt_hz[]={
2358             0x41,
2359             0x7e,   0x7b,   0x26,   0x30,
2360             0x7f,   0x1E, /*unassigned*/
2361             0x26,   0x30,
2362             0x7e,   0x7d,   0x42,
2363             0x7e,   0x7b,   0x7f,   0x1E,/*unassigned*/
2364             0x7e,   0x7d,   0x42,
2365         };
2366         static const UChar hztoUnicode[]={
2367             0x41,
2368             0x03a0,
2369             0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2370             0x03A0,
2371             0x42,
2372             0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2373             0x42,};
2374 
2375         static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18,  };
2376 
2377 
2378         /*iscii*/
2379         static const uint8_t sampleTxt_iscii[]={
2380             0x41,
2381             0x30,
2382             0xEB, /*unassigned*/
2383             0xa3,
2384             0x42,
2385             0xEC, /*unassigned*/
2386             0x42,
2387         };
2388         static const UChar isciitoUnicode[]={
2389             0x41,
2390             0x30,
2391             0x25,  0x58,  0x45, 0x42,
2392             0x0903,
2393             0x42,
2394             0x25,  0x58,  0x45, 0x43,
2395             0x42,};
2396 
2397         static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6  };
2398 #endif
2399 
2400         /*UTF8*/
2401         static const uint8_t sampleTxtUTF8[]={
2402             0x20, 0x64, 0x50,
2403             0xC2, 0x7E, /* truncated char */
2404             0x20,
2405             0xE0, 0xB5, 0x7E, /* truncated char */
2406             0x40,
2407         };
2408         static const UChar UTF8ToUnicode[]={
2409             0x0020, 0x0064, 0x0050,
2410             0x0025, 0x0058, 0x0043, 0x0032, 0x007E,  /* \xC2~ */
2411             0x0020,
2412             0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E,
2413             0x0040
2414         };
2415         static const int32_t fromUTF8[] = {
2416             0, 1, 2,
2417             3, 3, 3, 3, 4,
2418             5,
2419             6, 6, 6, 6, 6, 6, 6, 6, 8,
2420             9
2421         };
2422         static const UChar UTF8ToUnicodeXML_DEC[]={
2423             0x0020, 0x0064, 0x0050,
2424             0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E,  /* &#194;~ */
2425             0x0020,
2426             0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E,
2427             0x0040
2428         };
2429         static const int32_t fromUTF8XML_DEC[] = {
2430             0, 1, 2,
2431             3, 3, 3, 3, 3, 3, 4,
2432             5,
2433             6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8,
2434             9
2435         };
2436 
2437 
2438 #if !UCONFIG_NO_LEGACY_CONVERSION
2439         if(!testConvertToUnicode(sampleTxtToU, UPRV_LENGTHOF(sampleTxtToU),
2440                  IBM_943toUnicode, UPRV_LENGTHOF(IBM_943toUnicode),"ibm-943",
2441                 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 ))
2442             log_err("ibm-943->u with substitute with value did not match.\n");
2443 
2444         if(!testConvertToUnicode(sampleTxt_EUC_JP, UPRV_LENGTHOF(sampleTxt_EUC_JP),
2445                  EUC_JPtoUnicode, UPRV_LENGTHOF(EUC_JPtoUnicode),"IBM-eucJP",
2446                 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0))
2447             log_err("euc-jp->u with substitute with value did not match.\n");
2448 
2449         if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
2450                  euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
2451                 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0))
2452             log_err("euc-tw->u with substitute with value did not match.\n");
2453 
2454         if(!testConvertToUnicode(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2455                  iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
2456                 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0))
2457             log_err("iso-2022-jp->u with substitute with value did not match.\n");
2458 
2459         if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2460                  iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
2461                 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR))
2462             log_err("iso-2022-jp->u with substitute with value did not match.\n");
2463 
2464         {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */
2465             {
2466                 static const UChar iso_2022_jptoUnicodeDec[]={
2467                                                   0x0041,
2468                                                   /*   &         #         5         8         ;   */
2469                                                   0x0026,   0x0023,   0x0035,   0x0038,   0x003b,
2470                                                   0x0026,   0x0023,   0x0032,   0x0036,   0x003b,
2471                                                   0x0042 };
2472                 static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12,  };
2473                 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2474                      iso_2022_jptoUnicodeDec, UPRV_LENGTHOF(iso_2022_jptoUnicodeDec),"iso-2022-jp",
2475                     UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2476                 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n");
2477             }
2478             {
2479                 static const UChar iso_2022_jptoUnicodeHex[]={
2480                                                   0x0041,
2481                                                   /*   &       #       x       3       A       ;  */
2482                                                   0x0026, 0x0023, 0x0078, 0x0033, 0x0041, 0x003b,
2483                                                   0x0026, 0x0023, 0x0078, 0x0031, 0x0041, 0x003b,
2484                                                   0x0042 };
2485                 static const int32_t from_iso_2022_jpOffsHex [] ={  3,7,7,7,7,7,7,7,7,7,7,7,7,12   };
2486                 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2487                      iso_2022_jptoUnicodeHex, UPRV_LENGTHOF(iso_2022_jptoUnicodeHex),"iso-2022-jp",
2488                     UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR ))
2489                 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n");
2490             }
2491             {
2492                 static const UChar iso_2022_jptoUnicodeC[]={
2493                                                 0x0041,
2494                                                 0x005C, 0x0078, 0x0033, 0x0041,   /*  \x3A */
2495                                                 0x005C, 0x0078, 0x0031, 0x0041,   /*  \x1A */
2496                                                 0x0042 };
2497                 int32_t from_iso_2022_jpOffsC [] ={  3,7,7,7,7,7,7,7,7,12   };
2498                 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2499                      iso_2022_jptoUnicodeC, UPRV_LENGTHOF(iso_2022_jptoUnicodeC),"iso-2022-jp",
2500                     UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2501                 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n");
2502             }
2503         }
2504         if(!testConvertToUnicode(sampleTxt_iso_2022_cn, UPRV_LENGTHOF(sampleTxt_iso_2022_cn),
2505                  iso_2022_cntoUnicode, UPRV_LENGTHOF(iso_2022_cntoUnicode),"iso-2022-cn",
2506                 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0))
2507             log_err("iso-2022-cn->u with substitute with value did not match.\n");
2508 
2509         if(!testConvertToUnicode(sampleTxt_iso_2022_kr, UPRV_LENGTHOF(sampleTxt_iso_2022_kr),
2510                  iso_2022_krtoUnicode, UPRV_LENGTHOF(iso_2022_krtoUnicode),"iso-2022-kr",
2511                 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0))
2512             log_err("iso-2022-kr->u with substitute with value did not match.\n");
2513 
2514          if(!testConvertToUnicode(sampleTxt_hz, UPRV_LENGTHOF(sampleTxt_hz),
2515                  hztoUnicode, UPRV_LENGTHOF(hztoUnicode),"HZ",
2516                 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0))
2517             log_err("hz->u with substitute with value did not match.\n");
2518 
2519          if(!testConvertToUnicode(sampleTxt_iscii, UPRV_LENGTHOF(sampleTxt_iscii),
2520                  isciitoUnicode, UPRV_LENGTHOF(isciitoUnicode),"ISCII,version=0",
2521                 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0))
2522             log_err("ISCII ->u with substitute with value did not match.\n");
2523 #endif
2524 
2525         if(!testConvertToUnicode(sampleTxtUTF8, UPRV_LENGTHOF(sampleTxtUTF8),
2526                 UTF8ToUnicode, UPRV_LENGTHOF(UTF8ToUnicode),"UTF-8",
2527                 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0))
2528             log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2529         if(!testConvertToUnicodeWithContext(sampleTxtUTF8, UPRV_LENGTHOF(sampleTxtUTF8),
2530                 UTF8ToUnicodeXML_DEC, UPRV_LENGTHOF(UTF8ToUnicodeXML_DEC),"UTF-8",
2531                 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR))
2532             log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2533     }
2534 }
2535 
2536 #if !UCONFIG_NO_LEGACY_CONVERSION
TestLegalAndOthers(int32_t inputsize,int32_t outputsize)2537 static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize)
2538 {
2539     static const UChar    legalText[] =  { 0x0000, 0xAC00, 0xAC01, 0xD700 };
2540     static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
2541     static const int32_t  to949legal[] = {0, 1, 1, 2, 2, 3, 3};
2542 
2543 
2544     static const uint8_t text943[] = {
2545         0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
2546     static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22,  0x5b57 };
2547     static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22,  0x5b57 };
2548     static const UChar toUnicode943stop[]= { 0x304b};
2549 
2550     static const int32_t  fromIBM943Offssub[]  = { 0, 2, 3, 4, 5, 7 };
2551     static const int32_t  fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 };
2552     static const int32_t  fromIBM943Offsstop[] = { 0};
2553 
2554     gInBufferSize = inputsize;
2555     gOutBufferSize = outputsize;
2556     /*checking with a legal value*/
2557     if(!testConvertFromUnicode(legalText, UPRV_LENGTHOF(legalText),
2558             templegal949, UPRV_LENGTHOF(templegal949), "ibm-949",
2559             UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 ))
2560         log_err("u-> ibm-949 with skip did not match.\n");
2561 
2562     /*checking illegal value for ibm-943 with substitute*/
2563     if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2564              toUnicode943sub, UPRV_LENGTHOF(toUnicode943sub),"ibm-943",
2565             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2566         log_err("ibm-943->u with subst did not match.\n");
2567     /*checking illegal value for ibm-943 with skip */
2568     if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2569              toUnicode943skip, UPRV_LENGTHOF(toUnicode943skip),"ibm-943",
2570             UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 ))
2571         log_err("ibm-943->u with skip did not match.\n");
2572 
2573     /*checking illegal value for ibm-943 with stop */
2574     if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2575              toUnicode943stop, UPRV_LENGTHOF(toUnicode943stop),"ibm-943",
2576             UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 ))
2577         log_err("ibm-943->u with stop did not match.\n");
2578 
2579 }
2580 
TestSingleByte(int32_t inputsize,int32_t outputsize)2581 static void TestSingleByte(int32_t inputsize, int32_t outputsize)
2582 {
2583     static const uint8_t sampleText[] = {
2584         0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
2585         0xff, 0x32, 0x33};
2586     static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 };
2587     static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 };
2588     /*checking illegal value for ibm-943 with substitute*/
2589     gInBufferSize = inputsize;
2590     gOutBufferSize = outputsize;
2591 
2592     if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
2593              toUnicode943sub, UPRV_LENGTHOF(toUnicode943sub),"ibm-943",
2594             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2595         log_err("ibm-943->u with subst did not match.\n");
2596 }
2597 
TestEBCDIC_STATEFUL_Sub(int32_t inputsize,int32_t outputsize)2598 static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize)
2599 {
2600     /*EBCDIC_STATEFUL*/
2601     static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 };
2602     static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 };
2603     static const int32_t offset_930[]=     { 0,    1,    1,    1,    2,    2,    3,    4,    4,    4,    5,    5    };
2604 /*                              s     SO    doubl       SI    sng   s     SO    fe    fe    SI    s    */
2605 
2606     /*EBCDIC_STATEFUL with subChar=3f*/
2607     static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 };
2608     static const int32_t offset_930_subvaried[]=     { 0,    1,    1,    1,    2,    2,    3,    4,    5    };
2609     static const char mySubChar[]={ 0x3f};
2610 
2611     gInBufferSize = inputsize;
2612     gOutBufferSize = outputsize;
2613 
2614     if(!testConvertFromUnicode(ebcdic_inputTest, UPRV_LENGTHOF(ebcdic_inputTest),
2615         toIBM930, UPRV_LENGTHOF(toIBM930), "ibm-930",
2616         UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 ))
2617             log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n");
2618 
2619     if(!testConvertFromUnicode(ebcdic_inputTest, UPRV_LENGTHOF(ebcdic_inputTest),
2620         toIBM930_subvaried, UPRV_LENGTHOF(toIBM930_subvaried), "ibm-930",
2621         UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 ))
2622             log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n");
2623 }
2624 #endif
2625 
testConvertFromUnicode(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UConverterFromUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len)2626 UBool testConvertFromUnicode(const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
2627                 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
2628                 const char *mySubChar, int8_t len)
2629 {
2630 
2631 
2632     UErrorCode status = U_ZERO_ERROR;
2633     UConverter *conv = 0;
2634     char junkout[NEW_MAX_BUFFER]; /* FIX */
2635     int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2636     const UChar *src;
2637     char *end;
2638     char *targ;
2639     int32_t *offs;
2640     int i;
2641     int32_t  realBufferSize;
2642     char *realBufferEnd;
2643     const UChar *realSourceEnd;
2644     const UChar *sourceLimit;
2645     UBool checkOffsets = true;
2646     UBool doFlush;
2647     char junk[9999];
2648     char offset_str[9999];
2649     char *p;
2650     UConverterFromUCallback oldAction = NULL;
2651     const void* oldContext = NULL;
2652 
2653 
2654     for(i=0;i<NEW_MAX_BUFFER;i++)
2655         junkout[i] = (char)0xF0;
2656     for(i=0;i<NEW_MAX_BUFFER;i++)
2657         junokout[i] = 0xFF;
2658     setNuConvTestName(codepage, "FROM");
2659 
2660     log_verbose("\nTesting========= %s  FROM \n  inputbuffer= %d   outputbuffer= %d\n", codepage, gInBufferSize,
2661             gOutBufferSize);
2662 
2663     conv = ucnv_open(codepage, &status);
2664     if(U_FAILURE(status))
2665     {
2666         log_data_err("Couldn't open converter %s\n",codepage);
2667         return true;
2668     }
2669 
2670     log_verbose("Converter opened..\n");
2671 
2672     /*----setting the callback routine----*/
2673     ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2674     if (U_FAILURE(status))
2675     {
2676         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2677     }
2678     /*------------------------*/
2679     /*setting the subChar*/
2680     if(mySubChar != NULL){
2681         ucnv_setSubstChars(conv, mySubChar, len, &status);
2682         if (U_FAILURE(status))  {
2683             log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2684         }
2685     }
2686     /*------------*/
2687 
2688     src = source;
2689     targ = junkout;
2690     offs = junokout;
2691 
2692     realBufferSize = UPRV_LENGTHOF(junkout);
2693     realBufferEnd = junkout + realBufferSize;
2694     realSourceEnd = source + sourceLen;
2695 
2696     if ( gOutBufferSize != realBufferSize )
2697       checkOffsets = false;
2698 
2699     if( gInBufferSize != NEW_MAX_BUFFER )
2700       checkOffsets = false;
2701 
2702     do
2703     {
2704         end = nct_min(targ + gOutBufferSize, realBufferEnd);
2705         sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
2706 
2707         doFlush = (UBool)(sourceLimit == realSourceEnd);
2708 
2709         if(targ == realBufferEnd)
2710         {
2711             log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
2712             return false;
2713         }
2714         log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"true":"false");
2715 
2716 
2717         status = U_ZERO_ERROR;
2718 
2719         ucnv_fromUnicode (conv,
2720                   (char **)&targ,
2721                   (const char *)end,
2722                   &src,
2723                   sourceLimit,
2724                   checkOffsets ? offs : NULL,
2725                   doFlush, /* flush if we're at the end of the input data */
2726                   &status);
2727     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
2728 
2729 
2730     if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2731         UChar errChars[50]; /* should be sufficient */
2732         int8_t errLen = 50;
2733         UErrorCode err = U_ZERO_ERROR;
2734         const UChar* start= NULL;
2735         ucnv_getInvalidUChars(conv,errChars, &errLen, &err);
2736         if(U_FAILURE(err)){
2737             log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err));
2738         }
2739         /* length of in invalid chars should be equal to returned length*/
2740         start = src - errLen;
2741         if(u_strncmp(errChars,start,errLen)!=0){
2742             log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2743         }
2744     }
2745     /* allow failure codes for the stop callback */
2746     if(U_FAILURE(status) &&
2747        (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND)))
2748     {
2749         log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2750         return false;
2751     }
2752 
2753     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
2754         sourceLen, targ-junkout);
2755     if(getTestOption(VERBOSITY_OPTION))
2756     {
2757 
2758         junk[0] = 0;
2759         offset_str[0] = 0;
2760         for(p = junkout;p<targ;p++)
2761         {
2762             snprintf(junk + strlen(junk), sizeof(junk)-strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
2763             snprintf(offset_str + strlen(offset_str), sizeof(offset_str) - strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
2764         }
2765 
2766         log_verbose(junk);
2767         printSeq(expect, expectLen);
2768         if ( checkOffsets )
2769         {
2770             log_verbose("\nOffsets:");
2771             log_verbose(offset_str);
2772         }
2773         log_verbose("\n");
2774     }
2775     ucnv_close(conv);
2776 
2777 
2778     if(expectLen != targ-junkout)
2779     {
2780         log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2781         log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2782         printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2783         printSeqErr(expect, expectLen);
2784         return false;
2785     }
2786 
2787     if (checkOffsets && (expectOffsets != 0) )
2788     {
2789         log_verbose("comparing %d offsets..\n", targ-junkout);
2790         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
2791             log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2792             log_err("Got Output : ");
2793             printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2794             log_err("Got Offsets:      ");
2795             for(p=junkout;p<targ;p++)
2796                 log_err("%d,", junokout[p-junkout]);
2797             log_err("\n");
2798             log_err("Expected Offsets: ");
2799             for(i=0; i<(targ-junkout); i++)
2800                 log_err("%d,", expectOffsets[i]);
2801             log_err("\n");
2802             return false;
2803         }
2804     }
2805 
2806     if(!memcmp(junkout, expect, expectLen))
2807     {
2808         log_verbose("String matches! %s\n", gNuConvTestName);
2809         return true;
2810     }
2811     else
2812     {
2813         log_err("String does not match. %s\n", gNuConvTestName);
2814         log_err("source: ");
2815         printUSeqErr(source, sourceLen);
2816         log_err("Got:      ");
2817         printSeqErr((const uint8_t *)junkout, expectLen);
2818         log_err("Expected: ");
2819         printSeqErr(expect, expectLen);
2820         return false;
2821     }
2822 }
2823 
testConvertToUnicode(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UConverterToUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len)2824 UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
2825                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
2826                const char *mySubChar, int8_t len)
2827 {
2828     UErrorCode status = U_ZERO_ERROR;
2829     UConverter *conv = 0;
2830     UChar   junkout[NEW_MAX_BUFFER]; /* FIX */
2831     int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2832     const char *src;
2833     const char *realSourceEnd;
2834     const char *srcLimit;
2835     UChar *targ;
2836     UChar *end;
2837     int32_t *offs;
2838     int i;
2839     UBool   checkOffsets = true;
2840     char junk[9999];
2841     char offset_str[9999];
2842     UChar *p;
2843     UConverterToUCallback oldAction = NULL;
2844     const void* oldContext = NULL;
2845 
2846     int32_t   realBufferSize;
2847     UChar *realBufferEnd;
2848 
2849 
2850     for(i=0;i<NEW_MAX_BUFFER;i++)
2851         junkout[i] = 0xFFFE;
2852 
2853     for(i=0;i<NEW_MAX_BUFFER;i++)
2854         junokout[i] = -1;
2855 
2856     setNuConvTestName(codepage, "TO");
2857 
2858     log_verbose("\n=========  %s\n", gNuConvTestName);
2859 
2860     conv = ucnv_open(codepage, &status);
2861     if(U_FAILURE(status))
2862     {
2863         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
2864         return true;
2865     }
2866 
2867     log_verbose("Converter opened..\n");
2868 
2869     src = (const char *)source;
2870     targ = junkout;
2871     offs = junokout;
2872 
2873     realBufferSize = UPRV_LENGTHOF(junkout);
2874     realBufferEnd = junkout + realBufferSize;
2875     realSourceEnd = src + sourcelen;
2876     /*----setting the callback routine----*/
2877     ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2878     if (U_FAILURE(status))
2879     {
2880         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2881     }
2882     /*-------------------------------------*/
2883     /*setting the subChar*/
2884     if(mySubChar != NULL){
2885         ucnv_setSubstChars(conv, mySubChar, len, &status);
2886         if (U_FAILURE(status))  {
2887             log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2888         }
2889     }
2890     /*------------*/
2891 
2892 
2893     if ( gOutBufferSize != realBufferSize )
2894         checkOffsets = false;
2895 
2896     if( gInBufferSize != NEW_MAX_BUFFER )
2897         checkOffsets = false;
2898 
2899     do
2900     {
2901         end = nct_min( targ + gOutBufferSize, realBufferEnd);
2902         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
2903 
2904         if(targ == realBufferEnd)
2905         {
2906             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
2907             return false;
2908         }
2909         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
2910 
2911 
2912 
2913         status = U_ZERO_ERROR;
2914 
2915         ucnv_toUnicode (conv,
2916                 &targ,
2917                 end,
2918                 (const char **)&src,
2919                 (const char *)srcLimit,
2920                 checkOffsets ? offs : NULL,
2921                 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
2922                 &status);
2923     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
2924 
2925     if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2926         char errChars[50]; /* should be sufficient */
2927         int8_t errLen = 50;
2928         UErrorCode err = U_ZERO_ERROR;
2929         const char* start= NULL;
2930         ucnv_getInvalidChars(conv,errChars, &errLen, &err);
2931         if(U_FAILURE(err)){
2932             log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err));
2933         }
2934         /* length of in invalid chars should be equal to returned length*/
2935         start = src - errLen;
2936         if(uprv_strncmp(errChars,start,errLen)!=0){
2937             log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2938         }
2939     }
2940     /* allow failure codes for the stop callback */
2941     if(U_FAILURE(status) &&
2942        (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND)))
2943     {
2944         log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2945         return false;
2946     }
2947 
2948     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
2949         sourcelen, targ-junkout);
2950     if(getTestOption(VERBOSITY_OPTION))
2951     {
2952 
2953         junk[0] = 0;
2954         offset_str[0] = 0;
2955 
2956         for(p = junkout;p<targ;p++)
2957         {
2958             snprintf(junk + strlen(junk), sizeof(junk)-strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
2959             snprintf(offset_str + strlen(offset_str), sizeof(offset_str)-strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
2960         }
2961 
2962         log_verbose(junk);
2963         printUSeq(expect, expectlen);
2964         if ( checkOffsets )
2965         {
2966             log_verbose("\nOffsets:");
2967             log_verbose(offset_str);
2968         }
2969         log_verbose("\n");
2970     }
2971     ucnv_close(conv);
2972 
2973     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
2974 
2975     if (checkOffsets && (expectOffsets != 0))
2976     {
2977         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
2978         {
2979             log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2980             log_err("Got offsets:      ");
2981             for(p=junkout;p<targ;p++)
2982                 log_err("  %2d,", junokout[p-junkout]);
2983             log_err("\n");
2984             log_err("Expected offsets: ");
2985             for(i=0; i<(targ-junkout); i++)
2986                 log_err("  %2d,", expectOffsets[i]);
2987             log_err("\n");
2988             log_err("Got output:       ");
2989             for(i=0; i<(targ-junkout); i++)
2990                 log_err("0x%04x,", junkout[i]);
2991             log_err("\n");
2992             log_err("From source:      ");
2993             for(i=0; i<(src-(const char *)source); i++)
2994                 log_err("  0x%02x,", (unsigned char)source[i]);
2995             log_err("\n");
2996         }
2997     }
2998 
2999     if(!memcmp(junkout, expect, expectlen*2))
3000     {
3001         log_verbose("Matches!\n");
3002         return true;
3003     }
3004     else
3005     {
3006         log_err("String does not match. %s\n", gNuConvTestName);
3007         log_verbose("String does not match. %s\n", gNuConvTestName);
3008         log_err("Got:      ");
3009         printUSeqErr(junkout, expectlen);
3010         log_err("Expected: ");
3011         printUSeqErr(expect, expectlen);
3012         log_err("\n");
3013         return false;
3014     }
3015 }
3016 
testConvertFromUnicodeWithContext(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UConverterFromUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len,const void * context,UErrorCode expectedError)3017 UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
3018                 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
3019                 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3020 {
3021 
3022 
3023     UErrorCode status = U_ZERO_ERROR;
3024     UConverter *conv = 0;
3025     char junkout[NEW_MAX_BUFFER]; /* FIX */
3026     int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3027     const UChar *src;
3028     char *end;
3029     char *targ;
3030     int32_t *offs;
3031     int i;
3032     int32_t  realBufferSize;
3033     char *realBufferEnd;
3034     const UChar *realSourceEnd;
3035     const UChar *sourceLimit;
3036     UBool checkOffsets = true;
3037     UBool doFlush;
3038     char junk[9999];
3039     char offset_str[9999];
3040     char *p;
3041     UConverterFromUCallback oldAction = NULL;
3042     const void* oldContext = NULL;
3043 
3044 
3045     for(i=0;i<NEW_MAX_BUFFER;i++)
3046         junkout[i] = (char)0xF0;
3047     for(i=0;i<NEW_MAX_BUFFER;i++)
3048         junokout[i] = 0xFF;
3049     setNuConvTestName(codepage, "FROM");
3050 
3051     log_verbose("\nTesting========= %s  FROM \n  inputbuffer= %d   outputbuffer= %d\n", codepage, gInBufferSize,
3052             gOutBufferSize);
3053 
3054     conv = ucnv_open(codepage, &status);
3055     if(U_FAILURE(status))
3056     {
3057         log_data_err("Couldn't open converter %s\n",codepage);
3058         return true; /* Because the err has already been logged. */
3059     }
3060 
3061     log_verbose("Converter opened..\n");
3062 
3063     /*----setting the callback routine----*/
3064     ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3065     if (U_FAILURE(status))
3066     {
3067         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3068     }
3069     /*------------------------*/
3070     /*setting the subChar*/
3071     if(mySubChar != NULL){
3072         ucnv_setSubstChars(conv, mySubChar, len, &status);
3073         if (U_FAILURE(status))  {
3074             log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status));
3075         }
3076     }
3077     /*------------*/
3078 
3079     src = source;
3080     targ = junkout;
3081     offs = junokout;
3082 
3083     realBufferSize = UPRV_LENGTHOF(junkout);
3084     realBufferEnd = junkout + realBufferSize;
3085     realSourceEnd = source + sourceLen;
3086 
3087     if ( gOutBufferSize != realBufferSize )
3088       checkOffsets = false;
3089 
3090     if( gInBufferSize != NEW_MAX_BUFFER )
3091       checkOffsets = false;
3092 
3093     do
3094     {
3095         end = nct_min(targ + gOutBufferSize, realBufferEnd);
3096         sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
3097 
3098         doFlush = (UBool)(sourceLimit == realSourceEnd);
3099 
3100         if(targ == realBufferEnd)
3101         {
3102             log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
3103             return false;
3104         }
3105         log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"true":"false");
3106 
3107 
3108         status = U_ZERO_ERROR;
3109 
3110         ucnv_fromUnicode (conv,
3111                   (char **)&targ,
3112                   (const char *)end,
3113                   &src,
3114                   sourceLimit,
3115                   checkOffsets ? offs : NULL,
3116                   doFlush, /* flush if we're at the end of the input data */
3117                   &status);
3118     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
3119 
3120     /* allow failure codes for the stop callback */
3121     if(U_FAILURE(status) && status != expectedError)
3122     {
3123         log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3124         return false;
3125     }
3126 
3127     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
3128         sourceLen, targ-junkout);
3129     if(getTestOption(VERBOSITY_OPTION))
3130     {
3131 
3132         junk[0] = 0;
3133         offset_str[0] = 0;
3134         for(p = junkout;p<targ;p++)
3135         {
3136             snprintf(junk + strlen(junk), sizeof(junk)-strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
3137             snprintf(offset_str + strlen(offset_str), sizeof(offset_str)-strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
3138         }
3139 
3140         log_verbose(junk);
3141         printSeq(expect, expectLen);
3142         if ( checkOffsets )
3143         {
3144             log_verbose("\nOffsets:");
3145             log_verbose(offset_str);
3146         }
3147         log_verbose("\n");
3148     }
3149     ucnv_close(conv);
3150 
3151 
3152     if(expectLen != targ-junkout)
3153     {
3154         log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3155         log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3156         printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3157         printSeqErr(expect, expectLen);
3158         return false;
3159     }
3160 
3161     if (checkOffsets && (expectOffsets != 0) )
3162     {
3163         log_verbose("comparing %d offsets..\n", targ-junkout);
3164         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
3165             log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3166             log_err("Got Output : ");
3167             printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3168             log_err("Got Offsets:      ");
3169             for(p=junkout;p<targ;p++)
3170                 log_err("%d,", junokout[p-junkout]);
3171             log_err("\n");
3172             log_err("Expected Offsets: ");
3173             for(i=0; i<(targ-junkout); i++)
3174                 log_err("%d,", expectOffsets[i]);
3175             log_err("\n");
3176             return false;
3177         }
3178     }
3179 
3180     if(!memcmp(junkout, expect, expectLen))
3181     {
3182         log_verbose("String matches! %s\n", gNuConvTestName);
3183         return true;
3184     }
3185     else
3186     {
3187         log_err("String does not match. %s\n", gNuConvTestName);
3188         log_err("source: ");
3189         printUSeqErr(source, sourceLen);
3190         log_err("Got:      ");
3191         printSeqErr((const uint8_t *)junkout, expectLen);
3192         log_err("Expected: ");
3193         printSeqErr(expect, expectLen);
3194         return false;
3195     }
3196 }
testConvertToUnicodeWithContext(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UConverterToUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len,const void * context,UErrorCode expectedError)3197 UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
3198                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
3199                const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3200 {
3201     UErrorCode status = U_ZERO_ERROR;
3202     UConverter *conv = 0;
3203     UChar   junkout[NEW_MAX_BUFFER]; /* FIX */
3204     int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3205     const char *src;
3206     const char *realSourceEnd;
3207     const char *srcLimit;
3208     UChar *targ;
3209     UChar *end;
3210     int32_t *offs;
3211     int i;
3212     UBool   checkOffsets = true;
3213     char junk[9999];
3214     char offset_str[9999];
3215     UChar *p;
3216     UConverterToUCallback oldAction = NULL;
3217     const void* oldContext = NULL;
3218 
3219     int32_t   realBufferSize;
3220     UChar *realBufferEnd;
3221 
3222 
3223     for(i=0;i<NEW_MAX_BUFFER;i++)
3224         junkout[i] = 0xFFFE;
3225 
3226     for(i=0;i<NEW_MAX_BUFFER;i++)
3227         junokout[i] = -1;
3228 
3229     setNuConvTestName(codepage, "TO");
3230 
3231     log_verbose("\n=========  %s\n", gNuConvTestName);
3232 
3233     conv = ucnv_open(codepage, &status);
3234     if(U_FAILURE(status))
3235     {
3236         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
3237         return true;
3238     }
3239 
3240     log_verbose("Converter opened..\n");
3241 
3242     src = (const char *)source;
3243     targ = junkout;
3244     offs = junokout;
3245 
3246     realBufferSize = UPRV_LENGTHOF(junkout);
3247     realBufferEnd = junkout + realBufferSize;
3248     realSourceEnd = src + sourcelen;
3249     /*----setting the callback routine----*/
3250     ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3251     if (U_FAILURE(status))
3252     {
3253         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3254     }
3255     /*-------------------------------------*/
3256     /*setting the subChar*/
3257     if(mySubChar != NULL){
3258         ucnv_setSubstChars(conv, mySubChar, len, &status);
3259         if (U_FAILURE(status))  {
3260             log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3261         }
3262     }
3263     /*------------*/
3264 
3265 
3266     if ( gOutBufferSize != realBufferSize )
3267         checkOffsets = false;
3268 
3269     if( gInBufferSize != NEW_MAX_BUFFER )
3270         checkOffsets = false;
3271 
3272     do
3273     {
3274         end = nct_min( targ + gOutBufferSize, realBufferEnd);
3275         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
3276 
3277         if(targ == realBufferEnd)
3278         {
3279             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
3280             return false;
3281         }
3282         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
3283 
3284 
3285 
3286         status = U_ZERO_ERROR;
3287 
3288         ucnv_toUnicode (conv,
3289                 &targ,
3290                 end,
3291                 (const char **)&src,
3292                 (const char *)srcLimit,
3293                 checkOffsets ? offs : NULL,
3294                 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
3295                 &status);
3296     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
3297 
3298     /* allow failure codes for the stop callback */
3299     if(U_FAILURE(status) && status!=expectedError)
3300     {
3301         log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3302         return false;
3303     }
3304 
3305     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
3306         sourcelen, targ-junkout);
3307     if(getTestOption(VERBOSITY_OPTION))
3308     {
3309 
3310         junk[0] = 0;
3311         offset_str[0] = 0;
3312 
3313         for(p = junkout;p<targ;p++)
3314         {
3315             snprintf(junk + strlen(junk), sizeof(junk)-strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
3316             snprintf(offset_str + strlen(offset_str), sizeof(offset_str)-strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
3317         }
3318 
3319         log_verbose(junk);
3320         printUSeq(expect, expectlen);
3321         if ( checkOffsets )
3322         {
3323             log_verbose("\nOffsets:");
3324             log_verbose(offset_str);
3325         }
3326         log_verbose("\n");
3327     }
3328     ucnv_close(conv);
3329 
3330     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
3331 
3332     if (checkOffsets && (expectOffsets != 0))
3333     {
3334         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
3335         {
3336             log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3337             log_err("Got offsets:      ");
3338             for(p=junkout;p<targ;p++)
3339                 log_err("  %2d,", junokout[p-junkout]);
3340             log_err("\n");
3341             log_err("Expected offsets: ");
3342             for(i=0; i<(targ-junkout); i++)
3343                 log_err("  %2d,", expectOffsets[i]);
3344             log_err("\n");
3345             log_err("Got output:       ");
3346             for(i=0; i<(targ-junkout); i++)
3347                 log_err("0x%04x,", junkout[i]);
3348             log_err("\n");
3349             log_err("From source:      ");
3350             for(i=0; i<(src-(const char *)source); i++)
3351                 log_err("  0x%02x,", (unsigned char)source[i]);
3352             log_err("\n");
3353         }
3354     }
3355 
3356     if(!memcmp(junkout, expect, expectlen*2))
3357     {
3358         log_verbose("Matches!\n");
3359         return true;
3360     }
3361     else
3362     {
3363         log_err("String does not match. %s\n", gNuConvTestName);
3364         log_verbose("String does not match. %s\n", gNuConvTestName);
3365         log_err("Got:      ");
3366         printUSeqErr(junkout, expectlen);
3367         log_err("Expected: ");
3368         printUSeqErr(expect, expectlen);
3369         log_err("\n");
3370         return false;
3371     }
3372 }
3373 
TestCallBackFailure(void)3374 static void TestCallBackFailure(void) {
3375     UErrorCode status = U_USELESS_COLLATOR_ERROR;
3376     ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status);
3377     if (status != U_USELESS_COLLATOR_ERROR) {
3378         log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n");
3379     }
3380     ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status);
3381     if (status != U_USELESS_COLLATOR_ERROR) {
3382         log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n");
3383     }
3384     ucnv_cbFromUWriteSub(NULL, -1, &status);
3385     if (status != U_USELESS_COLLATOR_ERROR) {
3386         log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n");
3387     }
3388     ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status);
3389     if (status != U_USELESS_COLLATOR_ERROR) {
3390         log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n");
3391     }
3392 }
3393