• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*
9 ********************************************************************************
10 * File NCCBTST.C
11 *
12 * Modification History:
13 *        Name                            Description
14 *    Madhu Katragadda     7/21/1999      Testing error callback routines
15 ********************************************************************************
16 */
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <ctype.h>
21 #include "cmemory.h"
22 #include "cstring.h"
23 #include "unicode/uloc.h"
24 #include "unicode/ucnv.h"
25 #include "unicode/ucnv_err.h"
26 #include "cintltst.h"
27 #include "unicode/utypes.h"
28 #include "unicode/ustring.h"
29 #include "nccbtst.h"
30 #include "unicode/ucnv_cb.h"
31 #include "unicode/utf16.h"
32 
/* Large buffer size passed by the *CallBack test drivers below. */
#define NEW_MAX_BUFFER 999

/*
 * Smaller of x and y.
 * Both arguments and the whole expansion are parenthesized so that operands
 * containing lower-precedence operators (e.g. `a|b`) compare correctly.
 * NOTE: each argument may be evaluated twice; do not pass expressions with
 * side effects.
 */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))

/* Buffer sizes for the current test run; reported by setNuConvTestName(). */
static int32_t  gInBufferSize = 0;
static int32_t  gOutBufferSize = 0;
/* Human-readable label of the current test, filled in by setNuConvTestName(). */
static char     gNuConvTestName[1024];
40 
/*
 * Logs (verbose level) the first `len` bytes of `a` as a brace-enclosed,
 * comma-separated list of two-digit upper-case hex values.
 * Nothing is printed between the braces when len <= 0.
 */
static void printSeq(const uint8_t* a, int len)
{
    int idx;

    log_verbose("\n{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02X, ", a[idx]);
    }
    log_verbose("}\n");
}
49 
/*
 * Logs (verbose level) the first `len` code units of `a` as a brace-enclosed,
 * comma-separated list of four-digit lower-case hex values.
 * Nothing is printed between the braces when len <= 0.
 */
static void printUSeq(const UChar* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("  0x%04x, ", a[idx]);
    }
    log_verbose("}\n");
}
58 
/*
 * Writes the first `len` bytes of `a` to stderr as a brace-enclosed,
 * comma-separated list of two-digit lower-case hex values.
 * Nothing is printed between the braces when len <= 0.
 */
static void printSeqErr(const uint8_t* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "  0x%02x, ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
67 
/*
 * Writes the first `len` code units of `a` to stderr as a brace-enclosed,
 * comma-separated list of four-digit lower-case hex values.
 * Nothing is printed between the braces when len <= 0.
 */
static void printUSeqErr(const UChar* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%04x, ", a[idx]);
    }
    fprintf(stderr,"}\n");
}
76 
setNuConvTestName(const char * codepage,const char * direction)77 static void setNuConvTestName(const char *codepage, const char *direction)
78 {
79     sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
80             codepage,
81             direction,
82             (int)gInBufferSize,
83             (int)gOutBufferSize);
84 }
85 
86 
87 static void TestCallBackFailure(void);
88 
89 void addTestConvertErrorCallBack(TestNode** root);
90 
addTestConvertErrorCallBack(TestNode ** root)91 void addTestConvertErrorCallBack(TestNode** root)
92 {
93     addTest(root, &TestSkipCallBack,  "tsconv/nccbtst/TestSkipCallBack");
94     addTest(root, &TestStopCallBack,  "tsconv/nccbtst/TestStopCallBack");
95     addTest(root, &TestSubCallBack,   "tsconv/nccbtst/TestSubCallBack");
96     addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack");
97 
98 #if !UCONFIG_NO_LEGACY_CONVERSION
99     addTest(root, &TestLegalAndOtherCallBack,  "tsconv/nccbtst/TestLegalAndOtherCallBack");
100     addTest(root, &TestSingleByteCallBack,  "tsconv/nccbtst/TestSingleByteCallBack");
101 #endif
102 
103     addTest(root, &TestCallBackFailure,  "tsconv/nccbtst/TestCallBackFailure");
104 }
105 
TestSkipCallBack()106 static void TestSkipCallBack()
107 {
108     TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
109     TestSkip(1,NEW_MAX_BUFFER);
110     TestSkip(1,1);
111     TestSkip(NEW_MAX_BUFFER, 1);
112 }
113 
TestStopCallBack()114 static void TestStopCallBack()
115 {
116     TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
117     TestStop(1,NEW_MAX_BUFFER);
118     TestStop(1,1);
119     TestStop(NEW_MAX_BUFFER, 1);
120 }
121 
TestSubCallBack()122 static void TestSubCallBack()
123 {
124     TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
125     TestSub(1,NEW_MAX_BUFFER);
126     TestSub(1,1);
127     TestSub(NEW_MAX_BUFFER, 1);
128 
129 #if !UCONFIG_NO_LEGACY_CONVERSION
130     TestEBCDIC_STATEFUL_Sub(1, 1);
131     TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER);
132     TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1);
133     TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
134 #endif
135 }
136 
TestSubWithValueCallBack()137 static void TestSubWithValueCallBack()
138 {
139     TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
140     TestSubWithValue(1,NEW_MAX_BUFFER);
141     TestSubWithValue(1,1);
142     TestSubWithValue(NEW_MAX_BUFFER, 1);
143 }
144 
145 #if !UCONFIG_NO_LEGACY_CONVERSION
TestLegalAndOtherCallBack()146 static void TestLegalAndOtherCallBack()
147 {
148     TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
149     TestLegalAndOthers(1,NEW_MAX_BUFFER);
150     TestLegalAndOthers(1,1);
151     TestLegalAndOthers(NEW_MAX_BUFFER, 1);
152 }
153 
TestSingleByteCallBack()154 static void TestSingleByteCallBack()
155 {
156     TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
157     TestSingleByte(1,NEW_MAX_BUFFER);
158     TestSingleByte(1,1);
159     TestSingleByte(NEW_MAX_BUFFER, 1);
160 }
161 #endif
162 
TestSkip(int32_t inputsize,int32_t outputsize)163 static void TestSkip(int32_t inputsize, int32_t outputsize)
164 {
165     static const uint8_t expskipIBM_949[]= {
166         0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
167 
168     static const uint8_t expskipIBM_943[] = {
169         0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 };
170 
171     static const uint8_t expskipIBM_930[] = {
172         0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f };
173 
174     gInBufferSize = inputsize;
175     gOutBufferSize = outputsize;
176 
177     /*From Unicode*/
178     log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP  \n");
179 
180 #if !UCONFIG_NO_LEGACY_CONVERSION
181     {
182         static const UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
183         static const UChar  sampleText2[] =  { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
184 
185         static const int32_t  toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 };
186         static const int32_t  toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 };
187 
188         if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
189                 expskipIBM_949, UPRV_LENGTHOF(expskipIBM_949), "ibm-949",
190                 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 ))
191             log_err("u-> ibm-949 with skip did not match.\n");
192         if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
193                 expskipIBM_943, UPRV_LENGTHOF(expskipIBM_943), "ibm-943",
194                 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 ))
195             log_err("u-> ibm-943 with skip did not match.\n");
196     }
197 
198     {
199         static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 };
200         static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f };
201         static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 };
202 
203         /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */
204         if(!testConvertFromUnicode(fromU, UPRV_LENGTHOF(fromU),
205                                    fromUBytes, UPRV_LENGTHOF(fromUBytes),
206                                    "ibm-930",
207                                    UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets,
208                                    NULL, 0)
209         ) {
210             log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n");
211         }
212     }
213 #endif
214 
215     {
216         static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
217         static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 };
218         static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 };
219 
220         static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
221         static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 };
222         static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 };
223 
224         /* US-ASCII */
225         if(!testConvertFromUnicode(usasciiFromU, UPRV_LENGTHOF(usasciiFromU),
226                                    usasciiFromUBytes, UPRV_LENGTHOF(usasciiFromUBytes),
227                                    "US-ASCII",
228                                    UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
229                                    NULL, 0)
230         ) {
231             log_err("u->US-ASCII with skip did not match.\n");
232         }
233 
234 #if !UCONFIG_NO_LEGACY_CONVERSION
235         /* SBCS NLTC codepage 367 for US-ASCII */
236         if(!testConvertFromUnicode(usasciiFromU, UPRV_LENGTHOF(usasciiFromU),
237                                    usasciiFromUBytes, UPRV_LENGTHOF(usasciiFromUBytes),
238                                    "ibm-367",
239                                    UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
240                                    NULL, 0)
241         ) {
242             log_err("u->ibm-367 with skip did not match.\n");
243         }
244 #endif
245 
246         /* ISO-Latin-1 */
247         if(!testConvertFromUnicode(latin1FromU, UPRV_LENGTHOF(latin1FromU),
248                                    latin1FromUBytes, UPRV_LENGTHOF(latin1FromUBytes),
249                                    "LATIN_1",
250                                    UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
251                                    NULL, 0)
252         ) {
253             log_err("u->LATIN_1 with skip did not match.\n");
254         }
255 
256 #if !UCONFIG_NO_LEGACY_CONVERSION
257         /* windows-1252 */
258         if(!testConvertFromUnicode(latin1FromU, UPRV_LENGTHOF(latin1FromU),
259                                    latin1FromUBytes, UPRV_LENGTHOF(latin1FromUBytes),
260                                    "windows-1252",
261                                    UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
262                                    NULL, 0)
263         ) {
264             log_err("u->windows-1252 with skip did not match.\n");
265         }
266     }
267 
268     {
269         static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
270         static const uint8_t toIBM943[]= { 0x61, 0x61 };
271         static const int32_t offset[]= {0, 4};
272 
273          /* EUC_JP*/
274         static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
275         static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
276             0x61, 0x8e, 0xe0,
277         };
278         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7};
279 
280         /*EUC_TW*/
281         static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
282         static const uint8_t to_euc_tw[]={
283             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
284             0x61, 0xe6, 0xca, 0x8a,
285         };
286         static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,};
287 
288         /*ISO-2022-JP*/
289         static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, };
290         static const uint8_t to_iso_2022_jp[]={
291             0x41,
292             0x42,
293 
294         };
295         static const int32_t from_iso_2022_jpOffs [] ={0,2};
296 
297         /*ISO-2022-JP*/
298         UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
299         static const uint8_t to_iso_2022_jp2[]={
300             0x41,
301             0x43,
302 
303         };
304         static const int32_t from_iso_2022_jpOffs2 [] ={0,2};
305 
306         /*ISO-2022-cn*/
307         static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
308         static const uint8_t to_iso_2022_cn[]={
309             0x41, 0x42
310         };
311         static const int32_t from_iso_2022_cnOffs [] ={
312             0, 2
313         };
314 
315         /*ISO-2022-CN*/
316         static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
317         static const uint8_t to_iso_2022_cn1[]={
318             0x41, 0x43
319 
320         };
321         static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 };
322 
323         /*ISO-2022-kr*/
324         static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
325         static const uint8_t to_iso_2022_kr[]={
326             0x1b,   0x24,   0x29,   0x43,
327             0x41,
328             0x0e,   0x25,   0x50,
329             0x25,   0x50,
330             0x0f,   0x42,
331         };
332         static const int32_t from_iso_2022_krOffs [] ={
333             -1,-1,-1,-1,
334             0,
335             1,1,1,
336             3,3,
337             4,4
338         };
339 
340         /*ISO-2022-kr*/
341         static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
342         static const uint8_t to_iso_2022_kr1[]={
343             0x1b,   0x24,   0x29,   0x43,
344             0x41,
345             0x0e,   0x25,   0x50,
346             0x25,   0x50,
347 
348         };
349         static const int32_t from_iso_2022_krOffs1 [] ={
350             -1,-1,-1,-1,
351             0,
352             1,1,1,
353             3,3,
354 
355         };
356         /* HZ encoding */
357         static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
358 
359         static const uint8_t to_hz[]={
360             0x7e,   0x7d,   0x41,
361             0x7e,   0x7b,   0x26,   0x30,
362             0x26,   0x30,
363             0x7e,   0x7d,   0x42,
364 
365         };
366         static const int32_t from_hzOffs [] ={
367             0,0,0,
368             1,1,1,1,
369             3,3,
370             4,4,4,4
371         };
372 
373         static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
374 
375         static const uint8_t to_hz1[]={
376             0x7e,   0x7d,   0x41,
377             0x7e,   0x7b,   0x26,   0x30,
378             0x26,   0x30,
379 
380 
381         };
382         static const int32_t from_hzOffs1 [] ={
383             0,0,0,
384             1,1,1,1,
385             3,3,
386 
387         };
388 
389 #endif
390 
391         static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
392 
393         static const uint8_t to_SCSU[]={
394             0x41,
395             0x42
396 
397 
398         };
399         static const int32_t from_SCSUOffs [] ={
400             0,
401             2,
402 
403         };
404 
405 #if !UCONFIG_NO_LEGACY_CONVERSION
406         /* ISCII */
407         static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
408         static const uint8_t to_iscii[]={
409             0x41,
410             0x42,
411         };
412         static const int32_t from_isciiOffs [] ={
413             0,2,
414 
415         };
416         /*ISCII*/
417         static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
418         static const uint8_t to_iscii1[]={
419             0x44,
420             0x43,
421 
422         };
423         static const int32_t from_isciiOffs1 [] ={0,2};
424 
425         if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
426                 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
427                 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 ))
428             log_err("u-> ibm-943 with skip did not match.\n");
429 
430         if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
431                 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
432                 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 ))
433             log_err("u-> euc-jp with skip did not match.\n");
434 
435         if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
436                 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
437                 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 ))
438             log_err("u-> euc-tw with skip did not match.\n");
439 
440         /*iso_2022_jp*/
441         if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
442                 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
443                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 ))
444             log_err("u-> iso-2022-jp with skip did not match.\n");
445 
446         /* with context */
447         if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, UPRV_LENGTHOF(iso_2022_jp_inputText2),
448                 to_iso_2022_jp2, UPRV_LENGTHOF(to_iso_2022_jp2), "iso-2022-jp",
449                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
450             log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
451 
452         /*iso_2022_cn*/
453         if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
454                 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
455                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 ))
456             log_err("u-> iso-2022-cn with skip did not match.\n");
457         /*with context*/
458         if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, UPRV_LENGTHOF(iso_2022_cn_inputText1),
459                 to_iso_2022_cn1, UPRV_LENGTHOF(to_iso_2022_cn1), "iso-2022-cn",
460                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
461             log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
462 
463         /*iso_2022_kr*/
464         if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
465                 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
466                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 ))
467             log_err("u-> iso-2022-kr with skip did not match.\n");
468           /*with context*/
469         if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, UPRV_LENGTHOF(iso_2022_kr_inputText1),
470                 to_iso_2022_kr1, UPRV_LENGTHOF(to_iso_2022_kr1), "iso-2022-kr",
471                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
472             log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
473 
474         /*hz*/
475         if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
476                 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
477                 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 ))
478             log_err("u-> HZ with skip did not match.\n");
479           /*with context*/
480         if(!testConvertFromUnicodeWithContext(hz_inputText1, UPRV_LENGTHOF(hz_inputText1),
481                 to_hz1, UPRV_LENGTHOF(to_hz1), "hz",
482                 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
483             log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
484 #endif
485 
486         /*SCSU*/
487         if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
488                 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
489                 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 ))
490             log_err("u-> SCSU with skip did not match.\n");
491 
492 #if !UCONFIG_NO_LEGACY_CONVERSION
493         /*ISCII*/
494         if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
495                 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
496                 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 ))
497             log_err("u-> iscii with skip did not match.\n");
498         /*with context*/
499         if(!testConvertFromUnicodeWithContext(iscii_inputText1, UPRV_LENGTHOF(iscii_inputText1),
500                 to_iscii1, UPRV_LENGTHOF(to_iscii1), "ISCII,version=0",
501                 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
502             log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
503 #endif
504     }
505 
506     log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
507     {
508         static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */
509             0xFB, 0xEE, 0x28,       /* from source offset 0 */
510             0x24, 0x1E, 0x52,
511             0xB2,
512             0x20,
513             0xB3,
514             0xB1,
515             0x0D,
516             0x0A,
517 
518             0x20,                   /* from 8 */
519             0x00,
520             0xD0, 0x6C,
521             0xB6,
522             0xD8, 0xA5,
523             0x20,
524             0x68,
525             0x59,
526 
527             0xF9, 0x28,             /* from 16 */
528             0x6D,
529             0x20,
530             0x73,
531             0xE0, 0x2D,
532             0xDE, 0x43,
533             0xD0, 0x33,
534             0x20,
535 
536             0xFA, 0x83,             /* from 24 */
537             0x25, 0x01,
538             0xFB, 0x16, 0x87,
539             0x4B, 0x16,
540             0x20,
541             0xE6, 0xBD,
542             0xEB, 0x5B,
543             0x4B, 0xCC,
544 
545             0xF9, 0xA2,             /* from 32 */
546             0xFC, 0x10, 0x3E,
547             0xFE, 0x16, 0x3A, 0x8C,
548             0x20,
549             0xFC, 0x03, 0xAC,
550 
551             0x01,                   /* from 41 */
552             0xDE, 0x83,
553             0x20,
554             0x09
555         };
556         static const UChar expected[]={
557             0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */
558             0x0063, 0x0061, 0x000D, 0x000A,
559 
560             0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */
561             0x0930, 0x0020, 0x0918, 0x0909,
562 
563             0x3086, 0x304D, 0x0020, 0x3053, /* 16 */
564             0x4000, 0x4E00, 0x7777, 0x0020,
565 
566             0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */
567             0x0020, 0xD7A3, 0xDC00, 0xD800,
568 
569             0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */
570             0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
571 
572             0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */
573             0x0009
574         };
575         static const int32_t offsets[]={
576             0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7,
577             8, 9, 10, 10, 11, 12, 12, 13, 14, 15,
578             16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23,
579             24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31,
580             32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39,
581             41, 42, 42, 43, 44
582         };
583 
584         /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */
585         if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
586                                  sampleText, UPRV_LENGTHOF(sampleText),
587                                  "BOCU-1",
588                                  UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
589         ) {
590             log_err("u->BOCU-1 with skip did not match.\n");
591         }
592     }
593 
594     log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
595     {
596         const uint8_t sampleText[]={
597             0x61,                               /* 'a' */
598             0xc4, 0xb5,                         /* U+0135 */
599             0xed, 0x80, 0xa0,                   /* Hangul U+d020 */
600             0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */
601             0xee, 0x80, 0x80,                   /* PUA U+e000 */
602             0xed, 0xb0, 0x81,                   /* unpaired trail surrogate U+dc01 */
603             0x62,                               /* 'b' */
604             0xed, 0xa0, 0x81,                   /* unpaired lead surrogate U+d801 */
605             0xd0, 0x80                          /* U+0400 */
606         };
607         UChar expected[]={
608             0x0061,
609             0x0135,
610             0xd020,
611             0xd801, 0xdc01,
612             0xe000,
613             0xdc01,
614             0x0062,
615             0xd801,
616             0x0400
617         };
618         int32_t offsets[]={
619             0,
620             1, 1,
621             2, 2, 2,
622             3, 3, 3, 4, 4, 4,
623             5, 5, 5,
624             6, 6, 6,
625             7,
626             8, 8, 8,
627             9, 9
628         };
629 
630         /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */
631 
632         /* without offsets */
633         if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
634                                  sampleText, UPRV_LENGTHOF(sampleText),
635                                  "CESU-8",
636                                  UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0)
637         ) {
638             log_err("u->CESU-8 with skip did not match.\n");
639         }
640 
641         /* with offsets */
642         if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
643                                  sampleText, UPRV_LENGTHOF(sampleText),
644                                  "CESU-8",
645                                  UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
646         ) {
647             log_err("u->CESU-8 with skip did not match.\n");
648         }
649     }
650 
651     /*to Unicode*/
652     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP  \n");
653 
654 #if !UCONFIG_NO_LEGACY_CONVERSION
655     {
656 
657         static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 };
658         static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
659         static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
660 
661         static const int32_t  fromIBM949Offs [] = { 0, 1, 3, 5};
662         static const int32_t  fromIBM943Offs [] = { 0, 2, 4};
663         static const int32_t  fromIBM930Offs [] = { 1, 3, 5};
664 
665         if(!testConvertToUnicode(expskipIBM_949, UPRV_LENGTHOF(expskipIBM_949),
666                  IBM_949skiptoUnicode, UPRV_LENGTHOF(IBM_949skiptoUnicode),"ibm-949",
667                 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 ))
668             log_err("ibm-949->u with skip did not match.\n");
669         if(!testConvertToUnicode(expskipIBM_943, UPRV_LENGTHOF(expskipIBM_943),
670                  IBM_943skiptoUnicode, UPRV_LENGTHOF(IBM_943skiptoUnicode),"ibm-943",
671                 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 ))
672             log_err("ibm-943->u with skip did not match.\n");
673 
674 
675         if(!testConvertToUnicode(expskipIBM_930, UPRV_LENGTHOF(expskipIBM_930),
676                  IBM_930skiptoUnicode, UPRV_LENGTHOF(IBM_930skiptoUnicode),"ibm-930",
677                 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 ))
678             log_err("ibm-930->u with skip did not match.\n");
679 
680 
681         if(!testConvertToUnicodeWithContext(expskipIBM_930, UPRV_LENGTHOF(expskipIBM_930),
682                  IBM_930skiptoUnicode, UPRV_LENGTHOF(IBM_930skiptoUnicode),"ibm-930",
683                 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
684             log_err("ibm-930->u with skip did not match.\n");
685     }
686 #endif
687 
688     {
689         static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 };
690         static const UChar usasciiToU[] = { 0x61, 0x31 };
691         static const int32_t usasciiToUOffsets[] = { 0, 2 };
692 
693         static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 };
694         static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 };
695         static const int32_t latin1ToUOffsets[] = { 0, 1, 2 };
696 
697         /* US-ASCII */
698         if(!testConvertToUnicode(usasciiToUBytes, UPRV_LENGTHOF(usasciiToUBytes),
699                                  usasciiToU, UPRV_LENGTHOF(usasciiToU),
700                                  "US-ASCII",
701                                  UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
702                                  NULL, 0)
703         ) {
704             log_err("US-ASCII->u with skip did not match.\n");
705         }
706 
707 #if !UCONFIG_NO_LEGACY_CONVERSION
708         /* SBCS NLTC codepage 367 for US-ASCII */
709         if(!testConvertToUnicode(usasciiToUBytes, UPRV_LENGTHOF(usasciiToUBytes),
710                                  usasciiToU, UPRV_LENGTHOF(usasciiToU),
711                                  "ibm-367",
712                                  UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
713                                  NULL, 0)
714         ) {
715             log_err("ibm-367->u with skip did not match.\n");
716         }
717 #endif
718 
719         /* ISO-Latin-1 */
720         if(!testConvertToUnicode(latin1ToUBytes, UPRV_LENGTHOF(latin1ToUBytes),
721                                  latin1ToU, UPRV_LENGTHOF(latin1ToU),
722                                  "LATIN_1",
723                                  UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
724                                  NULL, 0)
725         ) {
726             log_err("LATIN_1->u with skip did not match.\n");
727         }
728 
729 #if !UCONFIG_NO_LEGACY_CONVERSION
730         /* windows-1252 */
731         if(!testConvertToUnicode(latin1ToUBytes, UPRV_LENGTHOF(latin1ToUBytes),
732                                  latin1ToU, UPRV_LENGTHOF(latin1ToU),
733                                  "windows-1252",
734                                  UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
735                                  NULL, 0)
736         ) {
737             log_err("windows-1252->u with skip did not match.\n");
738         }
739 #endif
740     }
741 
742 #if !UCONFIG_NO_LEGACY_CONVERSION
743     {
744         static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
745             0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
746         };
747         static const UChar EBCIDIC_STATEFUL_toUnicode[] ={  0x6d63, 0x03b4
748         };
749         static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5};
750 
751 
752          /* euc-jp*/
753         static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
754             0x8f, 0xda, 0xa1,  /*unassigned*/
755            0x8e, 0xe0,
756         };
757         static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2};
758         static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9};
759 
760          /*EUC_TW*/
761         static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
762             0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
763            0xe6, 0xca, 0x8a,
764         };
765         static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, };
766         static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13};
767                 /*iso-2022-jp*/
768         static const uint8_t sampleTxt_iso_2022_jp[]={
769             0x41,
770             0x1b,   0x24,   0x42,   0x3a, 0x1a, /*unassigned*/
771             0x1b,   0x28,   0x42,   0x42,
772 
773         };
774         static const UChar iso_2022_jptoUnicode[]={    0x41,0x42 };
775         static const int32_t from_iso_2022_jpOffs [] ={  0,9   };
776 
777         /*iso-2022-cn*/
778         static const uint8_t sampleTxt_iso_2022_cn[]={
779             0x0f,   0x41,   0x44,
780             0x1B,   0x24,   0x29,   0x47,
781             0x0E,   0x40,   0x6f, /*unassigned*/
782             0x0f,   0x42,
783 
784         };
785 
786         static const UChar iso_2022_cntoUnicode[]={    0x41, 0x44,0x42 };
787         static const int32_t from_iso_2022_cnOffs [] ={  1,   2,   11   };
788 
789         /*iso-2022-kr*/
790         static const uint8_t sampleTxt_iso_2022_kr[]={
791           0x1b, 0x24, 0x29,  0x43,
792           0x41,
793           0x0E, 0x7f, 0x1E,
794           0x0e, 0x25, 0x50,
795           0x0f, 0x51,
796           0x42, 0x43,
797 
798         };
799         static const UChar iso_2022_krtoUnicode[]={     0x41,0x03A0,0x51, 0x42,0x43};
800         static const int32_t from_iso_2022_krOffs [] ={  4,    9,    12,   13  , 14 };
801 
802         /*hz*/
803         static const uint8_t sampleTxt_hz[]={
804             0x41,
805             0x7e,   0x7b,   0x26,   0x30,
806             0x7f,   0x1E, /*unassigned*/
807             0x26,   0x30,
808             0x7e,   0x7d,   0x42,
809             0x7e,   0x7b,   0x7f,   0x1E,/*unassigned*/
810             0x7e,   0x7d,   0x42,
811         };
812         static const UChar hztoUnicode[]={
813             0x41,
814             0x03a0,
815             0x03A0,
816             0x42,
817             0x42,};
818 
819         static const int32_t from_hzOffs [] ={0,3,7,11,18,  };
820 
821         /*ISCII*/
822         static const uint8_t sampleTxt_iscii[]={
823             0x41,
824             0xa1,
825             0xEB,    /*unassigned*/
826             0x26,
827             0x30,
828             0xa2,
829             0xEC,    /*unassigned*/
830             0x42,
831         };
832         static const UChar isciitoUnicode[]={
833             0x41,
834             0x0901,
835             0x26,
836             0x30,
837             0x0902,
838             0x42,
839             };
840 
841         static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 };
842 
843         /*LMBCS*/
844         static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50,
845             0x12, 0x92, 0xa0, /*unassigned*/
846             0x12, 0x92, 0xA1,
847         };
848         static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4};
849         static const int32_t fromLMBCS[] = {0, 6};
850 
851         if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
852              EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
853             UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
854         log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
855 
856         if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
857              EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
858             UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
859         log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
860 
861         if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
862                  euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
863                 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0))
864             log_err("euc-jp->u with skip did not match.\n");
865 
866 
867 
868         if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
869                  euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
870                 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0))
871             log_err("euc-tw->u with skip did not match.\n");
872 
873 
874         if(!testConvertToUnicode(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
875                  iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
876                 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0))
877             log_err("iso-2022-jp->u with skip did not match.\n");
878 
879         if(!testConvertToUnicode(sampleTxt_iso_2022_cn, UPRV_LENGTHOF(sampleTxt_iso_2022_cn),
880                  iso_2022_cntoUnicode, UPRV_LENGTHOF(iso_2022_cntoUnicode),"iso-2022-cn",
881                 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0))
882             log_err("iso-2022-cn->u with skip did not match.\n");
883 
884         if(!testConvertToUnicode(sampleTxt_iso_2022_kr, UPRV_LENGTHOF(sampleTxt_iso_2022_kr),
885                  iso_2022_krtoUnicode, UPRV_LENGTHOF(iso_2022_krtoUnicode),"iso-2022-kr",
886                 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0))
887             log_err("iso-2022-kr->u with skip did not match.\n");
888 
889         if(!testConvertToUnicode(sampleTxt_hz, UPRV_LENGTHOF(sampleTxt_hz),
890                  hztoUnicode, UPRV_LENGTHOF(hztoUnicode),"HZ",
891                 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0))
892             log_err("HZ->u with skip did not match.\n");
893 
894         if(!testConvertToUnicode(sampleTxt_iscii, UPRV_LENGTHOF(sampleTxt_iscii),
895                  isciitoUnicode, UPRV_LENGTHOF(isciitoUnicode),"ISCII,version=0",
896                 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0))
897             log_err("iscii->u with skip did not match.\n");
898 
899         if(!testConvertToUnicode(sampleTxtLMBCS, UPRV_LENGTHOF(sampleTxtLMBCS),
900                 LMBCSToUnicode, UPRV_LENGTHOF(LMBCSToUnicode),"LMBCS-1",
901                 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0))
902             log_err("LMBCS->u with skip did not match.\n");
903 
904     }
905 #endif
906 
907     log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n");
908     {
909         const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
910             0xe0, 0x80,  0x61,};
911         UChar    expected1[] = {  0x0031, 0x4e8c, 0x0061};
912         int32_t offsets1[] = {   0x0000, 0x0001, 0x0006};
913 
914         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
915                  expected1, UPRV_LENGTHOF(expected1),"utf8",
916                 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
917             log_err("utf8->u with skip did not match.\n");
918     }
919 
920     log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n");
921     {
922         const uint8_t sampleText1[] = {  0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
923         UChar    expected1[] = {  0x00ba,  0x008c,  0x00f8,  0x0061,0xfffe,0xfffe};
924         int32_t offsets1[] = {   0x0000, 0x0001,0x0002,0x0003,4,5};
925 
926         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
927                  expected1, UPRV_LENGTHOF(expected1),"SCSU",
928                 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
929             log_err("scsu->u with skip did not match.\n");
930     }
931 
932     log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
933     {
934         const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */
935             0xFB, 0xEE, 0x28,       /* single-code point sequence at offset 0 */
936             0x24, 0x1E, 0x52,       /* 3 */
937             0xB2,                   /* 6 */
938             0x20,                   /* 7 */
939             0x40, 0x07,             /* 8 - wrong trail byte */
940             0xB3,                   /* 10 */
941             0xB1,                   /* 11 */
942             0xD0, 0x20,             /* 12 - wrong trail byte */
943             0x0D,                   /* 14 */
944             0x0A,                   /* 15 */
945             0x20,                   /* 16 */
946             0x00,                   /* 17 */
947             0xD0, 0x6C,             /* 18 */
948             0xB6,                   /* 20 */
949             0xD8, 0xA5,             /* 21 */
950             0x20,                   /* 23 */
951             0x68,                   /* 24 */
952             0x59,                   /* 25 */
953             0xF9, 0x28,             /* 26 */
954             0x6D,                   /* 28 */
955             0x20,                   /* 29 */
956             0x73,                   /* 30 */
957             0xE0, 0x2D,             /* 31 */
958             0xDE, 0x43,             /* 33 */
959             0xD0, 0x33,             /* 35 */
960             0x20,                   /* 37 */
961             0xFA, 0x83,             /* 38 */
962             0x25, 0x01,             /* 40 */
963             0xFB, 0x16, 0x87,       /* 42 */
964             0x4B, 0x16,             /* 45 */
965             0x20,                   /* 47 */
966             0xE6, 0xBD,             /* 48 */
967             0xEB, 0x5B,             /* 50 */
968             0x4B, 0xCC,             /* 52 */
969             0xF9, 0xA2,             /* 54 */
970             0xFC, 0x10, 0x3E,       /* 56 */
971             0xFE, 0x16, 0x3A, 0x8C, /* 59 */
972             0x20,                   /* 63 */
973             0xFC, 0x03, 0xAC,       /* 64 */
974             0xFF,                   /* 67 - FF just resets the state without encoding anything */
975             0x01,                   /* 68 */
976             0xDE, 0x83,             /* 69 */
977             0x20,                   /* 71 */
978             0x09                    /* 72 */
979         };
980         UChar expected[]={
981             0xFEFF, 0x0061, 0x0062, 0x0020,
982             0x0063, 0x0061, 0x000D, 0x000A,
983             0x0020, 0x0000, 0x00DF, 0x00E6,
984             0x0930, 0x0020, 0x0918, 0x0909,
985             0x3086, 0x304D, 0x0020, 0x3053,
986             0x4000, 0x4E00, 0x7777, 0x0020,
987             0x9FA5, 0x4E00, 0xAC00, 0xBCDE,
988             0x0020, 0xD7A3, 0xDC00, 0xD800,
989             0xD800, 0xDC00, 0xD845, 0xDDDD,
990             0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
991             0xDFFF, 0x0001, 0x0E40, 0x0020,
992             0x0009
993         };
994         int32_t offsets[]={
995             0, 3, 6, 7, /* skip 8, */
996             10, 11, /* skip 12, */
997             14, 15, 16, 17, 18,
998             20, 21, 23, 24, 25, 26, 28, 29,
999             30, 31, 33, 35, 37, 38,
1000             40, 42, 45, 47, 48,
1001             50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59,
1002             63, 64, /* trail */ 64, /* reset only 67, */
1003             68, 69,
1004             71, 72
1005         };
1006 
1007         if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1008                                  expected, UPRV_LENGTHOF(expected), "BOCU-1",
1009                                  UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1010         ) {
1011             log_err("BOCU-1->u with skip did not match.\n");
1012         }
1013     }
1014 
1015     log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
1016     {
1017         const uint8_t sampleText[]={
1018             0x61,                               /* 0  'a' */
1019             0xc0, 0x80,                         /* 1  non-shortest form */
1020             0xc4, 0xb5,                         /* 3  U+0135 */
1021             0xed, 0x80, 0xa0,                   /* 5  Hangul U+d020 */
1022             0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8  surrogate pair for U+10401 */
1023             0xee, 0x80, 0x80,                   /* 14 PUA U+e000 */
1024             0xed, 0xb0, 0x81,                   /* 17 unpaired trail surrogate U+dc01 */
1025             0xf0, 0x90, 0x80, 0x80,             /* 20 illegal 4-byte form for U+10000 */
1026             0x62,                               /* 24 'b' */
1027             0xed, 0xa0, 0x81,                   /* 25 unpaired lead surrogate U+d801 */
1028             0xed, 0xa0,                         /* 28 incomplete sequence */
1029             0xd0, 0x80                          /* 30 U+0400 */
1030         };
1031         UChar expected[]={
1032             0x0061,
1033             /* skip */
1034             0x0135,
1035             0xd020,
1036             0xd801, 0xdc01,
1037             0xe000,
1038             0xdc01,
1039             /* skip */
1040             0x0062,
1041             0xd801,
1042             0x0400
1043         };
1044         int32_t offsets[]={
1045             0,
1046             /* skip 1, */
1047             3,
1048             5,
1049             8, 11,
1050             14,
1051             17,
1052             /* skip 20, 20, */
1053             24,
1054             25,
1055             /* skip 28 */
1056             30
1057         };
1058 
1059         /* without offsets */
1060         if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1061                                  expected, UPRV_LENGTHOF(expected), "CESU-8",
1062                                  UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0)
1063         ) {
1064             log_err("CESU-8->u with skip did not match.\n");
1065         }
1066 
1067         /* with offsets */
1068         if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1069                                  expected, UPRV_LENGTHOF(expected), "CESU-8",
1070                                  UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1071         ) {
1072             log_err("CESU-8->u with skip did not match.\n");
1073         }
1074     }
1075 }
1076 
TestStop(int32_t inputsize,int32_t outputsize)1077 static void TestStop(int32_t inputsize, int32_t outputsize)
1078 {
1079     static const UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1080     static const UChar  sampleText2[] =  { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1081 
1082     static const uint8_t expstopIBM_949[]= {
1083         0x00, 0xb0, 0xa1, 0xb0, 0xa2};
1084 
1085     static const uint8_t expstopIBM_943[] = {
1086         0x9f, 0xaf, 0x9f, 0xb1};
1087 
1088     static const uint8_t expstopIBM_930[] = {
1089         0x0e, 0x5d, 0x5f, 0x5d, 0x63};
1090 
1091     static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01};
1092     static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64};
1093     static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64};
1094 
1095 
1096     static const int32_t  toIBM949Offsstop [] = { 0, 1, 1, 2, 2};
1097     static const int32_t  toIBM943Offsstop [] = { 0, 0, 1, 1};
1098     static const int32_t  toIBM930Offsstop [] = { 0, 0, 0, 1, 1};
1099 
1100     static const int32_t  fromIBM949Offs [] = { 0, 1, 3};
1101     static const int32_t  fromIBM943Offs [] = { 0, 2};
1102     static const int32_t  fromIBM930Offs [] = { 1, 3};
1103 
1104     gInBufferSize = inputsize;
1105     gOutBufferSize = outputsize;
1106 
1107     /*From Unicode*/
1108 
1109 #if !UCONFIG_NO_LEGACY_CONVERSION
1110     if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1111             expstopIBM_949, UPRV_LENGTHOF(expstopIBM_949), "ibm-949",
1112             UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 ))
1113         log_err("u-> ibm-949 with stop did not match.\n");
1114     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1115             expstopIBM_943, UPRV_LENGTHOF(expstopIBM_943), "ibm-943",
1116             UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0))
1117         log_err("u-> ibm-943 with stop did not match.\n");
1118     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1119             expstopIBM_930, UPRV_LENGTHOF(expstopIBM_930), "ibm-930",
1120             UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 ))
1121         log_err("u-> ibm-930 with stop did not match.\n");
1122 
1123     log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP  \n");
1124     {
1125         static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1126         static const uint8_t toIBM943[]= { 0x61,};
1127         static const int32_t offset[]= {0,} ;
1128 
1129          /*EUC_JP*/
1130         static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1131         static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,};
1132         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,};
1133 
1134         /*EUC_TW*/
1135         static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1136         static const uint8_t to_euc_tw[]={
1137             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,};
1138         static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,};
1139 
1140         /*ISO-2022-JP*/
1141         static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, };
1142         static const uint8_t to_iso_2022_jp[]={
1143              0x41,
1144 
1145         };
1146         static const int32_t from_iso_2022_jpOffs [] ={0,};
1147 
1148         /*ISO-2022-cn*/
1149         static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1150         static const uint8_t to_iso_2022_cn[]={
1151             0x41,
1152 
1153         };
1154         static const int32_t from_iso_2022_cnOffs [] ={
1155             0,0,
1156             2,2,
1157         };
1158 
1159         /*ISO-2022-kr*/
1160         static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
1161         static const uint8_t to_iso_2022_kr[]={
1162             0x1b,   0x24,   0x29,   0x43,
1163             0x41,
1164             0x0e,   0x25,   0x50,
1165         };
1166         static const int32_t from_iso_2022_krOffs [] ={
1167             -1,-1,-1,-1,
1168              0,
1169             1,1,1,
1170         };
1171 
1172         /* HZ encoding */
1173         static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1174 
1175         static const uint8_t to_hz[]={
1176             0x7e,   0x7d, 0x41,
1177             0x7e,   0x7b,   0x26,   0x30,
1178 
1179         };
1180         static const int32_t from_hzOffs [] ={
1181             0, 0,0,
1182             1,1,1,1,
1183         };
1184 
1185         /*ISCII*/
1186         static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, };
1187         static const uint8_t to_iscii[]={
1188             0x41,
1189         };
1190         static const int32_t from_isciiOffs [] ={
1191             0,
1192         };
1193 
1194         if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
1195                 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
1196                 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 ))
1197             log_err("u-> ibm-943 with stop did not match.\n");
1198 
1199         if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
1200                 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
1201                 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 ))
1202             log_err("u-> euc-jp with stop did not match.\n");
1203 
1204         if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
1205                 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
1206                 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1207             log_err("u-> euc-tw with stop did not match.\n");
1208 
1209         if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
1210                 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
1211                 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1212             log_err("u-> iso-2022-jp with stop did not match.\n");
1213 
1214         if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
1215                 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
1216                 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1217             log_err("u-> iso-2022-jp with stop did not match.\n");
1218 
1219         if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
1220                 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
1221                 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 ))
1222             log_err("u-> iso-2022-cn with stop did not match.\n");
1223 
1224         if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
1225                 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
1226                 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 ))
1227             log_err("u-> iso-2022-kr with stop did not match.\n");
1228 
1229         if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
1230                 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
1231                 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 ))
1232             log_err("u-> HZ with stop did not match.\n");\
1233 
1234         if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
1235                 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
1236                 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 ))
1237             log_err("u-> iscii with stop did not match.\n");
1238 
1239 
1240     }
1241 #endif
1242 
1243     log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n");
1244     {
1245         static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1246 
1247         static const uint8_t to_SCSU[]={
1248             0x41,
1249 
1250         };
1251         int32_t from_SCSUOffs [] ={
1252             0,
1253 
1254         };
1255         if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1256                 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
1257                 UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 ))
1258             log_err("u-> SCSU with skip did not match.\n");
1259 
1260     }
1261 
1262     /*to Unicode*/
1263 
1264 #if !UCONFIG_NO_LEGACY_CONVERSION
1265     if(!testConvertToUnicode(expstopIBM_949, UPRV_LENGTHOF(expstopIBM_949),
1266              IBM_949stoptoUnicode, UPRV_LENGTHOF(IBM_949stoptoUnicode),"ibm-949",
1267             UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 ))
1268         log_err("ibm-949->u with stop did not match.\n");
1269     if(!testConvertToUnicode(expstopIBM_943, UPRV_LENGTHOF(expstopIBM_943),
1270              IBM_943stoptoUnicode, UPRV_LENGTHOF(IBM_943stoptoUnicode),"ibm-943",
1271             UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 ))
1272         log_err("ibm-943->u with stop did not match.\n");
1273     if(!testConvertToUnicode(expstopIBM_930, UPRV_LENGTHOF(expstopIBM_930),
1274              IBM_930stoptoUnicode, UPRV_LENGTHOF(IBM_930stoptoUnicode),"ibm-930",
1275             UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 ))
1276         log_err("ibm-930->u with stop did not match.\n");
1277 
1278     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n");
1279     {
1280 
1281         static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1282             0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1283         };
1284         static const UChar EBCIDIC_STATEFUL_toUnicode[] ={  0x6d63 };
1285         static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1};
1286 
1287 
1288          /*EUC-JP*/
1289         static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1290             0x8f, 0xda, 0xa1,  /*unassigned*/
1291            0x8e, 0xe0,
1292         };
1293         static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec};
1294         static const int32_t from_euc_jpOffs [] ={ 0, 1, 3};
1295 
1296           /*EUC_TW*/
1297         static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1298             0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1299            0xe6, 0xca, 0x8a,
1300         };
1301         UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2};
1302         int32_t from_euc_twOffs [] ={ 0, 1, 3};
1303 
1304 
1305 
1306          if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
1307              EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
1308             UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1309         log_err("EBCIDIC_STATEFUL->u with stop did not match.\n");
1310 
1311         if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1312              euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1313             UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0))
1314         log_err("euc-jp->u with stop did not match.\n");
1315 
1316         if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
1317                  euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
1318                 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1319             log_err("euc-tw->u with stop did not match.\n");
1320     }
1321 #endif
1322 
1323     log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n");
1324     {
1325         static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1326             0xe0, 0x80,  0x61,};
1327         static const UChar    expected1[] = {  0x0031, 0x4e8c,};
1328         static const int32_t offsets1[] = {   0x0000, 0x0001};
1329 
1330         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1331                  expected1, UPRV_LENGTHOF(expected1),"utf8",
1332                 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1333             log_err("utf8->u with stop did not match.\n");
1334     }
1335     log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n");
1336     {
1337         static const uint8_t sampleText1[] = {  0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04};
1338         static const UChar    expected1[] = {  0x00ba,  0x008c,  0x00f8,  0x0061};
1339         static const int32_t offsets1[] = {   0x0000, 0x0001,0x0002,0x0003};
1340 
1341         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1342                  expected1, UPRV_LENGTHOF(expected1),"SCSU",
1343                 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1344             log_err("scsu->u with stop did not match.\n");
1345     }
1346 
1347 }
1348 
TestSub(int32_t inputsize,int32_t outputsize)1349 static void TestSub(int32_t inputsize, int32_t outputsize)
1350 {
1351     static const UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1352     static const UChar sampleText2[]=    { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1353 
1354     static const uint8_t expsubIBM_949[] =
1355      { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 };
1356 
1357     static const uint8_t expsubIBM_943[] = {
1358         0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 };
1359 
1360     static const uint8_t expsubIBM_930[] = {
1361         0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f };
1362 
1363     static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 };
1364     static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1365     static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1366 
1367     static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1368     static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 };
1369     static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 };
1370 
1371     static const int32_t  fromIBM949Offs [] = { 0, 1, 3, 5, 7 };
1372     static const int32_t  fromIBM943Offs [] = { 0, 2, 4, 6 };
1373     static const int32_t  fromIBM930Offs [] = { 1, 3, 5, 7 };
1374 
1375     gInBufferSize = inputsize;
1376     gOutBufferSize = outputsize;
1377 
1378     /*from unicode*/
1379 
1380 #if !UCONFIG_NO_LEGACY_CONVERSION
1381     if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1382             expsubIBM_949, UPRV_LENGTHOF(expsubIBM_949), "ibm-949",
1383             UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 ))
1384         log_err("u-> ibm-949 with subst did not match.\n");
1385     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1386             expsubIBM_943, UPRV_LENGTHOF(expsubIBM_943), "ibm-943",
1387             UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0))
1388         log_err("u-> ibm-943 with subst did not match.\n");
1389     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1390             expsubIBM_930, UPRV_LENGTHOF(expsubIBM_930), "ibm-930",
1391             UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 ))
1392         log_err("u-> ibm-930 with subst did not match.\n");
1393 
1394     log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE  \n");
1395     {
1396         static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1397         static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 };
1398         static const int32_t offset[]= {0, 1, 1, 3, 3, 4};
1399 
1400 
1401         /* EUC_JP*/
1402         static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1403         static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1404             0xf4, 0xfe, 0xf4, 0xfe,
1405             0x61, 0x8e, 0xe0,
1406         };
1407         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7};
1408 
1409         /*EUC_TW*/
1410         static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1411         static const uint8_t to_euc_tw[]={
1412             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1413             0xfd, 0xfe, 0xfd, 0xfe,
1414             0x61, 0xe6, 0xca, 0x8a,
1415         };
1416 
1417         static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,};
1418 
1419         if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
1420                 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
1421                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 ))
1422             log_err("u-> ibm-943 with substitute did not match.\n");
1423 
1424         if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
1425                 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
1426                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 ))
1427             log_err("u-> euc-jp with substitute did not match.\n");
1428 
1429         if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
1430                 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
1431                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1432             log_err("u-> euc-tw with substitute did not match.\n");
1433     }
1434 #endif
1435 
1436     log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1437     {
1438         UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1439 
1440         const uint8_t to_SCSU[]={
1441             0x41,
1442             0x0e, 0xff,0xfd,
1443             0x42
1444 
1445 
1446         };
1447         int32_t from_SCSUOffs [] ={
1448             0,
1449             1,1,1,
1450             2,
1451 
1452         };
1453         const uint8_t to_SCSU_1[]={
1454             0x41,
1455 
1456         };
1457         int32_t from_SCSUOffs_1 [] ={
1458             0,
1459 
1460         };
1461         if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1462                 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
1463                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 ))
1464             log_err("u-> SCSU with substitute did not match.\n");
1465 
1466         if(!testConvertFromUnicodeWithContext(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1467                 to_SCSU_1, UPRV_LENGTHOF(to_SCSU_1), "SCSU",
1468                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
1469             log_err("u-> SCSU with substitute did not match.\n");
1470     }
1471 
1472     log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1473     {
1474         static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,};
1475         static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac,
1476                            0xf0, 0x90, 0x90, 0x81,
1477                            0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
1478                            0xef, 0xbf, 0xbf, 0x61,
1479 
1480         };
1481         static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 };
1482         if(!testConvertFromUnicode(testinput, UPRV_LENGTHOF(testinput),
1483                 expectedUTF8, UPRV_LENGTHOF(expectedUTF8), "utf8",
1484                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) {
1485             log_err("u-> utf8 with substitute did not match.\n");
1486         }
1487     }
1488 
1489     log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1490     {
1491         static const UChar in[]={ 0x0041, 0xfeff };
1492 
1493         static const uint8_t out[]={
1494 #if U_IS_BIG_ENDIAN
1495             0xfe, 0xff,
1496             0x00, 0x41,
1497             0xfe, 0xff
1498 #else
1499             0xff, 0xfe,
1500             0x41, 0x00,
1501             0xff, 0xfe
1502 #endif
1503         };
1504         static const int32_t offsets[]={
1505             -1, -1, 0, 0, 1, 1
1506         };
1507 
1508         if(!testConvertFromUnicode(in, UPRV_LENGTHOF(in),
1509                                    out, UPRV_LENGTHOF(out), "UTF-16",
1510                                    UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1511         ) {
1512             log_err("u->UTF-16 with substitute did not match.\n");
1513         }
1514     }
1515 
1516     log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1517     {
1518         static const UChar in[]={ 0x0041, 0xfeff };
1519 
1520         static const uint8_t out[]={
1521 #if U_IS_BIG_ENDIAN
1522             0x00, 0x00, 0xfe, 0xff,
1523             0x00, 0x00, 0x00, 0x41,
1524             0x00, 0x00, 0xfe, 0xff
1525 #else
1526             0xff, 0xfe, 0x00, 0x00,
1527             0x41, 0x00, 0x00, 0x00,
1528             0xff, 0xfe, 0x00, 0x00
1529 #endif
1530         };
1531         static const int32_t offsets[]={
1532             -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1
1533         };
1534 
1535         if(!testConvertFromUnicode(in, UPRV_LENGTHOF(in),
1536                                    out, UPRV_LENGTHOF(out), "UTF-32",
1537                                    UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1538         ) {
1539             log_err("u->UTF-32 with substitute did not match.\n");
1540         }
1541     }
1542 
1543     /*to unicode*/
1544 
1545 #if !UCONFIG_NO_LEGACY_CONVERSION
1546     if(!testConvertToUnicode(expsubIBM_949, UPRV_LENGTHOF(expsubIBM_949),
1547              IBM_949subtoUnicode, UPRV_LENGTHOF(IBM_949subtoUnicode),"ibm-949",
1548             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 ))
1549         log_err("ibm-949->u with substitute did not match.\n");
1550     if(!testConvertToUnicode(expsubIBM_943, UPRV_LENGTHOF(expsubIBM_943),
1551              IBM_943subtoUnicode, UPRV_LENGTHOF(IBM_943subtoUnicode),"ibm-943",
1552             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 ))
1553         log_err("ibm-943->u with substitute did not match.\n");
1554     if(!testConvertToUnicode(expsubIBM_930, UPRV_LENGTHOF(expsubIBM_930),
1555              IBM_930subtoUnicode, UPRV_LENGTHOF(IBM_930subtoUnicode),"ibm-930",
1556             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 ))
1557         log_err("ibm-930->u with substitute did not match.\n");
1558 
1559     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1560     {
1561 
1562         const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1563             0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1564         };
1565         UChar EBCIDIC_STATEFUL_toUnicode[] ={  0x6d63, 0xfffd, 0x03b4
1566         };
1567         int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5};
1568 
1569 
1570         /* EUC_JP*/
1571         const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1572             0x8f, 0xda, 0xa1,  /*unassigned*/
1573            0x8e, 0xe0, 0x8a
1574         };
1575         UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a };
1576         int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6,  9, 11 };
1577 
1578         /*EUC_TW*/
1579         const uint8_t sampleTxt_euc_tw[]={
1580             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1581             0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1582             0xe6, 0xca, 0x8a,
1583         };
1584         UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, };
1585         int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13};
1586 
1587 
1588         if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
1589            EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
1590           UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1591             log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n");
1592 
1593 
1594         if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1595            euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1596           UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ))
1597             log_err("euc-jp->u with substitute did not match.\n");
1598 
1599 
1600         if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
1601            euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
1602           UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1603             log_err("euc-tw->u with substitute  did not match.\n");
1604 
1605 
1606         if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1607            euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1608           UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND))
1609             log_err("euc-jp->u with substitute did not match.\n");
1610     }
1611 #endif
1612 
1613     log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1614     {
1615         const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1616             0xe0, 0x80,  0x61,};
1617         UChar    expected1[] = {  0x0031, 0x4e8c, 0xfffd, 0xfffd, 0x0061};
1618         int32_t offsets1[] = {   0x0000, 0x0001, 0x0004, 0x0005, 0x0006};
1619 
1620         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1621                  expected1, UPRV_LENGTHOF(expected1),"utf8",
1622                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1623             log_err("utf8->u with substitute did not match.\n");
1624     }
1625     log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1626     {
1627         const uint8_t sampleText1[] = {  0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
1628         UChar    expected1[] = {  0x00ba,  0x008c,  0x00f8,  0x0061,0xfffd,0xfffd};
1629         int32_t offsets1[] = {   0x0000, 0x0001,0x0002,0x0003,4,5};
1630 
1631         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1632                  expected1, UPRV_LENGTHOF(expected1),"SCSU",
1633                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1634             log_err("scsu->u with stop did not match.\n");
1635     }
1636 
1637 #if !UCONFIG_NO_LEGACY_CONVERSION
1638     log_verbose("Testing ibm-930 subchar/subchar1\n");
1639     {
1640         static const UChar u1[]={         0x6d63,           0x6d64,     0x6d65,     0x6d66,     0xdf };
1641         static const uint8_t s1[]={       0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f };
1642         static const int32_t offsets1[]={ 0,    0,    0,    1,    1,    2,    2,    3,    3,    4,    4 };
1643 
1644         static const UChar u2[]={         0x6d63,           0x6d64,     0xfffd,     0x6d66,     0x1a };
1645         static const uint8_t s2[]={       0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 };
1646         static const int32_t offsets2[]={ 1,                3,          5,          7,          10 };
1647 
1648         if(!testConvertFromUnicode(u1, UPRV_LENGTHOF(u1), s1, UPRV_LENGTHOF(s1), "ibm-930",
1649                                    UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1650         ) {
1651             log_err("u->ibm-930 subchar/subchar1 did not match.\n");
1652         }
1653 
1654         if(!testConvertToUnicode(s2, UPRV_LENGTHOF(s2), u2, UPRV_LENGTHOF(u2), "ibm-930",
1655                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1656         ) {
1657             log_err("ibm-930->u subchar/subchar1 did not match.\n");
1658         }
1659     }
1660 
1661     log_verbose("Testing GB 18030 with substitute callbacks\n");
1662     {
1663         static const UChar u2[]={
1664             0x24, 0x7f, 0x80,                   0x1f9,      0x20ac,     0x4e00,     0x9fa6,                 0xffff,                 0xd800, 0xdc00,         0xfffd,                 0xdbff, 0xdfff };
1665         static const uint8_t gb2[]={
1666             0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 };
1667         static const int32_t offsets2[]={
1668             0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 };
1669 
1670         if(!testConvertToUnicode(gb2, UPRV_LENGTHOF(gb2), u2, UPRV_LENGTHOF(u2), "gb18030",
1671                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1672         ) {
1673             log_err("gb18030->u with substitute did not match.\n");
1674         }
1675     }
1676 #endif
1677 
1678     log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n");
1679     {
1680         static const uint8_t utf7[]={
1681          /* a~            a+AB~                           a+AB\x0c                        a+AB-                         a+AB.                         a+. */
1682             0x61, 0x7e,   0x61, 0x2b, 0x41, 0x42, 0x7e,   0x61, 0x2b, 0x41, 0x42, 0x0c,   0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b,   0x2e
1683         };
1684         static const UChar unicode[]={
1685             0x61, 0xfffd, 0x61,       0xfffd,     0xfffd, 0x61,       0xfffd,     0xfffd, 0x61,       0xfffd,           0x61,       0xfffd,     0x2e, 0x61, 0xfffd, 0x2e
1686         };
1687         static const int32_t offsets[]={
1688             0,    1,      2,          4,          6,      7,          9,          11,     12,         14,               17,         19,         21,   22,   23,     24
1689         };
1690 
1691         if(!testConvertToUnicode(utf7, UPRV_LENGTHOF(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7",
1692                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1693         ) {
1694             log_err("UTF-7->u with substitute did not match.\n");
1695         }
1696     }
1697 
1698     log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n");
1699     {
1700         static const uint8_t
1701             in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff },
1702             in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff },
1703             in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff };
1704 
1705         static const UChar
1706             out1[]={ 0x4e00, 0xfeff },
1707             out2[]={ 0x004e, 0xfffe },
1708             out3[]={ 0xfefd, 0x4e00, 0xfeff };
1709 
1710         static const int32_t
1711             offsets1[]={ 2, 4 },
1712             offsets2[]={ 2, 4 },
1713             offsets3[]={ 0, 2, 4 };
1714 
1715         if(!testConvertToUnicode(in1, UPRV_LENGTHOF(in1), out1, UPRV_LENGTHOF(out1), "UTF-16",
1716                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1717         ) {
1718             log_err("UTF-16 (BE BOM)->u with substitute did not match.\n");
1719         }
1720 
1721         if(!testConvertToUnicode(in2, UPRV_LENGTHOF(in2), out2, UPRV_LENGTHOF(out2), "UTF-16",
1722                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1723         ) {
1724             log_err("UTF-16 (LE BOM)->u with substitute did not match.\n");
1725         }
1726 
1727         if(!testConvertToUnicode(in3, UPRV_LENGTHOF(in3), out3, UPRV_LENGTHOF(out3), "UTF-16",
1728                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1729         ) {
1730             log_err("UTF-16 (no BOM)->u with substitute did not match.\n");
1731         }
1732     }
1733 
1734     log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n");
1735     {
1736         static const uint8_t
1737             in1[]={ 0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff },
1738             in2[]={ 0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00 },
1739             in3[]={ 0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01 },
1740             in4[]={ 0x00, 0x01, 0x02, 0x03,   0x00, 0x11, 0x12, 0x00,   0x00, 0x00, 0x4e, 0x00 };
1741 
1742         static const UChar
1743             out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff },
1744             out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe },
1745             out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd },
1746             out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 };
1747 
1748         static const int32_t
1749             offsets1[]={ 4, 4, 8 },
1750             offsets2[]={ 4, 4, 8 },
1751             offsets3[]={ 0, 4, 4, 8, 12 },
1752             offsets4[]={ 0, 0, 4, 8 };
1753 
1754         if(!testConvertToUnicode(in1, UPRV_LENGTHOF(in1), out1, UPRV_LENGTHOF(out1), "UTF-32",
1755                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1756         ) {
1757             log_err("UTF-32 (BE BOM)->u with substitute did not match.\n");
1758         }
1759 
1760         if(!testConvertToUnicode(in2, UPRV_LENGTHOF(in2), out2, UPRV_LENGTHOF(out2), "UTF-32",
1761                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1762         ) {
1763             log_err("UTF-32 (LE BOM)->u with substitute did not match.\n");
1764         }
1765 
1766         if(!testConvertToUnicode(in3, UPRV_LENGTHOF(in3), out3, UPRV_LENGTHOF(out3), "UTF-32",
1767                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1768         ) {
1769             log_err("UTF-32 (no BOM)->u with substitute did not match.\n");
1770         }
1771 
1772         if(!testConvertToUnicode(in4, UPRV_LENGTHOF(in4), out4, UPRV_LENGTHOF(out4), "UTF-32",
1773                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0)
1774         ) {
1775             log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n");
1776         }
1777     }
1778 }
1779 
TestSubWithValue(int32_t inputsize,int32_t outputsize)1780 static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
1781 {
1782     UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1783     UChar  sampleText2[] =  { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1784 
1785     const uint8_t expsubwvalIBM_949[]= {
1786         0x00, 0xb0, 0xa1, 0xb0, 0xa2,
1787         0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 };
1788 
1789     const uint8_t expsubwvalIBM_943[]= {
1790         0x9f, 0xaf, 0x9f, 0xb1,
1791         0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 };
1792 
1793     const uint8_t expsubwvalIBM_930[] = {
1794         0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f };
1795 
1796     int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 };
1797     int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 };
1798     int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */
1799 
1800     gInBufferSize = inputsize;
1801     gOutBufferSize = outputsize;
1802 
1803     /*from Unicode*/
1804 
1805 #if !UCONFIG_NO_LEGACY_CONVERSION
1806     if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1807             expsubwvalIBM_949, UPRV_LENGTHOF(expsubwvalIBM_949), "ibm-949",
1808             UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 ))
1809         log_err("u-> ibm-949 with subst with value did not match.\n");
1810 
1811     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1812             expsubwvalIBM_943, UPRV_LENGTHOF(expsubwvalIBM_943), "ibm-943",
1813             UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 ))
1814         log_err("u-> ibm-943 with sub with value did not match.\n");
1815 
1816     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1817             expsubwvalIBM_930, UPRV_LENGTHOF(expsubwvalIBM_930), "ibm-930",
1818             UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 ))
1819         log_err("u-> ibm-930 with subst with value did not match.\n");
1820 
1821 
1822     log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE  \n");
1823     {
1824         static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1825         static const uint8_t toIBM943[]= { 0x61,
1826             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1827             0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1828             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1829             0x61 };
1830         static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
1831 
1832 
1833          /* EUC_JP*/
1834         static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, };
1835         static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1836             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1837             0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1838             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1839             0x61, 0x8e, 0xe0,
1840         };
1841         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,
1842             3, 3, 3, 3, 3, 3,
1843             3, 3, 3, 3, 3, 3,
1844             5, 5, 5, 5, 5, 5,
1845             6, 7, 7,
1846         };
1847 
1848         /*EUC_TW*/
1849         static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1850         static const uint8_t to_euc_tw[]={
1851             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1852             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1853             0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1854             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1855             0x61, 0xe6, 0xca, 0x8a,
1856         };
1857         static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,
1858              3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5,
1859              6, 7, 7, 8,
1860         };
1861         /*ISO-2022-JP*/
1862         static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ;
1863         static const uint8_t to_iso_2022_jp1[]={
1864             0x1b,   0x24,   0x42,   0x21, 0x21,
1865             0x1b,   0x28,   0x42,   0x25, 0x55,   0x30,   0x30,   0x45,   0x39,
1866             0x1b,   0x24,   0x42,   0x21, 0x22,
1867             0x1b,   0x28,   0x42,   0x25, 0x55,   0x30,   0x30,   0x45,   0x39,
1868             0x42,
1869         };
1870 
1871         static const int32_t from_iso_2022_jpOffs1 [] ={
1872             0,0,0,0,0,
1873             1,1,1,1,1,1,1,1,1,
1874             2,2,2,2,2,
1875             3,3,3,3,3,3,3,3,3,
1876             4,
1877         };
1878         /* surrogate pair*/
1879         static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ;
1880         static const uint8_t to_iso_2022_jp2[]={
1881                                 0x1b,   0x24,   0x42,   0x21,   0x21,
1882                                 0x1b,   0x28,   0x42,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1883                                 0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1884                                 0x1b,   0x24,   0x42,   0x21,   0x22,
1885                                 0x1b,   0x28,   0x42,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1886                                 0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1887                                 0x42,
1888                                 };
1889         static const int32_t from_iso_2022_jpOffs2 [] ={
1890             0,0,0,0,0,
1891             1,1,1,1,1,1,1,1,1,
1892             1,1,1,1,1,1,
1893             3,3,3,3,3,
1894             4,4,4,4,4,4,4,4,4,
1895             4,4,4,4,4,4,
1896             6,
1897         };
1898 
1899         /*ISO-2022-cn*/
1900         static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1901         static const uint8_t to_iso_2022_cn[]={
1902             0x41,
1903             0x25, 0x55,   0x33,   0x37,   0x31,   0x32,
1904             0x42,
1905         };
1906         static const int32_t from_iso_2022_cnOffs [] ={
1907             0,
1908             1,1,1,1,1,1,
1909             2,
1910         };
1911 
1912         static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042};
1913 
1914         static const uint8_t to_iso_2022_cn4[]={
1915                              0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
1916                              0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1917                              0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1918                              0x0e,   0x21,   0x22,
1919                              0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1920                              0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1921                              0x42,
1922                              };
1923         static const int32_t from_iso_2022_cnOffs4 [] ={
1924             0,0,0,0,0,0,0,
1925             1,1,1,1,1,1,1,
1926             1,1,1,1,1,1,
1927             3,3,3,
1928             4,4,4,4,4,4,4,
1929             4,4,4,4,4,4,
1930             6
1931 
1932         };
1933 
1934         /*ISO-2022-kr*/
1935         static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
1936         static const uint8_t to_iso_2022_kr2[]={
1937             0x1b,   0x24,   0x29,   0x43,
1938             0x41,
1939             0x0e,   0x25,   0x50,
1940             0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1941             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1942             0x0e,   0x25,   0x50,
1943             0x0f,   0x42,
1944             0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1945             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1946             0x43
1947         };
1948         static const int32_t from_iso_2022_krOffs2 [] ={
1949             -1,-1,-1,-1,
1950              0,
1951             1,1,1,
1952             2,2,2,2,2,2,2,
1953             2,2,2,2,2,2,
1954             4,4,4,
1955             5,5,
1956             6,6,6,6,6,6,
1957             6,6,6,6,6,6,
1958             8,
1959         };
1960 
1961         static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 };
1962         static const uint8_t to_iso_2022_kr[]={
1963             0x1b,   0x24,   0x29,   0x43,
1964             0x41,
1965             0x0e,   0x25,   0x50,
1966             0x0f,   0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
1967             0x0e,   0x25,   0x50,
1968             0x0f,   0x42,
1969             0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
1970             0x43
1971         };
1972 
1973 
1974         static const int32_t from_iso_2022_krOffs [] ={
1975             -1,-1,-1,-1,
1976              0,
1977             1,1,1,
1978             2,2,2,2,2,2,2,
1979             3,3,3,
1980             4,4,
1981             5,5,5,5,5,5,
1982             6,
1983         };
1984         /* HZ encoding */
1985         static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1986 
1987         static const uint8_t to_hz[]={
1988             0x7e,   0x7d,   0x41,
1989             0x7e,   0x7b,   0x26,   0x30,
1990             0x7e,   0x7d,   0x25,   0x55,   0x30,   0x36,   0x36,   0x32,  /*unassigned*/
1991             0x7e,   0x7b,   0x26,   0x30,
1992             0x7e,   0x7d,   0x42,
1993 
1994         };
1995         static const int32_t from_hzOffs [] ={
1996             0,0,0,
1997             1,1,1,1,
1998             2,2,2,2,2,2,2,2,
1999             3,3,3,3,
2000             4,4,4
2001         };
2002 
2003         static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
2004         static const uint8_t to_hz2[]={
2005             0x7e,   0x7d,   0x41,
2006             0x7e,   0x7b,   0x26,   0x30,
2007             0x7e,   0x7d,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
2008             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2009             0x7e,   0x7b,   0x26,   0x30,
2010             0x7e,   0x7d,   0x42,
2011             0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
2012             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2013             0x43
2014         };
2015         static const int32_t from_hzOffs2 [] ={
2016             0,0,0,
2017             1,1,1,1,
2018             2,2,2,2,2,2,2,2,
2019             2,2,2,2,2,2,
2020             4,4,4,4,
2021             5,5,5,
2022             6,6,6,6,6,6,
2023             6,6,6,6,6,6,
2024             8,
2025         };
2026 
2027                 /*ISCII*/
2028         static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 };
2029         static const uint8_t to_iscii[]={
2030             0x41,
2031             0xef,   0x42,   0xa1,
2032             0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
2033             0xa2,
2034             0x42,
2035             0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
2036             0x43
2037         };
2038 
2039 
2040         static const int32_t from_isciiOffs [] ={
2041             0,
2042             1,1,1,
2043             2,2,2,2,2,2,
2044             3,
2045             4,
2046             5,5,5,5,5,5,
2047             6,
2048         };
2049 
2050         if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
2051                 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
2052                 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 ))
2053             log_err("u-> ibm-943 with subst with value did not match.\n");
2054 
2055         if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
2056                 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
2057                 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 ))
2058             log_err("u-> euc-jp with subst with value did not match.\n");
2059 
2060         if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
2061                 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
2062                 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 ))
2063             log_err("u-> euc-tw with subst with value did not match.\n");
2064 
2065         if(!testConvertFromUnicode(iso_2022_jp_inputText1, UPRV_LENGTHOF(iso_2022_jp_inputText1),
2066                 to_iso_2022_jp1, UPRV_LENGTHOF(to_iso_2022_jp1), "iso-2022-jp",
2067                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2068             log_err("u-> iso_2022_jp with subst with value did not match.\n");
2069 
2070         if(!testConvertFromUnicode(iso_2022_jp_inputText1, UPRV_LENGTHOF(iso_2022_jp_inputText1),
2071                 to_iso_2022_jp1, UPRV_LENGTHOF(to_iso_2022_jp1), "iso-2022-jp",
2072                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2073             log_err("u-> iso_2022_jp with subst with value did not match.\n");
2074 
2075         if(!testConvertFromUnicode(iso_2022_jp_inputText2, UPRV_LENGTHOF(iso_2022_jp_inputText2),
2076                 to_iso_2022_jp2, UPRV_LENGTHOF(to_iso_2022_jp2), "iso-2022-jp",
2077                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 ))
2078             log_err("u-> iso_2022_jp with subst with value did not match.\n");
2079         /*ESCAPE OPTIONS*/
2080         {
2081             /* surrogate pair*/
2082             static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ;
2083             static const uint8_t to_iso_2022_jp3_v2[]={
2084                     0x1b,   0x24,   0x42,   0x21,   0x21,
2085                     0x1b,   0x28,   0x42,   0x26,   0x23,   0x31,  0x34,   0x34,   0x34,   0x37, 0x30, 0x3b,
2086 
2087                     0x1b,   0x24,   0x42,   0x21,   0x22,
2088                     0x1b,   0x28,   0x42,   0x26,   0x23,  0x31,  0x34,   0x34,   0x34,   0x37, 0x30, 0x3b,
2089 
2090                     0x42,
2091                     0x26,   0x23,   0x33,   0x36,   0x38,   0x39,   0x32,   0x3b,
2092                     };
2093 
2094             static const int32_t from_iso_2022_jpOffs3_v2 [] ={
2095                 0,0,0,0,0,
2096                 1,1,1,1,1,1,1,1,1,1,1,1,
2097 
2098                 3,3,3,3,3,
2099                 4,4,4,4,4,4,4,4,4,4,4,4,
2100 
2101                 6,
2102                 7,7,7,7,7,7,7,7,7
2103             };
2104 
2105             if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, UPRV_LENGTHOF(iso_2022_jp_inputText3),
2106                     to_iso_2022_jp3_v2, UPRV_LENGTHOF(to_iso_2022_jp3_v2), "iso-2022-jp",
2107                     UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2108                 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n");
2109         }
2110         {
2111             static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2112             static const uint8_t to_iso_2022_cn5_v2[]={
2113                              0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2114                              0x0f,   0x5c,   0x75,   0x44,   0x38,   0x34,   0x44,
2115                              0x5c,   0x75,   0x44,   0x43,   0x35,   0x36,
2116                              0x0e,   0x21,   0x22,
2117                              0x0f,   0x5c,   0x75,   0x44,   0x38,   0x34,   0x44,
2118                              0x5c,   0x75,   0x44,   0x43,   0x35,   0x36,
2119                              0x42,
2120                              0x5c,   0x75,   0x30,   0x39,   0x30,   0x32,
2121                              };
2122             static const int32_t from_iso_2022_cnOffs5_v2 [] ={
2123                 0,0,0,0,0,0,0,
2124                 1,1,1,1,1,1,1,
2125                 1,1,1,1,1,1,
2126                 3,3,3,
2127                 4,4,4,4,4,4,4,
2128                 4,4,4,4,4,4,
2129                 6,
2130                 7,7,7,7,7,7
2131             };
2132             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, UPRV_LENGTHOF(iso_2022_cn_inputText5),
2133                 to_iso_2022_cn5_v2, UPRV_LENGTHOF(to_iso_2022_cn5_v2), "iso-2022-cn",
2134                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR ))
2135                 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n");
2136 
2137         }
2138         {
2139             static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2140             static const uint8_t to_iso_2022_cn6_v2[]={
2141                                 0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2142                                 0x0f,   0x7b,   0x55,   0x2b,   0x32,   0x33,   0x34,   0x35,   0x36,   0x7d,
2143                                 0x0e,   0x21,   0x22,
2144                                 0x0f,   0x7b,   0x55,   0x2b,   0x32,   0x33,   0x34,   0x35,   0x36,   0x7d,
2145                                 0x42,
2146                                 0x7b,   0x55,   0x2b,   0x30,   0x39,   0x30,   0x32,   0x7d
2147                              };
2148             static const int32_t from_iso_2022_cnOffs6_v2 [] ={
2149                     0,  0,  0,  0,  0,  0,  0,
2150                     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
2151                     3,  3,  3,
2152                     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
2153                     6,
2154                     7,  7,  7,  7,  7,  7,  7,  7,
2155             };
2156             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, UPRV_LENGTHOF(iso_2022_cn_inputText6),
2157                 to_iso_2022_cn6_v2, UPRV_LENGTHOF(to_iso_2022_cn6_v2), "iso-2022-cn",
2158                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR ))
2159                 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n");
2160 
2161         }
2162         {
2163             static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2164             static const uint8_t to_iso_2022_cn7_v2[]={
2165                                 0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2166                                 0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,   0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2167                                 0x0e,   0x21,   0x22,
2168                                 0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,   0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2169                                 0x42,   0x25,   0x55,   0x30,   0x39,   0x30,   0x32,
2170                             };
2171             static const int32_t from_iso_2022_cnOffs7_v2 [] ={
2172                                 0,  0,  0,  0,  0,  0,  0,
2173                                 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
2174                                 3,  3,  3,
2175                                 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
2176                                 6,
2177                                 7,  7,  7,  7,  7,  7,
2178             };
2179             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, UPRV_LENGTHOF(iso_2022_cn_inputText7),
2180                 to_iso_2022_cn7_v2, UPRV_LENGTHOF(to_iso_2022_cn7_v2), "iso-2022-cn",
2181                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR ))
2182                 log_err("u-> iso-2022-cn with sub & K did not match.\n");
2183 
2184         }
2185         {
2186             static const UChar iso_2022_cn_inputText8[]={
2187                                 0x3000,
2188                                 0xD84D, 0xDC56,
2189                                 0x3001,
2190                                 0xD84D, 0xDC56,
2191                                 0xDBFF, 0xDFFF,
2192                                 0x0042,
2193                                 0x0902};
2194             static const uint8_t to_iso_2022_cn8_v2[]={
2195                                 0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2196                                 0x0f,   0x5c,   0x32,   0x33,   0x34,   0x35,   0x36,   0x20,
2197                                 0x0e,   0x21,   0x22,
2198                                 0x0f,   0x5c,   0x32,   0x33,   0x34,   0x35,   0x36,   0x20,
2199                                 0x5c,   0x31,   0x30,   0x46,   0x46,   0x46,   0x46,   0x20,
2200                                 0x42,
2201                                 0x5c,   0x39,   0x30,   0x32,   0x20
2202                              };
2203             static const int32_t from_iso_2022_cnOffs8_v2 [] ={
2204                     0,  0,  0,  0,  0,  0,  0,
2205                     1,  1,  1,  1,  1,  1,  1,  1,
2206                     3,  3,  3,
2207                     4,  4,  4,  4,  4,  4,  4,  4,
2208                     6,  6,  6,  6,  6,  6,  6,  6,
2209                     8,
2210                     9,  9,  9,  9,  9
2211             };
2212             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, UPRV_LENGTHOF(iso_2022_cn_inputText8),
2213                 to_iso_2022_cn8_v2, UPRV_LENGTHOF(to_iso_2022_cn8_v2), "iso-2022-cn",
2214                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR ))
2215                 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n");
2216 
2217         }
2218         {
2219             static const uint8_t to_iso_2022_cn4_v3[]={
2220                             0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2221                             0x0f,   0x5c,   0x55,   0x30,   0x30,   0x30,   0x32,   0x33,   0x34,   0x35,   0x36,
2222                             0x0e,   0x21,   0x22,
2223                             0x0f,   0x5c,   0x55,   0x30,   0x30,   0x30,   0x32,   0x33,   0x34,   0x35,   0x36,
2224                             0x42
2225                              };
2226 
2227 
2228             static const int32_t from_iso_2022_cnOffs4_v3 [] ={
2229                 0,0,0,0,0,0,0,
2230                 1,1,1,1,1,1,1,1,1,1,1,
2231 
2232                 3,3,3,
2233                 4,4,4,4,4,4,4,4,4,4,4,
2234 
2235                 6
2236 
2237             };
2238             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, UPRV_LENGTHOF(iso_2022_cn_inputText4),
2239                 to_iso_2022_cn4_v3, UPRV_LENGTHOF(to_iso_2022_cn4_v3), "iso-2022-cn",
2240                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2241             {
2242                 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n");
2243             }
2244         }
2245         if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
2246                 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
2247                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 ))
2248             log_err("u-> iso_2022_cn with subst with value did not match.\n");
2249 
2250         if(!testConvertFromUnicode(iso_2022_cn_inputText4, UPRV_LENGTHOF(iso_2022_cn_inputText4),
2251                 to_iso_2022_cn4, UPRV_LENGTHOF(to_iso_2022_cn4), "iso-2022-cn",
2252                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 ))
2253             log_err("u-> iso_2022_cn with subst with value did not match.\n");
2254         if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
2255                 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
2256                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 ))
2257             log_err("u-> iso_2022_kr with subst with value did not match.\n");
2258         if(!testConvertFromUnicode(iso_2022_kr_inputText2, UPRV_LENGTHOF(iso_2022_kr_inputText2),
2259                 to_iso_2022_kr2, UPRV_LENGTHOF(to_iso_2022_kr2), "iso-2022-kr",
2260                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 ))
2261             log_err("u-> iso_2022_kr2 with subst with value did not match.\n");
2262         if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
2263                 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
2264                 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 ))
2265             log_err("u-> hz with subst with value did not match.\n");
2266         if(!testConvertFromUnicode(hz_inputText2, UPRV_LENGTHOF(hz_inputText2),
2267                 to_hz2, UPRV_LENGTHOF(to_hz2), "HZ",
2268                 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 ))
2269             log_err("u-> hz with subst with value did not match.\n");
2270 
2271         if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
2272                 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
2273                 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 ))
2274             log_err("u-> iscii with subst with value did not match.\n");
2275     }
2276 #endif
2277 
2278     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
2279     /*to Unicode*/
2280     {
2281 #if !UCONFIG_NO_LEGACY_CONVERSION
2282         static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
2283             0x81, 0xad, /*unassigned*/
2284             0x89, 0xd3 };
2285         static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
2286             0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
2287             0x7B87};
2288         static const int32_t  fromIBM943Offs [] =    { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
2289 
2290         /* EUC_JP*/
2291         static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
2292             0x8f, 0xda, 0xa1,  /*unassigned*/
2293            0x8e, 0xe0,
2294         };
2295         static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec,
2296             0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31,
2297             0x00a2 };
2298         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3,
2299             6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2300             9,
2301         };
2302 
2303         /*EUC_TW*/
2304         static const uint8_t sampleTxt_euc_tw[]={
2305             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
2306             0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
2307             0xe6, 0xca, 0x8a,
2308         };
2309         static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2,
2310              0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43,
2311              0x8706, 0x8a, };
2312         static const int32_t from_euc_twOffs [] ={ 0, 1, 3,
2313              7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2314              11, 13};
2315 
2316         /*iso-2022-jp*/
2317         static const uint8_t sampleTxt_iso_2022_jp[]={
2318             0x1b,   0x28,   0x42,   0x41,
2319             0x1b,   0x24,   0x42,   0x3a, 0x1a, /*unassigned*/
2320             0x1b,   0x28,   0x42,   0x42,
2321 
2322         };
2323                                                    /*     A    %    X    3    A    %    X    1    A     B    */
2324         static const UChar iso_2022_jptoUnicode[]={    0x41,0x25,0x58,0x33,0x41,0x25,0x58,0x31,0x41, 0x42 };
2325         static const int32_t from_iso_2022_jpOffs [] ={  3,   7,   7,   7,   7,   7,   7,   7,   7,    12   };
2326 
2327         /*iso-2022-cn*/
2328         static const uint8_t sampleTxt_iso_2022_cn[]={
2329             0x0f,   0x41,   0x44,
2330             0x1B,   0x24,   0x29,   0x47,
2331             0x0E,   0x40,   0x6c, /*unassigned*/
2332             0x0f,   0x42,
2333 
2334         };
2335         static const UChar iso_2022_cntoUnicode[]={    0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 };
2336         static const int32_t from_iso_2022_cnOffs [] ={  1,   2,   8,   8,   8,   8,   8,   8,   8,  8,    11   };
2337 
2338         /*iso-2022-kr*/
2339         static const uint8_t sampleTxt_iso_2022_kr[]={
2340           0x1b, 0x24, 0x29,  0x43,
2341           0x41,
2342           0x0E, 0x7f, 0x1E,
2343           0x0e, 0x25, 0x50,
2344           0x0f, 0x51,
2345           0x42, 0x43,
2346 
2347         };
2348         static const UChar iso_2022_krtoUnicode[]={     0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43};
2349         static const int32_t from_iso_2022_krOffs [] ={  4,   6,   6,   6,   6,   6,   6,   6,   6,    9,    12,   13  , 14 };
2350 
2351         /*hz*/
2352         static const uint8_t sampleTxt_hz[]={
2353             0x41,
2354             0x7e,   0x7b,   0x26,   0x30,
2355             0x7f,   0x1E, /*unassigned*/
2356             0x26,   0x30,
2357             0x7e,   0x7d,   0x42,
2358             0x7e,   0x7b,   0x7f,   0x1E,/*unassigned*/
2359             0x7e,   0x7d,   0x42,
2360         };
2361         static const UChar hztoUnicode[]={
2362             0x41,
2363             0x03a0,
2364             0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2365             0x03A0,
2366             0x42,
2367             0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2368             0x42,};
2369 
2370         static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18,  };
2371 
2372 
2373         /*iscii*/
2374         static const uint8_t sampleTxt_iscii[]={
2375             0x41,
2376             0x30,
2377             0xEB, /*unassigned*/
2378             0xa3,
2379             0x42,
2380             0xEC, /*unassigned*/
2381             0x42,
2382         };
2383         static const UChar isciitoUnicode[]={
2384             0x41,
2385             0x30,
2386             0x25,  0x58,  0x45, 0x42,
2387             0x0903,
2388             0x42,
2389             0x25,  0x58,  0x45, 0x43,
2390             0x42,};
2391 
2392         static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6  };
2393 #endif
2394 
2395         /*UTF8*/
2396         static const uint8_t sampleTxtUTF8[]={
2397             0x20, 0x64, 0x50,
2398             0xC2, 0x7E, /* truncated char */
2399             0x20,
2400             0xE0, 0xB5, 0x7E, /* truncated char */
2401             0x40,
2402         };
2403         static const UChar UTF8ToUnicode[]={
2404             0x0020, 0x0064, 0x0050,
2405             0x0025, 0x0058, 0x0043, 0x0032, 0x007E,  /* \xC2~ */
2406             0x0020,
2407             0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E,
2408             0x0040
2409         };
2410         static const int32_t fromUTF8[] = {
2411             0, 1, 2,
2412             3, 3, 3, 3, 4,
2413             5,
2414             6, 6, 6, 6, 6, 6, 6, 6, 8,
2415             9
2416         };
2417         static const UChar UTF8ToUnicodeXML_DEC[]={
2418             0x0020, 0x0064, 0x0050,
2419             0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E,  /* &#194;~ */
2420             0x0020,
2421             0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E,
2422             0x0040
2423         };
2424         static const int32_t fromUTF8XML_DEC[] = {
2425             0, 1, 2,
2426             3, 3, 3, 3, 3, 3, 4,
2427             5,
2428             6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8,
2429             9
2430         };
2431 
2432 
2433 #if !UCONFIG_NO_LEGACY_CONVERSION
2434         if(!testConvertToUnicode(sampleTxtToU, UPRV_LENGTHOF(sampleTxtToU),
2435                  IBM_943toUnicode, UPRV_LENGTHOF(IBM_943toUnicode),"ibm-943",
2436                 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 ))
2437             log_err("ibm-943->u with substitute with value did not match.\n");
2438 
2439         if(!testConvertToUnicode(sampleTxt_EUC_JP, UPRV_LENGTHOF(sampleTxt_EUC_JP),
2440                  EUC_JPtoUnicode, UPRV_LENGTHOF(EUC_JPtoUnicode),"IBM-eucJP",
2441                 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0))
2442             log_err("euc-jp->u with substitute with value did not match.\n");
2443 
2444         if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
2445                  euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
2446                 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0))
2447             log_err("euc-tw->u with substitute with value did not match.\n");
2448 
2449         if(!testConvertToUnicode(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2450                  iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
2451                 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0))
2452             log_err("iso-2022-jp->u with substitute with value did not match.\n");
2453 
2454         if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2455                  iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
2456                 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR))
2457             log_err("iso-2022-jp->u with substitute with value did not match.\n");
2458 
2459         {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */
2460             {
2461                 static const UChar iso_2022_jptoUnicodeDec[]={
2462                                                   0x0041,
2463                                                   /*   &         #         5         8         ;   */
2464                                                   0x0026,   0x0023,   0x0035,   0x0038,   0x003b,
2465                                                   0x0026,   0x0023,   0x0032,   0x0036,   0x003b,
2466                                                   0x0042 };
2467                 static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12,  };
2468                 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2469                      iso_2022_jptoUnicodeDec, UPRV_LENGTHOF(iso_2022_jptoUnicodeDec),"iso-2022-jp",
2470                     UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2471                 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n");
2472             }
2473             {
2474                 static const UChar iso_2022_jptoUnicodeHex[]={
2475                                                   0x0041,
2476                                                   /*   &       #       x       3       A       ;  */
2477                                                   0x0026, 0x0023, 0x0078, 0x0033, 0x0041, 0x003b,
2478                                                   0x0026, 0x0023, 0x0078, 0x0031, 0x0041, 0x003b,
2479                                                   0x0042 };
2480                 static const int32_t from_iso_2022_jpOffsHex [] ={  3,7,7,7,7,7,7,7,7,7,7,7,7,12   };
2481                 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2482                      iso_2022_jptoUnicodeHex, UPRV_LENGTHOF(iso_2022_jptoUnicodeHex),"iso-2022-jp",
2483                     UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR ))
2484                 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n");
2485             }
2486             {
2487                 static const UChar iso_2022_jptoUnicodeC[]={
2488                                                 0x0041,
2489                                                 0x005C, 0x0078, 0x0033, 0x0041,   /*  \x3A */
2490                                                 0x005C, 0x0078, 0x0031, 0x0041,   /*  \x1A */
2491                                                 0x0042 };
2492                 int32_t from_iso_2022_jpOffsC [] ={  3,7,7,7,7,7,7,7,7,12   };
2493                 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2494                      iso_2022_jptoUnicodeC, UPRV_LENGTHOF(iso_2022_jptoUnicodeC),"iso-2022-jp",
2495                     UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2496                 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n");
2497             }
2498         }
2499         if(!testConvertToUnicode(sampleTxt_iso_2022_cn, UPRV_LENGTHOF(sampleTxt_iso_2022_cn),
2500                  iso_2022_cntoUnicode, UPRV_LENGTHOF(iso_2022_cntoUnicode),"iso-2022-cn",
2501                 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0))
2502             log_err("iso-2022-cn->u with substitute with value did not match.\n");
2503 
2504         if(!testConvertToUnicode(sampleTxt_iso_2022_kr, UPRV_LENGTHOF(sampleTxt_iso_2022_kr),
2505                  iso_2022_krtoUnicode, UPRV_LENGTHOF(iso_2022_krtoUnicode),"iso-2022-kr",
2506                 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0))
2507             log_err("iso-2022-kr->u with substitute with value did not match.\n");
2508 
2509          if(!testConvertToUnicode(sampleTxt_hz, UPRV_LENGTHOF(sampleTxt_hz),
2510                  hztoUnicode, UPRV_LENGTHOF(hztoUnicode),"HZ",
2511                 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0))
2512             log_err("hz->u with substitute with value did not match.\n");
2513 
2514          if(!testConvertToUnicode(sampleTxt_iscii, UPRV_LENGTHOF(sampleTxt_iscii),
2515                  isciitoUnicode, UPRV_LENGTHOF(isciitoUnicode),"ISCII,version=0",
2516                 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0))
2517             log_err("ISCII ->u with substitute with value did not match.\n");
2518 #endif
2519 
2520         if(!testConvertToUnicode(sampleTxtUTF8, UPRV_LENGTHOF(sampleTxtUTF8),
2521                 UTF8ToUnicode, UPRV_LENGTHOF(UTF8ToUnicode),"UTF-8",
2522                 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0))
2523             log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2524         if(!testConvertToUnicodeWithContext(sampleTxtUTF8, UPRV_LENGTHOF(sampleTxtUTF8),
2525                 UTF8ToUnicodeXML_DEC, UPRV_LENGTHOF(UTF8ToUnicodeXML_DEC),"UTF-8",
2526                 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR))
2527             log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2528     }
2529 }
2530 
2531 #if !UCONFIG_NO_LEGACY_CONVERSION
TestLegalAndOthers(int32_t inputsize,int32_t outputsize)2532 static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize)
2533 {
2534     static const UChar    legalText[] =  { 0x0000, 0xAC00, 0xAC01, 0xD700 };
2535     static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
2536     static const int32_t  to949legal[] = {0, 1, 1, 2, 2, 3, 3};
2537 
2538 
2539     static const uint8_t text943[] = {
2540         0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
2541     static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22,  0x5b57 };
2542     static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22,  0x5b57 };
2543     static const UChar toUnicode943stop[]= { 0x304b};
2544 
2545     static const int32_t  fromIBM943Offssub[]  = { 0, 2, 3, 4, 5, 7 };
2546     static const int32_t  fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 };
2547     static const int32_t  fromIBM943Offsstop[] = { 0};
2548 
2549     gInBufferSize = inputsize;
2550     gOutBufferSize = outputsize;
2551     /*checking with a legal value*/
2552     if(!testConvertFromUnicode(legalText, UPRV_LENGTHOF(legalText),
2553             templegal949, UPRV_LENGTHOF(templegal949), "ibm-949",
2554             UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 ))
2555         log_err("u-> ibm-949 with skip did not match.\n");
2556 
2557     /*checking illegal value for ibm-943 with substitute*/
2558     if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2559              toUnicode943sub, UPRV_LENGTHOF(toUnicode943sub),"ibm-943",
2560             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2561         log_err("ibm-943->u with subst did not match.\n");
2562     /*checking illegal value for ibm-943 with skip */
2563     if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2564              toUnicode943skip, UPRV_LENGTHOF(toUnicode943skip),"ibm-943",
2565             UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 ))
2566         log_err("ibm-943->u with skip did not match.\n");
2567 
2568     /*checking illegal value for ibm-943 with stop */
2569     if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2570              toUnicode943stop, UPRV_LENGTHOF(toUnicode943stop),"ibm-943",
2571             UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 ))
2572         log_err("ibm-943->u with stop did not match.\n");
2573 
2574 }
2575 
TestSingleByte(int32_t inputsize,int32_t outputsize)2576 static void TestSingleByte(int32_t inputsize, int32_t outputsize)
2577 {
2578     static const uint8_t sampleText[] = {
2579         0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
2580         0xff, 0x32, 0x33};
2581     static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 };
2582     static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 };
2583     /*checking illegal value for ibm-943 with substitute*/
2584     gInBufferSize = inputsize;
2585     gOutBufferSize = outputsize;
2586 
2587     if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
2588              toUnicode943sub, UPRV_LENGTHOF(toUnicode943sub),"ibm-943",
2589             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2590         log_err("ibm-943->u with subst did not match.\n");
2591 }
2592 
TestEBCDIC_STATEFUL_Sub(int32_t inputsize,int32_t outputsize)2593 static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize)
2594 {
2595     /*EBCDIC_STATEFUL*/
2596     static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 };
2597     static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 };
2598     static const int32_t offset_930[]=     { 0,    1,    1,    1,    2,    2,    3,    4,    4,    4,    5,    5    };
2599 /*                              s     SO    doubl       SI    sng   s     SO    fe    fe    SI    s    */
2600 
2601     /*EBCDIC_STATEFUL with subChar=3f*/
2602     static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 };
2603     static const int32_t offset_930_subvaried[]=     { 0,    1,    1,    1,    2,    2,    3,    4,    5    };
2604     static const char mySubChar[]={ 0x3f};
2605 
2606     gInBufferSize = inputsize;
2607     gOutBufferSize = outputsize;
2608 
2609     if(!testConvertFromUnicode(ebcdic_inputTest, UPRV_LENGTHOF(ebcdic_inputTest),
2610         toIBM930, UPRV_LENGTHOF(toIBM930), "ibm-930",
2611         UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 ))
2612             log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n");
2613 
2614     if(!testConvertFromUnicode(ebcdic_inputTest, UPRV_LENGTHOF(ebcdic_inputTest),
2615         toIBM930_subvaried, UPRV_LENGTHOF(toIBM930_subvaried), "ibm-930",
2616         UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 ))
2617             log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n");
2618 }
2619 #endif
2620 
testConvertFromUnicode(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UConverterFromUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len)2621 UBool testConvertFromUnicode(const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
2622                 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
2623                 const char *mySubChar, int8_t len)
2624 {
2625 
2626 
2627     UErrorCode status = U_ZERO_ERROR;
2628     UConverter *conv = 0;
2629     char junkout[NEW_MAX_BUFFER]; /* FIX */
2630     int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2631     const UChar *src;
2632     char *end;
2633     char *targ;
2634     int32_t *offs;
2635     int i;
2636     int32_t  realBufferSize;
2637     char *realBufferEnd;
2638     const UChar *realSourceEnd;
2639     const UChar *sourceLimit;
2640     UBool checkOffsets = TRUE;
2641     UBool doFlush;
2642     char junk[9999];
2643     char offset_str[9999];
2644     char *p;
2645     UConverterFromUCallback oldAction = NULL;
2646     const void* oldContext = NULL;
2647 
2648 
2649     for(i=0;i<NEW_MAX_BUFFER;i++)
2650         junkout[i] = (char)0xF0;
2651     for(i=0;i<NEW_MAX_BUFFER;i++)
2652         junokout[i] = 0xFF;
2653     setNuConvTestName(codepage, "FROM");
2654 
2655     log_verbose("\nTesting========= %s  FROM \n  inputbuffer= %d   outputbuffer= %d\n", codepage, gInBufferSize,
2656             gOutBufferSize);
2657 
2658     conv = ucnv_open(codepage, &status);
2659     if(U_FAILURE(status))
2660     {
2661         log_data_err("Couldn't open converter %s\n",codepage);
2662         return TRUE;
2663     }
2664 
2665     log_verbose("Converter opened..\n");
2666 
2667     /*----setting the callback routine----*/
2668     ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2669     if (U_FAILURE(status))
2670     {
2671         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2672     }
2673     /*------------------------*/
2674     /*setting the subChar*/
2675     if(mySubChar != NULL){
2676         ucnv_setSubstChars(conv, mySubChar, len, &status);
2677         if (U_FAILURE(status))  {
2678             log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2679         }
2680     }
2681     /*------------*/
2682 
2683     src = source;
2684     targ = junkout;
2685     offs = junokout;
2686 
2687     realBufferSize = UPRV_LENGTHOF(junkout);
2688     realBufferEnd = junkout + realBufferSize;
2689     realSourceEnd = source + sourceLen;
2690 
2691     if ( gOutBufferSize != realBufferSize )
2692       checkOffsets = FALSE;
2693 
2694     if( gInBufferSize != NEW_MAX_BUFFER )
2695       checkOffsets = FALSE;
2696 
2697     do
2698     {
2699         end = nct_min(targ + gOutBufferSize, realBufferEnd);
2700         sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
2701 
2702         doFlush = (UBool)(sourceLimit == realSourceEnd);
2703 
2704         if(targ == realBufferEnd)
2705         {
2706             log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
2707             return FALSE;
2708         }
2709         log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
2710 
2711 
2712         status = U_ZERO_ERROR;
2713 
2714         ucnv_fromUnicode (conv,
2715                   (char **)&targ,
2716                   (const char *)end,
2717                   &src,
2718                   sourceLimit,
2719                   checkOffsets ? offs : NULL,
2720                   doFlush, /* flush if we're at the end of the input data */
2721                   &status);
2722     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
2723 
2724 
2725     if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2726         UChar errChars[50]; /* should be sufficient */
2727         int8_t errLen = 50;
2728         UErrorCode err = U_ZERO_ERROR;
2729         const UChar* start= NULL;
2730         ucnv_getInvalidUChars(conv,errChars, &errLen, &err);
2731         if(U_FAILURE(err)){
2732             log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err));
2733         }
2734         /* length of in invalid chars should be equal to returned length*/
2735         start = src - errLen;
2736         if(u_strncmp(errChars,start,errLen)!=0){
2737             log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2738         }
2739     }
2740     /* allow failure codes for the stop callback */
2741     if(U_FAILURE(status) &&
2742        (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND)))
2743     {
2744         log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2745         return FALSE;
2746     }
2747 
2748     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
2749         sourceLen, targ-junkout);
2750     if(getTestOption(VERBOSITY_OPTION))
2751     {
2752 
2753         junk[0] = 0;
2754         offset_str[0] = 0;
2755         for(p = junkout;p<targ;p++)
2756         {
2757             sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
2758             sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
2759         }
2760 
2761         log_verbose(junk);
2762         printSeq(expect, expectLen);
2763         if ( checkOffsets )
2764         {
2765             log_verbose("\nOffsets:");
2766             log_verbose(offset_str);
2767         }
2768         log_verbose("\n");
2769     }
2770     ucnv_close(conv);
2771 
2772 
2773     if(expectLen != targ-junkout)
2774     {
2775         log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2776         log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2777         printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2778         printSeqErr(expect, expectLen);
2779         return FALSE;
2780     }
2781 
2782     if (checkOffsets && (expectOffsets != 0) )
2783     {
2784         log_verbose("comparing %d offsets..\n", targ-junkout);
2785         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
2786             log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2787             log_err("Got Output : ");
2788             printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2789             log_err("Got Offsets:      ");
2790             for(p=junkout;p<targ;p++)
2791                 log_err("%d,", junokout[p-junkout]);
2792             log_err("\n");
2793             log_err("Expected Offsets: ");
2794             for(i=0; i<(targ-junkout); i++)
2795                 log_err("%d,", expectOffsets[i]);
2796             log_err("\n");
2797             return FALSE;
2798         }
2799     }
2800 
2801     if(!memcmp(junkout, expect, expectLen))
2802     {
2803         log_verbose("String matches! %s\n", gNuConvTestName);
2804         return TRUE;
2805     }
2806     else
2807     {
2808         log_err("String does not match. %s\n", gNuConvTestName);
2809         log_err("source: ");
2810         printUSeqErr(source, sourceLen);
2811         log_err("Got:      ");
2812         printSeqErr((const uint8_t *)junkout, expectLen);
2813         log_err("Expected: ");
2814         printSeqErr(expect, expectLen);
2815         return FALSE;
2816     }
2817 }
2818 
testConvertToUnicode(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UConverterToUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len)2819 UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
2820                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
2821                const char *mySubChar, int8_t len)
2822 {
2823     UErrorCode status = U_ZERO_ERROR;
2824     UConverter *conv = 0;
2825     UChar   junkout[NEW_MAX_BUFFER]; /* FIX */
2826     int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2827     const char *src;
2828     const char *realSourceEnd;
2829     const char *srcLimit;
2830     UChar *targ;
2831     UChar *end;
2832     int32_t *offs;
2833     int i;
2834     UBool   checkOffsets = TRUE;
2835     char junk[9999];
2836     char offset_str[9999];
2837     UChar *p;
2838     UConverterToUCallback oldAction = NULL;
2839     const void* oldContext = NULL;
2840 
2841     int32_t   realBufferSize;
2842     UChar *realBufferEnd;
2843 
2844 
2845     for(i=0;i<NEW_MAX_BUFFER;i++)
2846         junkout[i] = 0xFFFE;
2847 
2848     for(i=0;i<NEW_MAX_BUFFER;i++)
2849         junokout[i] = -1;
2850 
2851     setNuConvTestName(codepage, "TO");
2852 
2853     log_verbose("\n=========  %s\n", gNuConvTestName);
2854 
2855     conv = ucnv_open(codepage, &status);
2856     if(U_FAILURE(status))
2857     {
2858         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
2859         return TRUE;
2860     }
2861 
2862     log_verbose("Converter opened..\n");
2863 
2864     src = (const char *)source;
2865     targ = junkout;
2866     offs = junokout;
2867 
2868     realBufferSize = UPRV_LENGTHOF(junkout);
2869     realBufferEnd = junkout + realBufferSize;
2870     realSourceEnd = src + sourcelen;
2871     /*----setting the callback routine----*/
2872     ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2873     if (U_FAILURE(status))
2874     {
2875         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2876     }
2877     /*-------------------------------------*/
2878     /*setting the subChar*/
2879     if(mySubChar != NULL){
2880         ucnv_setSubstChars(conv, mySubChar, len, &status);
2881         if (U_FAILURE(status))  {
2882             log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2883         }
2884     }
2885     /*------------*/
2886 
2887 
2888     if ( gOutBufferSize != realBufferSize )
2889         checkOffsets = FALSE;
2890 
2891     if( gInBufferSize != NEW_MAX_BUFFER )
2892         checkOffsets = FALSE;
2893 
2894     do
2895     {
2896         end = nct_min( targ + gOutBufferSize, realBufferEnd);
2897         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
2898 
2899         if(targ == realBufferEnd)
2900         {
2901             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
2902             return FALSE;
2903         }
2904         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
2905 
2906 
2907 
2908         status = U_ZERO_ERROR;
2909 
2910         ucnv_toUnicode (conv,
2911                 &targ,
2912                 end,
2913                 (const char **)&src,
2914                 (const char *)srcLimit,
2915                 checkOffsets ? offs : NULL,
2916                 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
2917                 &status);
2918     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
2919 
2920     if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2921         char errChars[50]; /* should be sufficient */
2922         int8_t errLen = 50;
2923         UErrorCode err = U_ZERO_ERROR;
2924         const char* start= NULL;
2925         ucnv_getInvalidChars(conv,errChars, &errLen, &err);
2926         if(U_FAILURE(err)){
2927             log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err));
2928         }
2929         /* length of in invalid chars should be equal to returned length*/
2930         start = src - errLen;
2931         if(uprv_strncmp(errChars,start,errLen)!=0){
2932             log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2933         }
2934     }
2935     /* allow failure codes for the stop callback */
2936     if(U_FAILURE(status) &&
2937        (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND)))
2938     {
2939         log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2940         return FALSE;
2941     }
2942 
2943     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
2944         sourcelen, targ-junkout);
2945     if(getTestOption(VERBOSITY_OPTION))
2946     {
2947 
2948         junk[0] = 0;
2949         offset_str[0] = 0;
2950 
2951         for(p = junkout;p<targ;p++)
2952         {
2953             sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
2954             sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
2955         }
2956 
2957         log_verbose(junk);
2958         printUSeq(expect, expectlen);
2959         if ( checkOffsets )
2960         {
2961             log_verbose("\nOffsets:");
2962             log_verbose(offset_str);
2963         }
2964         log_verbose("\n");
2965     }
2966     ucnv_close(conv);
2967 
2968     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
2969 
2970     if (checkOffsets && (expectOffsets != 0))
2971     {
2972         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
2973         {
2974             log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2975             log_err("Got offsets:      ");
2976             for(p=junkout;p<targ;p++)
2977                 log_err("  %2d,", junokout[p-junkout]);
2978             log_err("\n");
2979             log_err("Expected offsets: ");
2980             for(i=0; i<(targ-junkout); i++)
2981                 log_err("  %2d,", expectOffsets[i]);
2982             log_err("\n");
2983             log_err("Got output:       ");
2984             for(i=0; i<(targ-junkout); i++)
2985                 log_err("0x%04x,", junkout[i]);
2986             log_err("\n");
2987             log_err("From source:      ");
2988             for(i=0; i<(src-(const char *)source); i++)
2989                 log_err("  0x%02x,", (unsigned char)source[i]);
2990             log_err("\n");
2991         }
2992     }
2993 
2994     if(!memcmp(junkout, expect, expectlen*2))
2995     {
2996         log_verbose("Matches!\n");
2997         return TRUE;
2998     }
2999     else
3000     {
3001         log_err("String does not match. %s\n", gNuConvTestName);
3002         log_verbose("String does not match. %s\n", gNuConvTestName);
3003         log_err("Got:      ");
3004         printUSeqErr(junkout, expectlen);
3005         log_err("Expected: ");
3006         printUSeqErr(expect, expectlen);
3007         log_err("\n");
3008         return FALSE;
3009     }
3010 }
3011 
testConvertFromUnicodeWithContext(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UConverterFromUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len,const void * context,UErrorCode expectedError)3012 UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
3013                 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
3014                 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3015 {
3016 
3017 
3018     UErrorCode status = U_ZERO_ERROR;
3019     UConverter *conv = 0;
3020     char junkout[NEW_MAX_BUFFER]; /* FIX */
3021     int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3022     const UChar *src;
3023     char *end;
3024     char *targ;
3025     int32_t *offs;
3026     int i;
3027     int32_t  realBufferSize;
3028     char *realBufferEnd;
3029     const UChar *realSourceEnd;
3030     const UChar *sourceLimit;
3031     UBool checkOffsets = TRUE;
3032     UBool doFlush;
3033     char junk[9999];
3034     char offset_str[9999];
3035     char *p;
3036     UConverterFromUCallback oldAction = NULL;
3037     const void* oldContext = NULL;
3038 
3039 
3040     for(i=0;i<NEW_MAX_BUFFER;i++)
3041         junkout[i] = (char)0xF0;
3042     for(i=0;i<NEW_MAX_BUFFER;i++)
3043         junokout[i] = 0xFF;
3044     setNuConvTestName(codepage, "FROM");
3045 
3046     log_verbose("\nTesting========= %s  FROM \n  inputbuffer= %d   outputbuffer= %d\n", codepage, gInBufferSize,
3047             gOutBufferSize);
3048 
3049     conv = ucnv_open(codepage, &status);
3050     if(U_FAILURE(status))
3051     {
3052         log_data_err("Couldn't open converter %s\n",codepage);
3053         return TRUE; /* Because the err has already been logged. */
3054     }
3055 
3056     log_verbose("Converter opened..\n");
3057 
3058     /*----setting the callback routine----*/
3059     ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3060     if (U_FAILURE(status))
3061     {
3062         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3063     }
3064     /*------------------------*/
3065     /*setting the subChar*/
3066     if(mySubChar != NULL){
3067         ucnv_setSubstChars(conv, mySubChar, len, &status);
3068         if (U_FAILURE(status))  {
3069             log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status));
3070         }
3071     }
3072     /*------------*/
3073 
3074     src = source;
3075     targ = junkout;
3076     offs = junokout;
3077 
3078     realBufferSize = UPRV_LENGTHOF(junkout);
3079     realBufferEnd = junkout + realBufferSize;
3080     realSourceEnd = source + sourceLen;
3081 
3082     if ( gOutBufferSize != realBufferSize )
3083       checkOffsets = FALSE;
3084 
3085     if( gInBufferSize != NEW_MAX_BUFFER )
3086       checkOffsets = FALSE;
3087 
3088     do
3089     {
3090         end = nct_min(targ + gOutBufferSize, realBufferEnd);
3091         sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
3092 
3093         doFlush = (UBool)(sourceLimit == realSourceEnd);
3094 
3095         if(targ == realBufferEnd)
3096         {
3097             log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
3098             return FALSE;
3099         }
3100         log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
3101 
3102 
3103         status = U_ZERO_ERROR;
3104 
3105         ucnv_fromUnicode (conv,
3106                   (char **)&targ,
3107                   (const char *)end,
3108                   &src,
3109                   sourceLimit,
3110                   checkOffsets ? offs : NULL,
3111                   doFlush, /* flush if we're at the end of the input data */
3112                   &status);
3113     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
3114 
3115     /* allow failure codes for the stop callback */
3116     if(U_FAILURE(status) && status != expectedError)
3117     {
3118         log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3119         return FALSE;
3120     }
3121 
3122     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
3123         sourceLen, targ-junkout);
3124     if(getTestOption(VERBOSITY_OPTION))
3125     {
3126 
3127         junk[0] = 0;
3128         offset_str[0] = 0;
3129         for(p = junkout;p<targ;p++)
3130         {
3131             sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
3132             sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
3133         }
3134 
3135         log_verbose(junk);
3136         printSeq(expect, expectLen);
3137         if ( checkOffsets )
3138         {
3139             log_verbose("\nOffsets:");
3140             log_verbose(offset_str);
3141         }
3142         log_verbose("\n");
3143     }
3144     ucnv_close(conv);
3145 
3146 
3147     if(expectLen != targ-junkout)
3148     {
3149         log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3150         log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3151         printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3152         printSeqErr(expect, expectLen);
3153         return FALSE;
3154     }
3155 
3156     if (checkOffsets && (expectOffsets != 0) )
3157     {
3158         log_verbose("comparing %d offsets..\n", targ-junkout);
3159         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
3160             log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3161             log_err("Got Output : ");
3162             printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3163             log_err("Got Offsets:      ");
3164             for(p=junkout;p<targ;p++)
3165                 log_err("%d,", junokout[p-junkout]);
3166             log_err("\n");
3167             log_err("Expected Offsets: ");
3168             for(i=0; i<(targ-junkout); i++)
3169                 log_err("%d,", expectOffsets[i]);
3170             log_err("\n");
3171             return FALSE;
3172         }
3173     }
3174 
3175     if(!memcmp(junkout, expect, expectLen))
3176     {
3177         log_verbose("String matches! %s\n", gNuConvTestName);
3178         return TRUE;
3179     }
3180     else
3181     {
3182         log_err("String does not match. %s\n", gNuConvTestName);
3183         log_err("source: ");
3184         printUSeqErr(source, sourceLen);
3185         log_err("Got:      ");
3186         printSeqErr((const uint8_t *)junkout, expectLen);
3187         log_err("Expected: ");
3188         printSeqErr(expect, expectLen);
3189         return FALSE;
3190     }
3191 }
testConvertToUnicodeWithContext(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UConverterToUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len,const void * context,UErrorCode expectedError)3192 UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
3193                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
3194                const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3195 {
3196     UErrorCode status = U_ZERO_ERROR;
3197     UConverter *conv = 0;
3198     UChar   junkout[NEW_MAX_BUFFER]; /* FIX */
3199     int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3200     const char *src;
3201     const char *realSourceEnd;
3202     const char *srcLimit;
3203     UChar *targ;
3204     UChar *end;
3205     int32_t *offs;
3206     int i;
3207     UBool   checkOffsets = TRUE;
3208     char junk[9999];
3209     char offset_str[9999];
3210     UChar *p;
3211     UConverterToUCallback oldAction = NULL;
3212     const void* oldContext = NULL;
3213 
3214     int32_t   realBufferSize;
3215     UChar *realBufferEnd;
3216 
3217 
3218     for(i=0;i<NEW_MAX_BUFFER;i++)
3219         junkout[i] = 0xFFFE;
3220 
3221     for(i=0;i<NEW_MAX_BUFFER;i++)
3222         junokout[i] = -1;
3223 
3224     setNuConvTestName(codepage, "TO");
3225 
3226     log_verbose("\n=========  %s\n", gNuConvTestName);
3227 
3228     conv = ucnv_open(codepage, &status);
3229     if(U_FAILURE(status))
3230     {
3231         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
3232         return TRUE;
3233     }
3234 
3235     log_verbose("Converter opened..\n");
3236 
3237     src = (const char *)source;
3238     targ = junkout;
3239     offs = junokout;
3240 
3241     realBufferSize = UPRV_LENGTHOF(junkout);
3242     realBufferEnd = junkout + realBufferSize;
3243     realSourceEnd = src + sourcelen;
3244     /*----setting the callback routine----*/
3245     ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3246     if (U_FAILURE(status))
3247     {
3248         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3249     }
3250     /*-------------------------------------*/
3251     /*setting the subChar*/
3252     if(mySubChar != NULL){
3253         ucnv_setSubstChars(conv, mySubChar, len, &status);
3254         if (U_FAILURE(status))  {
3255             log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3256         }
3257     }
3258     /*------------*/
3259 
3260 
3261     if ( gOutBufferSize != realBufferSize )
3262         checkOffsets = FALSE;
3263 
3264     if( gInBufferSize != NEW_MAX_BUFFER )
3265         checkOffsets = FALSE;
3266 
3267     do
3268     {
3269         end = nct_min( targ + gOutBufferSize, realBufferEnd);
3270         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
3271 
3272         if(targ == realBufferEnd)
3273         {
3274             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
3275             return FALSE;
3276         }
3277         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
3278 
3279 
3280 
3281         status = U_ZERO_ERROR;
3282 
3283         ucnv_toUnicode (conv,
3284                 &targ,
3285                 end,
3286                 (const char **)&src,
3287                 (const char *)srcLimit,
3288                 checkOffsets ? offs : NULL,
3289                 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
3290                 &status);
3291     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
3292 
3293     /* allow failure codes for the stop callback */
3294     if(U_FAILURE(status) && status!=expectedError)
3295     {
3296         log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3297         return FALSE;
3298     }
3299 
3300     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
3301         sourcelen, targ-junkout);
3302     if(getTestOption(VERBOSITY_OPTION))
3303     {
3304 
3305         junk[0] = 0;
3306         offset_str[0] = 0;
3307 
3308         for(p = junkout;p<targ;p++)
3309         {
3310             sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
3311             sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
3312         }
3313 
3314         log_verbose(junk);
3315         printUSeq(expect, expectlen);
3316         if ( checkOffsets )
3317         {
3318             log_verbose("\nOffsets:");
3319             log_verbose(offset_str);
3320         }
3321         log_verbose("\n");
3322     }
3323     ucnv_close(conv);
3324 
3325     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
3326 
3327     if (checkOffsets && (expectOffsets != 0))
3328     {
3329         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
3330         {
3331             log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3332             log_err("Got offsets:      ");
3333             for(p=junkout;p<targ;p++)
3334                 log_err("  %2d,", junokout[p-junkout]);
3335             log_err("\n");
3336             log_err("Expected offsets: ");
3337             for(i=0; i<(targ-junkout); i++)
3338                 log_err("  %2d,", expectOffsets[i]);
3339             log_err("\n");
3340             log_err("Got output:       ");
3341             for(i=0; i<(targ-junkout); i++)
3342                 log_err("0x%04x,", junkout[i]);
3343             log_err("\n");
3344             log_err("From source:      ");
3345             for(i=0; i<(src-(const char *)source); i++)
3346                 log_err("  0x%02x,", (unsigned char)source[i]);
3347             log_err("\n");
3348         }
3349     }
3350 
3351     if(!memcmp(junkout, expect, expectlen*2))
3352     {
3353         log_verbose("Matches!\n");
3354         return TRUE;
3355     }
3356     else
3357     {
3358         log_err("String does not match. %s\n", gNuConvTestName);
3359         log_verbose("String does not match. %s\n", gNuConvTestName);
3360         log_err("Got:      ");
3361         printUSeqErr(junkout, expectlen);
3362         log_err("Expected: ");
3363         printUSeqErr(expect, expectlen);
3364         log_err("\n");
3365         return FALSE;
3366     }
3367 }
3368 
TestCallBackFailure(void)3369 static void TestCallBackFailure(void) {
3370     UErrorCode status = U_USELESS_COLLATOR_ERROR;
3371     ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status);
3372     if (status != U_USELESS_COLLATOR_ERROR) {
3373         log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n");
3374     }
3375     ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status);
3376     if (status != U_USELESS_COLLATOR_ERROR) {
3377         log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n");
3378     }
3379     ucnv_cbFromUWriteSub(NULL, -1, &status);
3380     if (status != U_USELESS_COLLATOR_ERROR) {
3381         log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n");
3382     }
3383     ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status);
3384     if (status != U_USELESS_COLLATOR_ERROR) {
3385         log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n");
3386     }
3387 }
3388