• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*
9 ********************************************************************************
10 * File NCCBTST.C
11 *
12 * Modification History:
13 *        Name                            Description
14 *    Madhu Katragadda     7/21/1999      Testing error callback routines
15 ********************************************************************************
16 */
17 #include <ctype.h>
18 #include <stdbool.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include "cmemory.h"
23 #include "cstring.h"
24 #include "unicode/uloc.h"
25 #include "unicode/ucnv.h"
26 #include "unicode/ucnv_err.h"
27 #include "cintltst.h"
28 #include "unicode/utypes.h"
29 #include "unicode/ustring.h"
30 #include "nccbtst.h"
31 #include "unicode/ucnv_cb.h"
32 #include "unicode/utf16.h"
33 
/* Large buffer size used by the test drivers in this file, paired against a size of 1. */
#define NEW_MAX_BUFFER 999

/*
 * Evaluates to the smaller of x and y.
 * Both arguments are fully parenthesized so that expression arguments
 * (e.g. `a | b`) keep their own precedence inside the comparison.
 * Each argument may still be evaluated twice: do not pass expressions
 * with side effects.
 */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))

/* Input/output buffer sizes of the current test run; reported in the test name. */
static int32_t  gInBufferSize = 0;
static int32_t  gOutBufferSize = 0;
/* Description of the test currently running, filled in by setNuConvTestName(). */
static char     gNuConvTestName[1024];
41 
/*
 * Writes the first `len` bytes of `a` through log_verbose() as a
 * brace-delimited list of two-digit uppercase hex values.
 */
static void printSeq(const uint8_t* a, int len)
{
    int idx;

    log_verbose("\n{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02X, ", a[idx]);
    }
    log_verbose("}\n");
}
50 
/*
 * Writes the first `len` code units of `a` through log_verbose() as a
 * brace-delimited list of four-digit lowercase hex values.
 */
static void printUSeq(const UChar* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("  0x%04x, ", a[idx]);
    }
    log_verbose("}\n");
}
59 
/*
 * Prints the first `len` bytes of `a` to stderr as a brace-delimited list
 * of two-digit lowercase hex values, followed by a newline.
 */
static void printSeqErr(const uint8_t* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "  0x%02x, ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
68 
/*
 * Prints the first `len` code units of `a` to stderr as a brace-delimited
 * list of four-digit lowercase hex values, followed by a newline.
 */
static void printUSeqErr(const UChar* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%04x, ", a[idx]);
    }
    fprintf(stderr,"}\n");
}
77 
setNuConvTestName(const char * codepage,const char * direction)78 static void setNuConvTestName(const char *codepage, const char *direction)
79 {
80     sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
81             codepage,
82             direction,
83             (int)gInBufferSize,
84             (int)gOutBufferSize);
85 }
86 
87 
88 static void TestCallBackFailure(void);
89 
90 void addTestConvertErrorCallBack(TestNode** root);
91 
/*
 * Registers the error-callback tests of this file with addTest(), each
 * under a "tsconv/nccbtst/..." path. The legacy-converter tests are only
 * registered when UCONFIG_NO_LEGACY_CONVERSION is not set.
 */
void addTestConvertErrorCallBack(TestNode** root)
{
    /* Callback behaviors: skip, stop, substitute, substitute-with-value. */
    addTest(root, TestSkipCallBack,
            "tsconv/nccbtst/TestSkipCallBack");
    addTest(root, TestStopCallBack,
            "tsconv/nccbtst/TestStopCallBack");
    addTest(root, TestSubCallBack,
            "tsconv/nccbtst/TestSubCallBack");
    addTest(root, TestSubWithValueCallBack,
            "tsconv/nccbtst/TestSubWithValueCallBack");

#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestLegalAndOtherCallBack,
            "tsconv/nccbtst/TestLegalAndOtherCallBack");
    addTest(root, TestSingleByteCallBack,
            "tsconv/nccbtst/TestSingleByteCallBack");
#endif

    addTest(root, TestCallBackFailure,
            "tsconv/nccbtst/TestCallBackFailure");
}
106 
TestSkipCallBack()107 static void TestSkipCallBack()
108 {
109     TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
110     TestSkip(1,NEW_MAX_BUFFER);
111     TestSkip(1,1);
112     TestSkip(NEW_MAX_BUFFER, 1);
113 }
114 
TestStopCallBack()115 static void TestStopCallBack()
116 {
117     TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
118     TestStop(1,NEW_MAX_BUFFER);
119     TestStop(1,1);
120     TestStop(NEW_MAX_BUFFER, 1);
121 }
122 
TestSubCallBack()123 static void TestSubCallBack()
124 {
125     TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
126     TestSub(1,NEW_MAX_BUFFER);
127     TestSub(1,1);
128     TestSub(NEW_MAX_BUFFER, 1);
129 
130 #if !UCONFIG_NO_LEGACY_CONVERSION
131     TestEBCDIC_STATEFUL_Sub(1, 1);
132     TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER);
133     TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1);
134     TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
135 #endif
136 }
137 
TestSubWithValueCallBack()138 static void TestSubWithValueCallBack()
139 {
140     TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
141     TestSubWithValue(1,NEW_MAX_BUFFER);
142     TestSubWithValue(1,1);
143     TestSubWithValue(NEW_MAX_BUFFER, 1);
144 }
145 
146 #if !UCONFIG_NO_LEGACY_CONVERSION
TestLegalAndOtherCallBack()147 static void TestLegalAndOtherCallBack()
148 {
149     TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
150     TestLegalAndOthers(1,NEW_MAX_BUFFER);
151     TestLegalAndOthers(1,1);
152     TestLegalAndOthers(NEW_MAX_BUFFER, 1);
153 }
154 
TestSingleByteCallBack()155 static void TestSingleByteCallBack()
156 {
157     TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
158     TestSingleByte(1,NEW_MAX_BUFFER);
159     TestSingleByte(1,1);
160     TestSingleByte(NEW_MAX_BUFFER, 1);
161 }
162 #endif
163 
TestSkip(int32_t inputsize,int32_t outputsize)164 static void TestSkip(int32_t inputsize, int32_t outputsize)
165 {
166     static const uint8_t expskipIBM_949[]= {
167         0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
168 
169     static const uint8_t expskipIBM_943[] = {
170         0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 };
171 
172     static const uint8_t expskipIBM_930[] = {
173         0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f };
174 
175     gInBufferSize = inputsize;
176     gOutBufferSize = outputsize;
177 
178     /*From Unicode*/
179     log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP  \n");
180 
181 #if !UCONFIG_NO_LEGACY_CONVERSION
182     {
183         static const UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
184         static const UChar  sampleText2[] =  { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
185 
186         static const int32_t  toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 };
187         static const int32_t  toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 };
188 
189         if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
190                 expskipIBM_949, UPRV_LENGTHOF(expskipIBM_949), "ibm-949",
191                 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 ))
192             log_err("u-> ibm-949 with skip did not match.\n");
193         if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
194                 expskipIBM_943, UPRV_LENGTHOF(expskipIBM_943), "ibm-943",
195                 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 ))
196             log_err("u-> ibm-943 with skip did not match.\n");
197     }
198 
199     {
200         static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 };
201         static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f };
202         static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 };
203 
204         /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */
205         if(!testConvertFromUnicode(fromU, UPRV_LENGTHOF(fromU),
206                                    fromUBytes, UPRV_LENGTHOF(fromUBytes),
207                                    "ibm-930",
208                                    UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets,
209                                    NULL, 0)
210         ) {
211             log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n");
212         }
213     }
214 #endif
215 
216     {
217         static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
218         static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 };
219         static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 };
220 
221         static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
222         static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 };
223         static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 };
224 
225         /* US-ASCII */
226         if(!testConvertFromUnicode(usasciiFromU, UPRV_LENGTHOF(usasciiFromU),
227                                    usasciiFromUBytes, UPRV_LENGTHOF(usasciiFromUBytes),
228                                    "US-ASCII",
229                                    UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
230                                    NULL, 0)
231         ) {
232             log_err("u->US-ASCII with skip did not match.\n");
233         }
234 
235 #if !UCONFIG_NO_LEGACY_CONVERSION
236         /* SBCS NLTC codepage 367 for US-ASCII */
237         if(!testConvertFromUnicode(usasciiFromU, UPRV_LENGTHOF(usasciiFromU),
238                                    usasciiFromUBytes, UPRV_LENGTHOF(usasciiFromUBytes),
239                                    "ibm-367",
240                                    UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
241                                    NULL, 0)
242         ) {
243             log_err("u->ibm-367 with skip did not match.\n");
244         }
245 #endif
246 
247         /* ISO-Latin-1 */
248         if(!testConvertFromUnicode(latin1FromU, UPRV_LENGTHOF(latin1FromU),
249                                    latin1FromUBytes, UPRV_LENGTHOF(latin1FromUBytes),
250                                    "LATIN_1",
251                                    UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
252                                    NULL, 0)
253         ) {
254             log_err("u->LATIN_1 with skip did not match.\n");
255         }
256 
257 #if !UCONFIG_NO_LEGACY_CONVERSION
258         /* windows-1252 */
259         if(!testConvertFromUnicode(latin1FromU, UPRV_LENGTHOF(latin1FromU),
260                                    latin1FromUBytes, UPRV_LENGTHOF(latin1FromUBytes),
261                                    "windows-1252",
262                                    UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
263                                    NULL, 0)
264         ) {
265             log_err("u->windows-1252 with skip did not match.\n");
266         }
267     }
268 
269     {
270         static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
271         static const uint8_t toIBM943[]= { 0x61, 0x61 };
272         static const int32_t offset[]= {0, 4};
273 
274          /* EUC_JP*/
275         static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
276         static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
277             0x61, 0x8e, 0xe0,
278         };
279         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7};
280 
281         /*EUC_TW*/
282         static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
283         static const uint8_t to_euc_tw[]={
284             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
285             0x61, 0xe6, 0xca, 0x8a,
286         };
287         static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,};
288 
289         /*ISO-2022-JP*/
290         static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, };
291         static const uint8_t to_iso_2022_jp[]={
292             0x41,
293             0x42,
294 
295         };
296         static const int32_t from_iso_2022_jpOffs [] ={0,2};
297 
298         /*ISO-2022-JP*/
299         UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
300         static const uint8_t to_iso_2022_jp2[]={
301             0x41,
302             0x43,
303 
304         };
305         static const int32_t from_iso_2022_jpOffs2 [] ={0,2};
306 
307         /*ISO-2022-cn*/
308         static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
309         static const uint8_t to_iso_2022_cn[]={
310             0x41, 0x42
311         };
312         static const int32_t from_iso_2022_cnOffs [] ={
313             0, 2
314         };
315 
316         /*ISO-2022-CN*/
317         static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
318         static const uint8_t to_iso_2022_cn1[]={
319             0x41, 0x43
320 
321         };
322         static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 };
323 
324         /*ISO-2022-kr*/
325         static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
326         static const uint8_t to_iso_2022_kr[]={
327             0x1b,   0x24,   0x29,   0x43,
328             0x41,
329             0x0e,   0x25,   0x50,
330             0x25,   0x50,
331             0x0f,   0x42,
332         };
333         static const int32_t from_iso_2022_krOffs [] ={
334             -1,-1,-1,-1,
335             0,
336             1,1,1,
337             3,3,
338             4,4
339         };
340 
341         /*ISO-2022-kr*/
342         static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
343         static const uint8_t to_iso_2022_kr1[]={
344             0x1b,   0x24,   0x29,   0x43,
345             0x41,
346             0x0e,   0x25,   0x50,
347             0x25,   0x50,
348 
349         };
350         static const int32_t from_iso_2022_krOffs1 [] ={
351             -1,-1,-1,-1,
352             0,
353             1,1,1,
354             3,3,
355 
356         };
357         /* HZ encoding */
358         static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
359 
360         static const uint8_t to_hz[]={
361             0x7e,   0x7d,   0x41,
362             0x7e,   0x7b,   0x26,   0x30,
363             0x26,   0x30,
364             0x7e,   0x7d,   0x42,
365 
366         };
367         static const int32_t from_hzOffs [] ={
368             0,0,0,
369             1,1,1,1,
370             3,3,
371             4,4,4,4
372         };
373 
374         static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
375 
376         static const uint8_t to_hz1[]={
377             0x7e,   0x7d,   0x41,
378             0x7e,   0x7b,   0x26,   0x30,
379             0x26,   0x30,
380 
381 
382         };
383         static const int32_t from_hzOffs1 [] ={
384             0,0,0,
385             1,1,1,1,
386             3,3,
387 
388         };
389 
390 #endif
391 
392         static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
393 
394         static const uint8_t to_SCSU[]={
395             0x41,
396             0x42
397 
398 
399         };
400         static const int32_t from_SCSUOffs [] ={
401             0,
402             2,
403 
404         };
405 
406 #if !UCONFIG_NO_LEGACY_CONVERSION
407         /* ISCII */
408         static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
409         static const uint8_t to_iscii[]={
410             0x41,
411             0x42,
412         };
413         static const int32_t from_isciiOffs [] ={
414             0,2,
415 
416         };
417         /*ISCII*/
418         static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
419         static const uint8_t to_iscii1[]={
420             0x44,
421             0x43,
422 
423         };
424         static const int32_t from_isciiOffs1 [] ={0,2};
425 
426         if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
427                 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
428                 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 ))
429             log_err("u-> ibm-943 with skip did not match.\n");
430 
431         if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
432                 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
433                 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 ))
434             log_err("u-> euc-jp with skip did not match.\n");
435 
436         if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
437                 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
438                 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 ))
439             log_err("u-> euc-tw with skip did not match.\n");
440 
441         /*iso_2022_jp*/
442         if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
443                 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
444                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 ))
445             log_err("u-> iso-2022-jp with skip did not match.\n");
446 
447         /* with context */
448         if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, UPRV_LENGTHOF(iso_2022_jp_inputText2),
449                 to_iso_2022_jp2, UPRV_LENGTHOF(to_iso_2022_jp2), "iso-2022-jp",
450                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
451             log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
452 
453         /*iso_2022_cn*/
454         if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
455                 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
456                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 ))
457             log_err("u-> iso-2022-cn with skip did not match.\n");
458         /*with context*/
459         if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, UPRV_LENGTHOF(iso_2022_cn_inputText1),
460                 to_iso_2022_cn1, UPRV_LENGTHOF(to_iso_2022_cn1), "iso-2022-cn",
461                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
462             log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
463 
464         /*iso_2022_kr*/
465         if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
466                 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
467                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 ))
468             log_err("u-> iso-2022-kr with skip did not match.\n");
469           /*with context*/
470         if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, UPRV_LENGTHOF(iso_2022_kr_inputText1),
471                 to_iso_2022_kr1, UPRV_LENGTHOF(to_iso_2022_kr1), "iso-2022-kr",
472                 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
473             log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
474 
475         /*hz*/
476         if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
477                 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
478                 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 ))
479             log_err("u-> HZ with skip did not match.\n");
480           /*with context*/
481         if(!testConvertFromUnicodeWithContext(hz_inputText1, UPRV_LENGTHOF(hz_inputText1),
482                 to_hz1, UPRV_LENGTHOF(to_hz1), "hz",
483                 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
484             log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
485 #endif
486 
487         /*SCSU*/
488         if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
489                 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
490                 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 ))
491             log_err("u-> SCSU with skip did not match.\n");
492 
493 #if !UCONFIG_NO_LEGACY_CONVERSION
494         /*ISCII*/
495         if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
496                 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
497                 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 ))
498             log_err("u-> iscii with skip did not match.\n");
499         /*with context*/
500         if(!testConvertFromUnicodeWithContext(iscii_inputText1, UPRV_LENGTHOF(iscii_inputText1),
501                 to_iscii1, UPRV_LENGTHOF(to_iscii1), "ISCII,version=0",
502                 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
503             log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
504 #endif
505     }
506 
507     log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
508     {
509         static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */
510             0xFB, 0xEE, 0x28,       /* from source offset 0 */
511             0x24, 0x1E, 0x52,
512             0xB2,
513             0x20,
514             0xB3,
515             0xB1,
516             0x0D,
517             0x0A,
518 
519             0x20,                   /* from 8 */
520             0x00,
521             0xD0, 0x6C,
522             0xB6,
523             0xD8, 0xA5,
524             0x20,
525             0x68,
526             0x59,
527 
528             0xF9, 0x28,             /* from 16 */
529             0x6D,
530             0x20,
531             0x73,
532             0xE0, 0x2D,
533             0xDE, 0x43,
534             0xD0, 0x33,
535             0x20,
536 
537             0xFA, 0x83,             /* from 24 */
538             0x25, 0x01,
539             0xFB, 0x16, 0x87,
540             0x4B, 0x16,
541             0x20,
542             0xE6, 0xBD,
543             0xEB, 0x5B,
544             0x4B, 0xCC,
545 
546             0xF9, 0xA2,             /* from 32 */
547             0xFC, 0x10, 0x3E,
548             0xFE, 0x16, 0x3A, 0x8C,
549             0x20,
550             0xFC, 0x03, 0xAC,
551 
552             0x01,                   /* from 41 */
553             0xDE, 0x83,
554             0x20,
555             0x09
556         };
557         static const UChar expected[]={
558             0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */
559             0x0063, 0x0061, 0x000D, 0x000A,
560 
561             0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */
562             0x0930, 0x0020, 0x0918, 0x0909,
563 
564             0x3086, 0x304D, 0x0020, 0x3053, /* 16 */
565             0x4000, 0x4E00, 0x7777, 0x0020,
566 
567             0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */
568             0x0020, 0xD7A3, 0xDC00, 0xD800,
569 
570             0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */
571             0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
572 
573             0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */
574             0x0009
575         };
576         static const int32_t offsets[]={
577             0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7,
578             8, 9, 10, 10, 11, 12, 12, 13, 14, 15,
579             16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23,
580             24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31,
581             32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39,
582             41, 42, 42, 43, 44
583         };
584 
585         /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */
586         if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
587                                  sampleText, UPRV_LENGTHOF(sampleText),
588                                  "BOCU-1",
589                                  UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
590         ) {
591             log_err("u->BOCU-1 with skip did not match.\n");
592         }
593     }
594 
595     log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
596     {
597         const uint8_t sampleText[]={
598             0x61,                               /* 'a' */
599             0xc4, 0xb5,                         /* U+0135 */
600             0xed, 0x80, 0xa0,                   /* Hangul U+d020 */
601             0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */
602             0xee, 0x80, 0x80,                   /* PUA U+e000 */
603             0xed, 0xb0, 0x81,                   /* unpaired trail surrogate U+dc01 */
604             0x62,                               /* 'b' */
605             0xed, 0xa0, 0x81,                   /* unpaired lead surrogate U+d801 */
606             0xd0, 0x80                          /* U+0400 */
607         };
608         UChar expected[]={
609             0x0061,
610             0x0135,
611             0xd020,
612             0xd801, 0xdc01,
613             0xe000,
614             0xdc01,
615             0x0062,
616             0xd801,
617             0x0400
618         };
619         int32_t offsets[]={
620             0,
621             1, 1,
622             2, 2, 2,
623             3, 3, 3, 4, 4, 4,
624             5, 5, 5,
625             6, 6, 6,
626             7,
627             8, 8, 8,
628             9, 9
629         };
630 
631         /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */
632 
633         /* without offsets */
634         if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
635                                  sampleText, UPRV_LENGTHOF(sampleText),
636                                  "CESU-8",
637                                  UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0)
638         ) {
639             log_err("u->CESU-8 with skip did not match.\n");
640         }
641 
642         /* with offsets */
643         if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
644                                  sampleText, UPRV_LENGTHOF(sampleText),
645                                  "CESU-8",
646                                  UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
647         ) {
648             log_err("u->CESU-8 with skip did not match.\n");
649         }
650     }
651 
652     /*to Unicode*/
653     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP  \n");
654 
655 #if !UCONFIG_NO_LEGACY_CONVERSION
656     {
657 
658         static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 };
659         static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
660         static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
661 
662         static const int32_t  fromIBM949Offs [] = { 0, 1, 3, 5};
663         static const int32_t  fromIBM943Offs [] = { 0, 2, 4};
664         static const int32_t  fromIBM930Offs [] = { 1, 3, 5};
665 
666         if(!testConvertToUnicode(expskipIBM_949, UPRV_LENGTHOF(expskipIBM_949),
667                  IBM_949skiptoUnicode, UPRV_LENGTHOF(IBM_949skiptoUnicode),"ibm-949",
668                 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 ))
669             log_err("ibm-949->u with skip did not match.\n");
670         if(!testConvertToUnicode(expskipIBM_943, UPRV_LENGTHOF(expskipIBM_943),
671                  IBM_943skiptoUnicode, UPRV_LENGTHOF(IBM_943skiptoUnicode),"ibm-943",
672                 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 ))
673             log_err("ibm-943->u with skip did not match.\n");
674 
675 
676         if(!testConvertToUnicode(expskipIBM_930, UPRV_LENGTHOF(expskipIBM_930),
677                  IBM_930skiptoUnicode, UPRV_LENGTHOF(IBM_930skiptoUnicode),"ibm-930",
678                 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 ))
679             log_err("ibm-930->u with skip did not match.\n");
680 
681 
682         if(!testConvertToUnicodeWithContext(expskipIBM_930, UPRV_LENGTHOF(expskipIBM_930),
683                  IBM_930skiptoUnicode, UPRV_LENGTHOF(IBM_930skiptoUnicode),"ibm-930",
684                 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
685             log_err("ibm-930->u with skip did not match.\n");
686     }
687 #endif
688 
689     {
690         static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 };
691         static const UChar usasciiToU[] = { 0x61, 0x31 };
692         static const int32_t usasciiToUOffsets[] = { 0, 2 };
693 
694         static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 };
695         static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 };
696         static const int32_t latin1ToUOffsets[] = { 0, 1, 2 };
697 
698         /* US-ASCII */
699         if(!testConvertToUnicode(usasciiToUBytes, UPRV_LENGTHOF(usasciiToUBytes),
700                                  usasciiToU, UPRV_LENGTHOF(usasciiToU),
701                                  "US-ASCII",
702                                  UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
703                                  NULL, 0)
704         ) {
705             log_err("US-ASCII->u with skip did not match.\n");
706         }
707 
708 #if !UCONFIG_NO_LEGACY_CONVERSION
709         /* SBCS NLTC codepage 367 for US-ASCII */
710         if(!testConvertToUnicode(usasciiToUBytes, UPRV_LENGTHOF(usasciiToUBytes),
711                                  usasciiToU, UPRV_LENGTHOF(usasciiToU),
712                                  "ibm-367",
713                                  UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
714                                  NULL, 0)
715         ) {
716             log_err("ibm-367->u with skip did not match.\n");
717         }
718 #endif
719 
720         /* ISO-Latin-1 */
721         if(!testConvertToUnicode(latin1ToUBytes, UPRV_LENGTHOF(latin1ToUBytes),
722                                  latin1ToU, UPRV_LENGTHOF(latin1ToU),
723                                  "LATIN_1",
724                                  UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
725                                  NULL, 0)
726         ) {
727             log_err("LATIN_1->u with skip did not match.\n");
728         }
729 
730 #if !UCONFIG_NO_LEGACY_CONVERSION
731         /* windows-1252 */
732         if(!testConvertToUnicode(latin1ToUBytes, UPRV_LENGTHOF(latin1ToUBytes),
733                                  latin1ToU, UPRV_LENGTHOF(latin1ToU),
734                                  "windows-1252",
735                                  UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
736                                  NULL, 0)
737         ) {
738             log_err("windows-1252->u with skip did not match.\n");
739         }
740 #endif
741     }
742 
743 #if !UCONFIG_NO_LEGACY_CONVERSION
744     {
745         static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
746             0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
747         };
748         static const UChar EBCIDIC_STATEFUL_toUnicode[] ={  0x6d63, 0x03b4
749         };
750         static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5};
751 
752 
753          /* euc-jp*/
754         static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
755             0x8f, 0xda, 0xa1,  /*unassigned*/
756            0x8e, 0xe0,
757         };
758         static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2};
759         static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9};
760 
761          /*EUC_TW*/
762         static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
763             0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
764            0xe6, 0xca, 0x8a,
765         };
766         static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, };
767         static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13};
768                 /*iso-2022-jp*/
769         static const uint8_t sampleTxt_iso_2022_jp[]={
770             0x41,
771             0x1b,   0x24,   0x42,   0x3a, 0x1a, /*unassigned*/
772             0x1b,   0x28,   0x42,   0x42,
773 
774         };
775         static const UChar iso_2022_jptoUnicode[]={    0x41,0x42 };
776         static const int32_t from_iso_2022_jpOffs [] ={  0,9   };
777 
778         /*iso-2022-cn*/
779         static const uint8_t sampleTxt_iso_2022_cn[]={
780             0x0f,   0x41,   0x44,
781             0x1B,   0x24,   0x29,   0x47,
782             0x0E,   0x40,   0x6f, /*unassigned*/
783             0x0f,   0x42,
784 
785         };
786 
787         static const UChar iso_2022_cntoUnicode[]={    0x41, 0x44,0x42 };
788         static const int32_t from_iso_2022_cnOffs [] ={  1,   2,   11   };
789 
790         /*iso-2022-kr*/
791         static const uint8_t sampleTxt_iso_2022_kr[]={
792           0x1b, 0x24, 0x29,  0x43,
793           0x41,
794           0x0E, 0x7f, 0x1E,
795           0x0e, 0x25, 0x50,
796           0x0f, 0x51,
797           0x42, 0x43,
798 
799         };
800         static const UChar iso_2022_krtoUnicode[]={     0x41,0x03A0,0x51, 0x42,0x43};
801         static const int32_t from_iso_2022_krOffs [] ={  4,    9,    12,   13  , 14 };
802 
803         /*hz*/
804         static const uint8_t sampleTxt_hz[]={
805             0x41,
806             0x7e,   0x7b,   0x26,   0x30,
807             0x7f,   0x1E, /*unassigned*/
808             0x26,   0x30,
809             0x7e,   0x7d,   0x42,
810             0x7e,   0x7b,   0x7f,   0x1E,/*unassigned*/
811             0x7e,   0x7d,   0x42,
812         };
813         static const UChar hztoUnicode[]={
814             0x41,
815             0x03a0,
816             0x03A0,
817             0x42,
818             0x42,};
819 
820         static const int32_t from_hzOffs [] ={0,3,7,11,18,  };
821 
822         /*ISCII*/
823         static const uint8_t sampleTxt_iscii[]={
824             0x41,
825             0xa1,
826             0xEB,    /*unassigned*/
827             0x26,
828             0x30,
829             0xa2,
830             0xEC,    /*unassigned*/
831             0x42,
832         };
833         static const UChar isciitoUnicode[]={
834             0x41,
835             0x0901,
836             0x26,
837             0x30,
838             0x0902,
839             0x42,
840             };
841 
842         static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 };
843 
844         /*LMBCS*/
845         static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50,
846             0x12, 0x92, 0xa0, /*unassigned*/
847             0x12, 0x92, 0xA1,
848         };
849         static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4};
850         static const int32_t fromLMBCS[] = {0, 6};
851 
852         if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
853              EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
854             UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
855         log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
856 
857         if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
858              EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
859             UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
860         log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
861 
862         if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
863                  euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
864                 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0))
865             log_err("euc-jp->u with skip did not match.\n");
866 
867 
868 
869         if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
870                  euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
871                 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0))
872             log_err("euc-tw->u with skip did not match.\n");
873 
874 
875         if(!testConvertToUnicode(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
876                  iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
877                 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0))
878             log_err("iso-2022-jp->u with skip did not match.\n");
879 
880         if(!testConvertToUnicode(sampleTxt_iso_2022_cn, UPRV_LENGTHOF(sampleTxt_iso_2022_cn),
881                  iso_2022_cntoUnicode, UPRV_LENGTHOF(iso_2022_cntoUnicode),"iso-2022-cn",
882                 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0))
883             log_err("iso-2022-cn->u with skip did not match.\n");
884 
885         if(!testConvertToUnicode(sampleTxt_iso_2022_kr, UPRV_LENGTHOF(sampleTxt_iso_2022_kr),
886                  iso_2022_krtoUnicode, UPRV_LENGTHOF(iso_2022_krtoUnicode),"iso-2022-kr",
887                 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0))
888             log_err("iso-2022-kr->u with skip did not match.\n");
889 
890         if(!testConvertToUnicode(sampleTxt_hz, UPRV_LENGTHOF(sampleTxt_hz),
891                  hztoUnicode, UPRV_LENGTHOF(hztoUnicode),"HZ",
892                 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0))
893             log_err("HZ->u with skip did not match.\n");
894 
895         if(!testConvertToUnicode(sampleTxt_iscii, UPRV_LENGTHOF(sampleTxt_iscii),
896                  isciitoUnicode, UPRV_LENGTHOF(isciitoUnicode),"ISCII,version=0",
897                 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0))
898             log_err("iscii->u with skip did not match.\n");
899 
900         if(!testConvertToUnicode(sampleTxtLMBCS, UPRV_LENGTHOF(sampleTxtLMBCS),
901                 LMBCSToUnicode, UPRV_LENGTHOF(LMBCSToUnicode),"LMBCS-1",
902                 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0))
903             log_err("LMBCS->u with skip did not match.\n");
904 
905     }
906 #endif
907 
908     log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n");
909     {
910         const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
911             0xe0, 0x80,  0x61,};
912         UChar    expected1[] = {  0x0031, 0x4e8c, 0x0061};
913         int32_t offsets1[] = {   0x0000, 0x0001, 0x0006};
914 
915         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
916                  expected1, UPRV_LENGTHOF(expected1),"utf8",
917                 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
918             log_err("utf8->u with skip did not match.\n");
919     }
920 
921     log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n");
922     {
923         const uint8_t sampleText1[] = {  0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
924         UChar    expected1[] = {  0x00ba,  0x008c,  0x00f8,  0x0061,0xfffe,0xfffe};
925         int32_t offsets1[] = {   0x0000, 0x0001,0x0002,0x0003,4,5};
926 
927         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
928                  expected1, UPRV_LENGTHOF(expected1),"SCSU",
929                 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
930             log_err("scsu->u with skip did not match.\n");
931     }
932 
933     log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
934     {
935         const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */
936             0xFB, 0xEE, 0x28,       /* single-code point sequence at offset 0 */
937             0x24, 0x1E, 0x52,       /* 3 */
938             0xB2,                   /* 6 */
939             0x20,                   /* 7 */
940             0x40, 0x07,             /* 8 - wrong trail byte */
941             0xB3,                   /* 10 */
942             0xB1,                   /* 11 */
943             0xD0, 0x20,             /* 12 - wrong trail byte */
944             0x0D,                   /* 14 */
945             0x0A,                   /* 15 */
946             0x20,                   /* 16 */
947             0x00,                   /* 17 */
948             0xD0, 0x6C,             /* 18 */
949             0xB6,                   /* 20 */
950             0xD8, 0xA5,             /* 21 */
951             0x20,                   /* 23 */
952             0x68,                   /* 24 */
953             0x59,                   /* 25 */
954             0xF9, 0x28,             /* 26 */
955             0x6D,                   /* 28 */
956             0x20,                   /* 29 */
957             0x73,                   /* 30 */
958             0xE0, 0x2D,             /* 31 */
959             0xDE, 0x43,             /* 33 */
960             0xD0, 0x33,             /* 35 */
961             0x20,                   /* 37 */
962             0xFA, 0x83,             /* 38 */
963             0x25, 0x01,             /* 40 */
964             0xFB, 0x16, 0x87,       /* 42 */
965             0x4B, 0x16,             /* 45 */
966             0x20,                   /* 47 */
967             0xE6, 0xBD,             /* 48 */
968             0xEB, 0x5B,             /* 50 */
969             0x4B, 0xCC,             /* 52 */
970             0xF9, 0xA2,             /* 54 */
971             0xFC, 0x10, 0x3E,       /* 56 */
972             0xFE, 0x16, 0x3A, 0x8C, /* 59 */
973             0x20,                   /* 63 */
974             0xFC, 0x03, 0xAC,       /* 64 */
975             0xFF,                   /* 67 - FF just resets the state without encoding anything */
976             0x01,                   /* 68 */
977             0xDE, 0x83,             /* 69 */
978             0x20,                   /* 71 */
979             0x09                    /* 72 */
980         };
981         UChar expected[]={
982             0xFEFF, 0x0061, 0x0062, 0x0020,
983             0x0063, 0x0061, 0x000D, 0x000A,
984             0x0020, 0x0000, 0x00DF, 0x00E6,
985             0x0930, 0x0020, 0x0918, 0x0909,
986             0x3086, 0x304D, 0x0020, 0x3053,
987             0x4000, 0x4E00, 0x7777, 0x0020,
988             0x9FA5, 0x4E00, 0xAC00, 0xBCDE,
989             0x0020, 0xD7A3, 0xDC00, 0xD800,
990             0xD800, 0xDC00, 0xD845, 0xDDDD,
991             0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
992             0xDFFF, 0x0001, 0x0E40, 0x0020,
993             0x0009
994         };
995         int32_t offsets[]={
996             0, 3, 6, 7, /* skip 8, */
997             10, 11, /* skip 12, */
998             14, 15, 16, 17, 18,
999             20, 21, 23, 24, 25, 26, 28, 29,
1000             30, 31, 33, 35, 37, 38,
1001             40, 42, 45, 47, 48,
1002             50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59,
1003             63, 64, /* trail */ 64, /* reset only 67, */
1004             68, 69,
1005             71, 72
1006         };
1007 
1008         if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1009                                  expected, UPRV_LENGTHOF(expected), "BOCU-1",
1010                                  UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1011         ) {
1012             log_err("BOCU-1->u with skip did not match.\n");
1013         }
1014     }
1015 
1016     log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
1017     {
1018         const uint8_t sampleText[]={
1019             0x61,                               /* 0  'a' */
1020             0xc0, 0x80,                         /* 1  non-shortest form */
1021             0xc4, 0xb5,                         /* 3  U+0135 */
1022             0xed, 0x80, 0xa0,                   /* 5  Hangul U+d020 */
1023             0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8  surrogate pair for U+10401 */
1024             0xee, 0x80, 0x80,                   /* 14 PUA U+e000 */
1025             0xed, 0xb0, 0x81,                   /* 17 unpaired trail surrogate U+dc01 */
1026             0xf0, 0x90, 0x80, 0x80,             /* 20 illegal 4-byte form for U+10000 */
1027             0x62,                               /* 24 'b' */
1028             0xed, 0xa0, 0x81,                   /* 25 unpaired lead surrogate U+d801 */
1029             0xed, 0xa0,                         /* 28 incomplete sequence */
1030             0xd0, 0x80                          /* 30 U+0400 */
1031         };
1032         UChar expected[]={
1033             0x0061,
1034             /* skip */
1035             0x0135,
1036             0xd020,
1037             0xd801, 0xdc01,
1038             0xe000,
1039             0xdc01,
1040             /* skip */
1041             0x0062,
1042             0xd801,
1043             0x0400
1044         };
1045         int32_t offsets[]={
1046             0,
1047             /* skip 1, */
1048             3,
1049             5,
1050             8, 11,
1051             14,
1052             17,
1053             /* skip 20, 20, */
1054             24,
1055             25,
1056             /* skip 28 */
1057             30
1058         };
1059 
1060         /* without offsets */
1061         if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1062                                  expected, UPRV_LENGTHOF(expected), "CESU-8",
1063                                  UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0)
1064         ) {
1065             log_err("CESU-8->u with skip did not match.\n");
1066         }
1067 
1068         /* with offsets */
1069         if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1070                                  expected, UPRV_LENGTHOF(expected), "CESU-8",
1071                                  UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1072         ) {
1073             log_err("CESU-8->u with skip did not match.\n");
1074         }
1075     }
1076 }
1077 
TestStop(int32_t inputsize,int32_t outputsize)1078 static void TestStop(int32_t inputsize, int32_t outputsize)
1079 {
1080     static const UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1081     static const UChar  sampleText2[] =  { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1082 
1083     static const uint8_t expstopIBM_949[]= {
1084         0x00, 0xb0, 0xa1, 0xb0, 0xa2};
1085 
1086     static const uint8_t expstopIBM_943[] = {
1087         0x9f, 0xaf, 0x9f, 0xb1};
1088 
1089     static const uint8_t expstopIBM_930[] = {
1090         0x0e, 0x5d, 0x5f, 0x5d, 0x63};
1091 
1092     static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01};
1093     static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64};
1094     static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64};
1095 
1096 
1097     static const int32_t  toIBM949Offsstop [] = { 0, 1, 1, 2, 2};
1098     static const int32_t  toIBM943Offsstop [] = { 0, 0, 1, 1};
1099     static const int32_t  toIBM930Offsstop [] = { 0, 0, 0, 1, 1};
1100 
1101     static const int32_t  fromIBM949Offs [] = { 0, 1, 3};
1102     static const int32_t  fromIBM943Offs [] = { 0, 2};
1103     static const int32_t  fromIBM930Offs [] = { 1, 3};
1104 
1105     gInBufferSize = inputsize;
1106     gOutBufferSize = outputsize;
1107 
1108     /*From Unicode*/
1109 
1110 #if !UCONFIG_NO_LEGACY_CONVERSION
1111     if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1112             expstopIBM_949, UPRV_LENGTHOF(expstopIBM_949), "ibm-949",
1113             UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 ))
1114         log_err("u-> ibm-949 with stop did not match.\n");
1115     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1116             expstopIBM_943, UPRV_LENGTHOF(expstopIBM_943), "ibm-943",
1117             UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0))
1118         log_err("u-> ibm-943 with stop did not match.\n");
1119     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1120             expstopIBM_930, UPRV_LENGTHOF(expstopIBM_930), "ibm-930",
1121             UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 ))
1122         log_err("u-> ibm-930 with stop did not match.\n");
1123 
1124     log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP  \n");
1125     {
1126         static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1127         static const uint8_t toIBM943[]= { 0x61,};
1128         static const int32_t offset[]= {0,} ;
1129 
1130          /*EUC_JP*/
1131         static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1132         static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,};
1133         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,};
1134 
1135         /*EUC_TW*/
1136         static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1137         static const uint8_t to_euc_tw[]={
1138             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,};
1139         static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,};
1140 
1141         /*ISO-2022-JP*/
1142         static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, };
1143         static const uint8_t to_iso_2022_jp[]={
1144              0x41,
1145 
1146         };
1147         static const int32_t from_iso_2022_jpOffs [] ={0,};
1148 
1149         /*ISO-2022-cn*/
1150         static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1151         static const uint8_t to_iso_2022_cn[]={
1152             0x41,
1153 
1154         };
1155         static const int32_t from_iso_2022_cnOffs [] ={
1156             0,0,
1157             2,2,
1158         };
1159 
1160         /*ISO-2022-kr*/
1161         static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
1162         static const uint8_t to_iso_2022_kr[]={
1163             0x1b,   0x24,   0x29,   0x43,
1164             0x41,
1165             0x0e,   0x25,   0x50,
1166         };
1167         static const int32_t from_iso_2022_krOffs [] ={
1168             -1,-1,-1,-1,
1169              0,
1170             1,1,1,
1171         };
1172 
1173         /* HZ encoding */
1174         static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1175 
1176         static const uint8_t to_hz[]={
1177             0x7e,   0x7d, 0x41,
1178             0x7e,   0x7b,   0x26,   0x30,
1179 
1180         };
1181         static const int32_t from_hzOffs [] ={
1182             0, 0,0,
1183             1,1,1,1,
1184         };
1185 
1186         /*ISCII*/
1187         static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, };
1188         static const uint8_t to_iscii[]={
1189             0x41,
1190         };
1191         static const int32_t from_isciiOffs [] ={
1192             0,
1193         };
1194 
1195         if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
1196                 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
1197                 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 ))
1198             log_err("u-> ibm-943 with stop did not match.\n");
1199 
1200         if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
1201                 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
1202                 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 ))
1203             log_err("u-> euc-jp with stop did not match.\n");
1204 
1205         if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
1206                 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
1207                 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1208             log_err("u-> euc-tw with stop did not match.\n");
1209 
1210         if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
1211                 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
1212                 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1213             log_err("u-> iso-2022-jp with stop did not match.\n");
1214 
1215         if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
1216                 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
1217                 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1218             log_err("u-> iso-2022-jp with stop did not match.\n");
1219 
1220         if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
1221                 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
1222                 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 ))
1223             log_err("u-> iso-2022-cn with stop did not match.\n");
1224 
1225         if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
1226                 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
1227                 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 ))
1228             log_err("u-> iso-2022-kr with stop did not match.\n");
1229 
1230         if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
1231                 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
1232                 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 ))
1233             log_err("u-> HZ with stop did not match.\n");\
1234 
1235         if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
1236                 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
1237                 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 ))
1238             log_err("u-> iscii with stop did not match.\n");
1239 
1240 
1241     }
1242 #endif
1243 
1244     log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n");
1245     {
1246         static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1247 
1248         static const uint8_t to_SCSU[]={
1249             0x41,
1250 
1251         };
1252         int32_t from_SCSUOffs [] ={
1253             0,
1254 
1255         };
1256         if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1257                 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
1258                 UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 ))
1259             log_err("u-> SCSU with skip did not match.\n");
1260 
1261     }
1262 
1263     /*to Unicode*/
1264 
1265 #if !UCONFIG_NO_LEGACY_CONVERSION
1266     if(!testConvertToUnicode(expstopIBM_949, UPRV_LENGTHOF(expstopIBM_949),
1267              IBM_949stoptoUnicode, UPRV_LENGTHOF(IBM_949stoptoUnicode),"ibm-949",
1268             UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 ))
1269         log_err("ibm-949->u with stop did not match.\n");
1270     if(!testConvertToUnicode(expstopIBM_943, UPRV_LENGTHOF(expstopIBM_943),
1271              IBM_943stoptoUnicode, UPRV_LENGTHOF(IBM_943stoptoUnicode),"ibm-943",
1272             UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 ))
1273         log_err("ibm-943->u with stop did not match.\n");
1274     if(!testConvertToUnicode(expstopIBM_930, UPRV_LENGTHOF(expstopIBM_930),
1275              IBM_930stoptoUnicode, UPRV_LENGTHOF(IBM_930stoptoUnicode),"ibm-930",
1276             UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 ))
1277         log_err("ibm-930->u with stop did not match.\n");
1278 
1279     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n");
1280     {
1281 
1282         static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1283             0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1284         };
1285         static const UChar EBCIDIC_STATEFUL_toUnicode[] ={  0x6d63 };
1286         static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1};
1287 
1288 
1289          /*EUC-JP*/
1290         static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1291             0x8f, 0xda, 0xa1,  /*unassigned*/
1292            0x8e, 0xe0,
1293         };
1294         static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec};
1295         static const int32_t from_euc_jpOffs [] ={ 0, 1, 3};
1296 
1297           /*EUC_TW*/
1298         static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1299             0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1300            0xe6, 0xca, 0x8a,
1301         };
1302         UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2};
1303         int32_t from_euc_twOffs [] ={ 0, 1, 3};
1304 
1305 
1306 
1307          if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
1308              EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
1309             UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1310         log_err("EBCIDIC_STATEFUL->u with stop did not match.\n");
1311 
1312         if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1313              euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1314             UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0))
1315         log_err("euc-jp->u with stop did not match.\n");
1316 
1317         if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
1318                  euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
1319                 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1320             log_err("euc-tw->u with stop did not match.\n");
1321     }
1322 #endif
1323 
1324     log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n");
1325     {
1326         static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1327             0xe0, 0x80,  0x61,};
1328         static const UChar    expected1[] = {  0x0031, 0x4e8c,};
1329         static const int32_t offsets1[] = {   0x0000, 0x0001};
1330 
1331         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1332                  expected1, UPRV_LENGTHOF(expected1),"utf8",
1333                 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1334             log_err("utf8->u with stop did not match.\n");
1335     }
1336     log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n");
1337     {
1338         static const uint8_t sampleText1[] = {  0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04};
1339         static const UChar    expected1[] = {  0x00ba,  0x008c,  0x00f8,  0x0061};
1340         static const int32_t offsets1[] = {   0x0000, 0x0001,0x0002,0x0003};
1341 
1342         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1343                  expected1, UPRV_LENGTHOF(expected1),"SCSU",
1344                 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1345             log_err("scsu->u with stop did not match.\n");
1346     }
1347 
1348 }
1349 
TestSub(int32_t inputsize,int32_t outputsize)1350 static void TestSub(int32_t inputsize, int32_t outputsize)
1351 {
1352     static const UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1353     static const UChar sampleText2[]=    { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1354 
1355     static const uint8_t expsubIBM_949[] =
1356      { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 };
1357 
1358     static const uint8_t expsubIBM_943[] = {
1359         0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 };
1360 
1361     static const uint8_t expsubIBM_930[] = {
1362         0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f };
1363 
1364     static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 };
1365     static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1366     static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1367 
1368     static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1369     static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 };
1370     static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 };
1371 
1372     static const int32_t  fromIBM949Offs [] = { 0, 1, 3, 5, 7 };
1373     static const int32_t  fromIBM943Offs [] = { 0, 2, 4, 6 };
1374     static const int32_t  fromIBM930Offs [] = { 1, 3, 5, 7 };
1375 
1376     gInBufferSize = inputsize;
1377     gOutBufferSize = outputsize;
1378 
1379     /*from unicode*/
1380 
1381 #if !UCONFIG_NO_LEGACY_CONVERSION
1382     if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1383             expsubIBM_949, UPRV_LENGTHOF(expsubIBM_949), "ibm-949",
1384             UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 ))
1385         log_err("u-> ibm-949 with subst did not match.\n");
1386     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1387             expsubIBM_943, UPRV_LENGTHOF(expsubIBM_943), "ibm-943",
1388             UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0))
1389         log_err("u-> ibm-943 with subst did not match.\n");
1390     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1391             expsubIBM_930, UPRV_LENGTHOF(expsubIBM_930), "ibm-930",
1392             UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 ))
1393         log_err("u-> ibm-930 with subst did not match.\n");
1394 
1395     log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE  \n");
1396     {
1397         static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1398         static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 };
1399         static const int32_t offset[]= {0, 1, 1, 3, 3, 4};
1400 
1401 
1402         /* EUC_JP*/
1403         static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1404         static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1405             0xf4, 0xfe, 0xf4, 0xfe,
1406             0x61, 0x8e, 0xe0,
1407         };
1408         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7};
1409 
1410         /*EUC_TW*/
1411         static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1412         static const uint8_t to_euc_tw[]={
1413             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1414             0xfd, 0xfe, 0xfd, 0xfe,
1415             0x61, 0xe6, 0xca, 0x8a,
1416         };
1417 
1418         static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,};
1419 
1420         if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
1421                 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
1422                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 ))
1423             log_err("u-> ibm-943 with substitute did not match.\n");
1424 
1425         if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
1426                 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
1427                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 ))
1428             log_err("u-> euc-jp with substitute did not match.\n");
1429 
1430         if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
1431                 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
1432                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1433             log_err("u-> euc-tw with substitute did not match.\n");
1434     }
1435 #endif
1436 
1437     log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1438     {
1439         UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1440 
1441         const uint8_t to_SCSU[]={
1442             0x41,
1443             0x0e, 0xff,0xfd,
1444             0x42
1445 
1446 
1447         };
1448         int32_t from_SCSUOffs [] ={
1449             0,
1450             1,1,1,
1451             2,
1452 
1453         };
1454         const uint8_t to_SCSU_1[]={
1455             0x41,
1456 
1457         };
1458         int32_t from_SCSUOffs_1 [] ={
1459             0,
1460 
1461         };
1462         if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1463                 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
1464                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 ))
1465             log_err("u-> SCSU with substitute did not match.\n");
1466 
1467         if(!testConvertFromUnicodeWithContext(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1468                 to_SCSU_1, UPRV_LENGTHOF(to_SCSU_1), "SCSU",
1469                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
1470             log_err("u-> SCSU with substitute did not match.\n");
1471     }
1472 
1473     log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1474     {
1475         static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,};
1476         static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac,
1477                            0xf0, 0x90, 0x90, 0x81,
1478                            0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
1479                            0xef, 0xbf, 0xbf, 0x61,
1480 
1481         };
1482         static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 };
1483         if(!testConvertFromUnicode(testinput, UPRV_LENGTHOF(testinput),
1484                 expectedUTF8, UPRV_LENGTHOF(expectedUTF8), "utf8",
1485                 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) {
1486             log_err("u-> utf8 with substitute did not match.\n");
1487         }
1488     }
1489 
1490     log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1491     {
1492         static const UChar in[]={ 0x0041, 0xfeff };
1493 
1494         static const uint8_t out[]={
1495 #if U_IS_BIG_ENDIAN
1496             0xfe, 0xff,
1497             0x00, 0x41,
1498             0xfe, 0xff
1499 #else
1500             0xff, 0xfe,
1501             0x41, 0x00,
1502             0xff, 0xfe
1503 #endif
1504         };
1505         static const int32_t offsets[]={
1506             -1, -1, 0, 0, 1, 1
1507         };
1508 
1509         if(!testConvertFromUnicode(in, UPRV_LENGTHOF(in),
1510                                    out, UPRV_LENGTHOF(out), "UTF-16",
1511                                    UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1512         ) {
1513             log_err("u->UTF-16 with substitute did not match.\n");
1514         }
1515     }
1516 
1517     log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1518     {
1519         static const UChar in[]={ 0x0041, 0xfeff };
1520 
1521         static const uint8_t out[]={
1522 #if U_IS_BIG_ENDIAN
1523             0x00, 0x00, 0xfe, 0xff,
1524             0x00, 0x00, 0x00, 0x41,
1525             0x00, 0x00, 0xfe, 0xff
1526 #else
1527             0xff, 0xfe, 0x00, 0x00,
1528             0x41, 0x00, 0x00, 0x00,
1529             0xff, 0xfe, 0x00, 0x00
1530 #endif
1531         };
1532         static const int32_t offsets[]={
1533             -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1
1534         };
1535 
1536         if(!testConvertFromUnicode(in, UPRV_LENGTHOF(in),
1537                                    out, UPRV_LENGTHOF(out), "UTF-32",
1538                                    UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1539         ) {
1540             log_err("u->UTF-32 with substitute did not match.\n");
1541         }
1542     }
1543 
1544     /*to unicode*/
1545 
1546 #if !UCONFIG_NO_LEGACY_CONVERSION
1547     if(!testConvertToUnicode(expsubIBM_949, UPRV_LENGTHOF(expsubIBM_949),
1548              IBM_949subtoUnicode, UPRV_LENGTHOF(IBM_949subtoUnicode),"ibm-949",
1549             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 ))
1550         log_err("ibm-949->u with substitute did not match.\n");
1551     if(!testConvertToUnicode(expsubIBM_943, UPRV_LENGTHOF(expsubIBM_943),
1552              IBM_943subtoUnicode, UPRV_LENGTHOF(IBM_943subtoUnicode),"ibm-943",
1553             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 ))
1554         log_err("ibm-943->u with substitute did not match.\n");
1555     if(!testConvertToUnicode(expsubIBM_930, UPRV_LENGTHOF(expsubIBM_930),
1556              IBM_930subtoUnicode, UPRV_LENGTHOF(IBM_930subtoUnicode),"ibm-930",
1557             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 ))
1558         log_err("ibm-930->u with substitute did not match.\n");
1559 
1560     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1561     {
1562 
1563         const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1564             0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1565         };
1566         UChar EBCIDIC_STATEFUL_toUnicode[] ={  0x6d63, 0xfffd, 0x03b4
1567         };
1568         int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5};
1569 
1570 
1571         /* EUC_JP*/
1572         const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1573             0x8f, 0xda, 0xa1,  /*unassigned*/
1574            0x8e, 0xe0, 0x8a
1575         };
1576         UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a };
1577         int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6,  9, 11 };
1578 
1579         /*EUC_TW*/
1580         const uint8_t sampleTxt_euc_tw[]={
1581             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1582             0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1583             0xe6, 0xca, 0x8a,
1584         };
1585         UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, };
1586         int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13};
1587 
1588 
1589         if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
1590            EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
1591           UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1592             log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n");
1593 
1594 
1595         if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1596            euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1597           UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ))
1598             log_err("euc-jp->u with substitute did not match.\n");
1599 
1600 
1601         if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
1602            euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
1603           UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1604             log_err("euc-tw->u with substitute  did not match.\n");
1605 
1606 
1607         if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1608            euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1609           UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND))
1610             log_err("euc-jp->u with substitute did not match.\n");
1611     }
1612 #endif
1613 
1614     log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1615     {
1616         const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1617             0xe0, 0x80,  0x61,};
1618         UChar    expected1[] = {  0x0031, 0x4e8c, 0xfffd, 0xfffd, 0x0061};
1619         int32_t offsets1[] = {   0x0000, 0x0001, 0x0004, 0x0005, 0x0006};
1620 
1621         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1622                  expected1, UPRV_LENGTHOF(expected1),"utf8",
1623                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1624             log_err("utf8->u with substitute did not match.\n");
1625     }
1626     log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1627     {
1628         const uint8_t sampleText1[] = {  0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
1629         UChar    expected1[] = {  0x00ba,  0x008c,  0x00f8,  0x0061,0xfffd,0xfffd};
1630         int32_t offsets1[] = {   0x0000, 0x0001,0x0002,0x0003,4,5};
1631 
1632         if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1633                  expected1, UPRV_LENGTHOF(expected1),"SCSU",
1634                 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1635             log_err("scsu->u with stop did not match.\n");
1636     }
1637 
1638 #if !UCONFIG_NO_LEGACY_CONVERSION
1639     log_verbose("Testing ibm-930 subchar/subchar1\n");
1640     {
1641         static const UChar u1[]={         0x6d63,           0x6d64,     0x6d65,     0x6d66,     0xdf };
1642         static const uint8_t s1[]={       0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f };
1643         static const int32_t offsets1[]={ 0,    0,    0,    1,    1,    2,    2,    3,    3,    4,    4 };
1644 
1645         static const UChar u2[]={         0x6d63,           0x6d64,     0xfffd,     0x6d66,     0x1a };
1646         static const uint8_t s2[]={       0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 };
1647         static const int32_t offsets2[]={ 1,                3,          5,          7,          10 };
1648 
1649         if(!testConvertFromUnicode(u1, UPRV_LENGTHOF(u1), s1, UPRV_LENGTHOF(s1), "ibm-930",
1650                                    UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1651         ) {
1652             log_err("u->ibm-930 subchar/subchar1 did not match.\n");
1653         }
1654 
1655         if(!testConvertToUnicode(s2, UPRV_LENGTHOF(s2), u2, UPRV_LENGTHOF(u2), "ibm-930",
1656                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1657         ) {
1658             log_err("ibm-930->u subchar/subchar1 did not match.\n");
1659         }
1660     }
1661 
1662     log_verbose("Testing GB 18030 with substitute callbacks\n");
1663     {
1664         static const UChar u2[]={
1665             0x24, 0x7f, 0x80,                   0x1f9,      0x20ac,     0x4e00,     0x9fa6,                 0xffff,                 0xd800, 0xdc00,         0xfffd,                 0xdbff, 0xdfff };
1666         static const uint8_t gb2[]={
1667             0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 };
1668         static const int32_t offsets2[]={
1669             0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 };
1670 
1671         if(!testConvertToUnicode(gb2, UPRV_LENGTHOF(gb2), u2, UPRV_LENGTHOF(u2), "gb18030",
1672                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1673         ) {
1674             log_err("gb18030->u with substitute did not match.\n");
1675         }
1676     }
1677 #endif
1678 
1679     log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n");
1680     {
1681         static const uint8_t utf7[]={
1682          /* a~            a+AB~                           a+AB\x0c                        a+AB-                         a+AB.                         a+. */
1683             0x61, 0x7e,   0x61, 0x2b, 0x41, 0x42, 0x7e,   0x61, 0x2b, 0x41, 0x42, 0x0c,   0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b,   0x2e
1684         };
1685         static const UChar unicode[]={
1686             0x61, 0xfffd, 0x61,       0xfffd,     0xfffd, 0x61,       0xfffd,     0xfffd, 0x61,       0xfffd,           0x61,       0xfffd,     0x2e, 0x61, 0xfffd, 0x2e
1687         };
1688         static const int32_t offsets[]={
1689             0,    1,      2,          4,          6,      7,          9,          11,     12,         14,               17,         19,         21,   22,   23,     24
1690         };
1691 
1692         if(!testConvertToUnicode(utf7, UPRV_LENGTHOF(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7",
1693                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1694         ) {
1695             log_err("UTF-7->u with substitute did not match.\n");
1696         }
1697     }
1698 
1699     log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n");
1700     {
1701         static const uint8_t
1702             in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff },
1703             in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff },
1704             in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff };
1705 
1706         static const UChar
1707             out1[]={ 0x4e00, 0xfeff },
1708             out2[]={ 0x004e, 0xfffe },
1709             out3[]={ 0xfefd, 0x4e00, 0xfeff };
1710 
1711         static const int32_t
1712             offsets1[]={ 2, 4 },
1713             offsets2[]={ 2, 4 },
1714             offsets3[]={ 0, 2, 4 };
1715 
1716         if(!testConvertToUnicode(in1, UPRV_LENGTHOF(in1), out1, UPRV_LENGTHOF(out1), "UTF-16",
1717                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1718         ) {
1719             log_err("UTF-16 (BE BOM)->u with substitute did not match.\n");
1720         }
1721 
1722         if(!testConvertToUnicode(in2, UPRV_LENGTHOF(in2), out2, UPRV_LENGTHOF(out2), "UTF-16",
1723                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1724         ) {
1725             log_err("UTF-16 (LE BOM)->u with substitute did not match.\n");
1726         }
1727 
1728         if(!testConvertToUnicode(in3, UPRV_LENGTHOF(in3), out3, UPRV_LENGTHOF(out3), "UTF-16",
1729                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1730         ) {
1731             log_err("UTF-16 (no BOM)->u with substitute did not match.\n");
1732         }
1733     }
1734 
1735     log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n");
1736     {
1737         static const uint8_t
1738             in1[]={ 0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff },
1739             in2[]={ 0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00 },
1740             in3[]={ 0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01 },
1741             in4[]={ 0x00, 0x01, 0x02, 0x03,   0x00, 0x11, 0x12, 0x00,   0x00, 0x00, 0x4e, 0x00 };
1742 
1743         static const UChar
1744             out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff },
1745             out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe },
1746             out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd },
1747             out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 };
1748 
1749         static const int32_t
1750             offsets1[]={ 4, 4, 8 },
1751             offsets2[]={ 4, 4, 8 },
1752             offsets3[]={ 0, 4, 4, 8, 12 },
1753             offsets4[]={ 0, 0, 4, 8 };
1754 
1755         if(!testConvertToUnicode(in1, UPRV_LENGTHOF(in1), out1, UPRV_LENGTHOF(out1), "UTF-32",
1756                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1757         ) {
1758             log_err("UTF-32 (BE BOM)->u with substitute did not match.\n");
1759         }
1760 
1761         if(!testConvertToUnicode(in2, UPRV_LENGTHOF(in2), out2, UPRV_LENGTHOF(out2), "UTF-32",
1762                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1763         ) {
1764             log_err("UTF-32 (LE BOM)->u with substitute did not match.\n");
1765         }
1766 
1767         if(!testConvertToUnicode(in3, UPRV_LENGTHOF(in3), out3, UPRV_LENGTHOF(out3), "UTF-32",
1768                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1769         ) {
1770             log_err("UTF-32 (no BOM)->u with substitute did not match.\n");
1771         }
1772 
1773         if(!testConvertToUnicode(in4, UPRV_LENGTHOF(in4), out4, UPRV_LENGTHOF(out4), "UTF-32",
1774                                  UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0)
1775         ) {
1776             log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n");
1777         }
1778     }
1779 }
1780 
TestSubWithValue(int32_t inputsize,int32_t outputsize)1781 static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
1782 {
1783     UChar   sampleText[] =  { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1784     UChar  sampleText2[] =  { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1785 
1786     const uint8_t expsubwvalIBM_949[]= {
1787         0x00, 0xb0, 0xa1, 0xb0, 0xa2,
1788         0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 };
1789 
1790     const uint8_t expsubwvalIBM_943[]= {
1791         0x9f, 0xaf, 0x9f, 0xb1,
1792         0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 };
1793 
1794     const uint8_t expsubwvalIBM_930[] = {
1795         0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f };
1796 
1797     int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 };
1798     int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 };
1799     int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */
1800 
1801     gInBufferSize = inputsize;
1802     gOutBufferSize = outputsize;
1803 
1804     /*from Unicode*/
1805 
1806 #if !UCONFIG_NO_LEGACY_CONVERSION
1807     if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1808             expsubwvalIBM_949, UPRV_LENGTHOF(expsubwvalIBM_949), "ibm-949",
1809             UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 ))
1810         log_err("u-> ibm-949 with subst with value did not match.\n");
1811 
1812     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1813             expsubwvalIBM_943, UPRV_LENGTHOF(expsubwvalIBM_943), "ibm-943",
1814             UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 ))
1815         log_err("u-> ibm-943 with sub with value did not match.\n");
1816 
1817     if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1818             expsubwvalIBM_930, UPRV_LENGTHOF(expsubwvalIBM_930), "ibm-930",
1819             UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 ))
1820         log_err("u-> ibm-930 with subst with value did not match.\n");
1821 
1822 
1823     log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE  \n");
1824     {
1825         static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1826         static const uint8_t toIBM943[]= { 0x61,
1827             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1828             0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1829             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1830             0x61 };
1831         static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
1832 
1833 
1834          /* EUC_JP*/
1835         static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, };
1836         static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1837             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1838             0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1839             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1840             0x61, 0x8e, 0xe0,
1841         };
1842         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,
1843             3, 3, 3, 3, 3, 3,
1844             3, 3, 3, 3, 3, 3,
1845             5, 5, 5, 5, 5, 5,
1846             6, 7, 7,
1847         };
1848 
1849         /*EUC_TW*/
1850         static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1851         static const uint8_t to_euc_tw[]={
1852             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1853             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1854             0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1855             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1856             0x61, 0xe6, 0xca, 0x8a,
1857         };
1858         static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,
1859              3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5,
1860              6, 7, 7, 8,
1861         };
1862         /*ISO-2022-JP*/
1863         static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ;
1864         static const uint8_t to_iso_2022_jp1[]={
1865             0x1b,   0x24,   0x42,   0x21, 0x21,
1866             0x1b,   0x28,   0x42,   0x25, 0x55,   0x30,   0x30,   0x45,   0x39,
1867             0x1b,   0x24,   0x42,   0x21, 0x22,
1868             0x1b,   0x28,   0x42,   0x25, 0x55,   0x30,   0x30,   0x45,   0x39,
1869             0x42,
1870         };
1871 
1872         static const int32_t from_iso_2022_jpOffs1 [] ={
1873             0,0,0,0,0,
1874             1,1,1,1,1,1,1,1,1,
1875             2,2,2,2,2,
1876             3,3,3,3,3,3,3,3,3,
1877             4,
1878         };
1879         /* surrogate pair*/
1880         static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ;
1881         static const uint8_t to_iso_2022_jp2[]={
1882                                 0x1b,   0x24,   0x42,   0x21,   0x21,
1883                                 0x1b,   0x28,   0x42,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1884                                 0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1885                                 0x1b,   0x24,   0x42,   0x21,   0x22,
1886                                 0x1b,   0x28,   0x42,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1887                                 0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1888                                 0x42,
1889                                 };
1890         static const int32_t from_iso_2022_jpOffs2 [] ={
1891             0,0,0,0,0,
1892             1,1,1,1,1,1,1,1,1,
1893             1,1,1,1,1,1,
1894             3,3,3,3,3,
1895             4,4,4,4,4,4,4,4,4,
1896             4,4,4,4,4,4,
1897             6,
1898         };
1899 
1900         /*ISO-2022-cn*/
1901         static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1902         static const uint8_t to_iso_2022_cn[]={
1903             0x41,
1904             0x25, 0x55,   0x33,   0x37,   0x31,   0x32,
1905             0x42,
1906         };
1907         static const int32_t from_iso_2022_cnOffs [] ={
1908             0,
1909             1,1,1,1,1,1,
1910             2,
1911         };
1912 
1913         static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042};
1914 
1915         static const uint8_t to_iso_2022_cn4[]={
1916                              0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
1917                              0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1918                              0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1919                              0x0e,   0x21,   0x22,
1920                              0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1921                              0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1922                              0x42,
1923                              };
1924         static const int32_t from_iso_2022_cnOffs4 [] ={
1925             0,0,0,0,0,0,0,
1926             1,1,1,1,1,1,1,
1927             1,1,1,1,1,1,
1928             3,3,3,
1929             4,4,4,4,4,4,4,
1930             4,4,4,4,4,4,
1931             6
1932 
1933         };
1934 
1935         /*ISO-2022-kr*/
1936         static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
1937         static const uint8_t to_iso_2022_kr2[]={
1938             0x1b,   0x24,   0x29,   0x43,
1939             0x41,
1940             0x0e,   0x25,   0x50,
1941             0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1942             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1943             0x0e,   0x25,   0x50,
1944             0x0f,   0x42,
1945             0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
1946             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
1947             0x43
1948         };
1949         static const int32_t from_iso_2022_krOffs2 [] ={
1950             -1,-1,-1,-1,
1951              0,
1952             1,1,1,
1953             2,2,2,2,2,2,2,
1954             2,2,2,2,2,2,
1955             4,4,4,
1956             5,5,
1957             6,6,6,6,6,6,
1958             6,6,6,6,6,6,
1959             8,
1960         };
1961 
1962         static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 };
1963         static const uint8_t to_iso_2022_kr[]={
1964             0x1b,   0x24,   0x29,   0x43,
1965             0x41,
1966             0x0e,   0x25,   0x50,
1967             0x0f,   0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
1968             0x0e,   0x25,   0x50,
1969             0x0f,   0x42,
1970             0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
1971             0x43
1972         };
1973 
1974 
1975         static const int32_t from_iso_2022_krOffs [] ={
1976             -1,-1,-1,-1,
1977              0,
1978             1,1,1,
1979             2,2,2,2,2,2,2,
1980             3,3,3,
1981             4,4,
1982             5,5,5,5,5,5,
1983             6,
1984         };
1985         /* HZ encoding */
1986         static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1987 
1988         static const uint8_t to_hz[]={
1989             0x7e,   0x7d,   0x41,
1990             0x7e,   0x7b,   0x26,   0x30,
1991             0x7e,   0x7d,   0x25,   0x55,   0x30,   0x36,   0x36,   0x32,  /*unassigned*/
1992             0x7e,   0x7b,   0x26,   0x30,
1993             0x7e,   0x7d,   0x42,
1994 
1995         };
1996         static const int32_t from_hzOffs [] ={
1997             0,0,0,
1998             1,1,1,1,
1999             2,2,2,2,2,2,2,2,
2000             3,3,3,3,
2001             4,4,4
2002         };
2003 
2004         static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
2005         static const uint8_t to_hz2[]={
2006             0x7e,   0x7d,   0x41,
2007             0x7e,   0x7b,   0x26,   0x30,
2008             0x7e,   0x7d,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
2009             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2010             0x7e,   0x7b,   0x26,   0x30,
2011             0x7e,   0x7d,   0x42,
2012             0x25,   0x55,   0x44,   0x38,   0x34,   0x44,
2013             0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2014             0x43
2015         };
2016         static const int32_t from_hzOffs2 [] ={
2017             0,0,0,
2018             1,1,1,1,
2019             2,2,2,2,2,2,2,2,
2020             2,2,2,2,2,2,
2021             4,4,4,4,
2022             5,5,5,
2023             6,6,6,6,6,6,
2024             6,6,6,6,6,6,
2025             8,
2026         };
2027 
2028                 /*ISCII*/
2029         static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 };
2030         static const uint8_t to_iscii[]={
2031             0x41,
2032             0xef,   0x42,   0xa1,
2033             0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
2034             0xa2,
2035             0x42,
2036             0x25,   0x55,   0x33,   0x37,   0x31,   0x32,  /*unassigned*/
2037             0x43
2038         };
2039 
2040 
2041         static const int32_t from_isciiOffs [] ={
2042             0,
2043             1,1,1,
2044             2,2,2,2,2,2,
2045             3,
2046             4,
2047             5,5,5,5,5,5,
2048             6,
2049         };
2050 
2051         if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
2052                 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
2053                 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 ))
2054             log_err("u-> ibm-943 with subst with value did not match.\n");
2055 
2056         if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
2057                 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
2058                 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 ))
2059             log_err("u-> euc-jp with subst with value did not match.\n");
2060 
2061         if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
2062                 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
2063                 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 ))
2064             log_err("u-> euc-tw with subst with value did not match.\n");
2065 
2066         if(!testConvertFromUnicode(iso_2022_jp_inputText1, UPRV_LENGTHOF(iso_2022_jp_inputText1),
2067                 to_iso_2022_jp1, UPRV_LENGTHOF(to_iso_2022_jp1), "iso-2022-jp",
2068                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2069             log_err("u-> iso_2022_jp with subst with value did not match.\n");
2070 
2071         if(!testConvertFromUnicode(iso_2022_jp_inputText1, UPRV_LENGTHOF(iso_2022_jp_inputText1),
2072                 to_iso_2022_jp1, UPRV_LENGTHOF(to_iso_2022_jp1), "iso-2022-jp",
2073                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2074             log_err("u-> iso_2022_jp with subst with value did not match.\n");
2075 
2076         if(!testConvertFromUnicode(iso_2022_jp_inputText2, UPRV_LENGTHOF(iso_2022_jp_inputText2),
2077                 to_iso_2022_jp2, UPRV_LENGTHOF(to_iso_2022_jp2), "iso-2022-jp",
2078                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 ))
2079             log_err("u-> iso_2022_jp with subst with value did not match.\n");
2080         /*ESCAPE OPTIONS*/
2081         {
2082             /* surrogate pair*/
2083             static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ;
2084             static const uint8_t to_iso_2022_jp3_v2[]={
2085                     0x1b,   0x24,   0x42,   0x21,   0x21,
2086                     0x1b,   0x28,   0x42,   0x26,   0x23,   0x31,  0x34,   0x34,   0x34,   0x37, 0x30, 0x3b,
2087 
2088                     0x1b,   0x24,   0x42,   0x21,   0x22,
2089                     0x1b,   0x28,   0x42,   0x26,   0x23,  0x31,  0x34,   0x34,   0x34,   0x37, 0x30, 0x3b,
2090 
2091                     0x42,
2092                     0x26,   0x23,   0x33,   0x36,   0x38,   0x39,   0x32,   0x3b,
2093                     };
2094 
2095             static const int32_t from_iso_2022_jpOffs3_v2 [] ={
2096                 0,0,0,0,0,
2097                 1,1,1,1,1,1,1,1,1,1,1,1,
2098 
2099                 3,3,3,3,3,
2100                 4,4,4,4,4,4,4,4,4,4,4,4,
2101 
2102                 6,
2103                 7,7,7,7,7,7,7,7,7
2104             };
2105 
2106             if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, UPRV_LENGTHOF(iso_2022_jp_inputText3),
2107                     to_iso_2022_jp3_v2, UPRV_LENGTHOF(to_iso_2022_jp3_v2), "iso-2022-jp",
2108                     UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2109                 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n");
2110         }
2111         {
2112             static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2113             static const uint8_t to_iso_2022_cn5_v2[]={
2114                              0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2115                              0x0f,   0x5c,   0x75,   0x44,   0x38,   0x34,   0x44,
2116                              0x5c,   0x75,   0x44,   0x43,   0x35,   0x36,
2117                              0x0e,   0x21,   0x22,
2118                              0x0f,   0x5c,   0x75,   0x44,   0x38,   0x34,   0x44,
2119                              0x5c,   0x75,   0x44,   0x43,   0x35,   0x36,
2120                              0x42,
2121                              0x5c,   0x75,   0x30,   0x39,   0x30,   0x32,
2122                              };
2123             static const int32_t from_iso_2022_cnOffs5_v2 [] ={
2124                 0,0,0,0,0,0,0,
2125                 1,1,1,1,1,1,1,
2126                 1,1,1,1,1,1,
2127                 3,3,3,
2128                 4,4,4,4,4,4,4,
2129                 4,4,4,4,4,4,
2130                 6,
2131                 7,7,7,7,7,7
2132             };
2133             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, UPRV_LENGTHOF(iso_2022_cn_inputText5),
2134                 to_iso_2022_cn5_v2, UPRV_LENGTHOF(to_iso_2022_cn5_v2), "iso-2022-cn",
2135                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR ))
2136                 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n");
2137 
2138         }
2139         {
2140             static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2141             static const uint8_t to_iso_2022_cn6_v2[]={
2142                                 0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2143                                 0x0f,   0x7b,   0x55,   0x2b,   0x32,   0x33,   0x34,   0x35,   0x36,   0x7d,
2144                                 0x0e,   0x21,   0x22,
2145                                 0x0f,   0x7b,   0x55,   0x2b,   0x32,   0x33,   0x34,   0x35,   0x36,   0x7d,
2146                                 0x42,
2147                                 0x7b,   0x55,   0x2b,   0x30,   0x39,   0x30,   0x32,   0x7d
2148                              };
2149             static const int32_t from_iso_2022_cnOffs6_v2 [] ={
2150                     0,  0,  0,  0,  0,  0,  0,
2151                     1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
2152                     3,  3,  3,
2153                     4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
2154                     6,
2155                     7,  7,  7,  7,  7,  7,  7,  7,
2156             };
2157             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, UPRV_LENGTHOF(iso_2022_cn_inputText6),
2158                 to_iso_2022_cn6_v2, UPRV_LENGTHOF(to_iso_2022_cn6_v2), "iso-2022-cn",
2159                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR ))
2160                 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n");
2161 
2162         }
2163         {
2164             static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2165             static const uint8_t to_iso_2022_cn7_v2[]={
2166                                 0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2167                                 0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,   0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2168                                 0x0e,   0x21,   0x22,
2169                                 0x0f,   0x25,   0x55,   0x44,   0x38,   0x34,   0x44,   0x25,   0x55,   0x44,   0x43,   0x35,   0x36,
2170                                 0x42,   0x25,   0x55,   0x30,   0x39,   0x30,   0x32,
2171                             };
2172             static const int32_t from_iso_2022_cnOffs7_v2 [] ={
2173                                 0,  0,  0,  0,  0,  0,  0,
2174                                 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
2175                                 3,  3,  3,
2176                                 4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
2177                                 6,
2178                                 7,  7,  7,  7,  7,  7,
2179             };
2180             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, UPRV_LENGTHOF(iso_2022_cn_inputText7),
2181                 to_iso_2022_cn7_v2, UPRV_LENGTHOF(to_iso_2022_cn7_v2), "iso-2022-cn",
2182                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR ))
2183                 log_err("u-> iso-2022-cn with sub & K did not match.\n");
2184 
2185         }
2186         {
2187             static const UChar iso_2022_cn_inputText8[]={
2188                                 0x3000,
2189                                 0xD84D, 0xDC56,
2190                                 0x3001,
2191                                 0xD84D, 0xDC56,
2192                                 0xDBFF, 0xDFFF,
2193                                 0x0042,
2194                                 0x0902};
2195             static const uint8_t to_iso_2022_cn8_v2[]={
2196                                 0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2197                                 0x0f,   0x5c,   0x32,   0x33,   0x34,   0x35,   0x36,   0x20,
2198                                 0x0e,   0x21,   0x22,
2199                                 0x0f,   0x5c,   0x32,   0x33,   0x34,   0x35,   0x36,   0x20,
2200                                 0x5c,   0x31,   0x30,   0x46,   0x46,   0x46,   0x46,   0x20,
2201                                 0x42,
2202                                 0x5c,   0x39,   0x30,   0x32,   0x20
2203                              };
2204             static const int32_t from_iso_2022_cnOffs8_v2 [] ={
2205                     0,  0,  0,  0,  0,  0,  0,
2206                     1,  1,  1,  1,  1,  1,  1,  1,
2207                     3,  3,  3,
2208                     4,  4,  4,  4,  4,  4,  4,  4,
2209                     6,  6,  6,  6,  6,  6,  6,  6,
2210                     8,
2211                     9,  9,  9,  9,  9
2212             };
2213             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, UPRV_LENGTHOF(iso_2022_cn_inputText8),
2214                 to_iso_2022_cn8_v2, UPRV_LENGTHOF(to_iso_2022_cn8_v2), "iso-2022-cn",
2215                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR ))
2216                 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n");
2217 
2218         }
2219         {
2220             static const uint8_t to_iso_2022_cn4_v3[]={
2221                             0x1b,   0x24,   0x29,   0x41,   0x0e,   0x21,   0x21,
2222                             0x0f,   0x5c,   0x55,   0x30,   0x30,   0x30,   0x32,   0x33,   0x34,   0x35,   0x36,
2223                             0x0e,   0x21,   0x22,
2224                             0x0f,   0x5c,   0x55,   0x30,   0x30,   0x30,   0x32,   0x33,   0x34,   0x35,   0x36,
2225                             0x42
2226                              };
2227 
2228 
2229             static const int32_t from_iso_2022_cnOffs4_v3 [] ={
2230                 0,0,0,0,0,0,0,
2231                 1,1,1,1,1,1,1,1,1,1,1,
2232 
2233                 3,3,3,
2234                 4,4,4,4,4,4,4,4,4,4,4,
2235 
2236                 6
2237 
2238             };
2239             if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, UPRV_LENGTHOF(iso_2022_cn_inputText4),
2240                 to_iso_2022_cn4_v3, UPRV_LENGTHOF(to_iso_2022_cn4_v3), "iso-2022-cn",
2241                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2242             {
2243                 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n");
2244             }
2245         }
2246         if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
2247                 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
2248                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 ))
2249             log_err("u-> iso_2022_cn with subst with value did not match.\n");
2250 
2251         if(!testConvertFromUnicode(iso_2022_cn_inputText4, UPRV_LENGTHOF(iso_2022_cn_inputText4),
2252                 to_iso_2022_cn4, UPRV_LENGTHOF(to_iso_2022_cn4), "iso-2022-cn",
2253                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 ))
2254             log_err("u-> iso_2022_cn with subst with value did not match.\n");
2255         if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
2256                 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
2257                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 ))
2258             log_err("u-> iso_2022_kr with subst with value did not match.\n");
2259         if(!testConvertFromUnicode(iso_2022_kr_inputText2, UPRV_LENGTHOF(iso_2022_kr_inputText2),
2260                 to_iso_2022_kr2, UPRV_LENGTHOF(to_iso_2022_kr2), "iso-2022-kr",
2261                 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 ))
2262             log_err("u-> iso_2022_kr2 with subst with value did not match.\n");
2263         if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
2264                 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
2265                 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 ))
2266             log_err("u-> hz with subst with value did not match.\n");
2267         if(!testConvertFromUnicode(hz_inputText2, UPRV_LENGTHOF(hz_inputText2),
2268                 to_hz2, UPRV_LENGTHOF(to_hz2), "HZ",
2269                 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 ))
2270             log_err("u-> hz with subst with value did not match.\n");
2271 
2272         if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
2273                 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
2274                 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 ))
2275             log_err("u-> iscii with subst with value did not match.\n");
2276     }
2277 #endif
2278 
2279     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
2280     /*to Unicode*/
2281     {
2282 #if !UCONFIG_NO_LEGACY_CONVERSION
2283         static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
2284             0x81, 0xad, /*unassigned*/
2285             0x89, 0xd3 };
2286         static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
2287             0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
2288             0x7B87};
2289         static const int32_t  fromIBM943Offs [] =    { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
2290 
2291         /* EUC_JP*/
2292         static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
2293             0x8f, 0xda, 0xa1,  /*unassigned*/
2294            0x8e, 0xe0,
2295         };
2296         static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec,
2297             0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31,
2298             0x00a2 };
2299         static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3,
2300             6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2301             9,
2302         };
2303 
2304         /*EUC_TW*/
2305         static const uint8_t sampleTxt_euc_tw[]={
2306             0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
2307             0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
2308             0xe6, 0xca, 0x8a,
2309         };
2310         static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2,
2311              0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43,
2312              0x8706, 0x8a, };
2313         static const int32_t from_euc_twOffs [] ={ 0, 1, 3,
2314              7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2315              11, 13};
2316 
2317         /*iso-2022-jp*/
2318         static const uint8_t sampleTxt_iso_2022_jp[]={
2319             0x1b,   0x28,   0x42,   0x41,
2320             0x1b,   0x24,   0x42,   0x3a, 0x1a, /*unassigned*/
2321             0x1b,   0x28,   0x42,   0x42,
2322 
2323         };
2324                                                    /*     A    %    X    3    A    %    X    1    A     B    */
2325         static const UChar iso_2022_jptoUnicode[]={    0x41,0x25,0x58,0x33,0x41,0x25,0x58,0x31,0x41, 0x42 };
2326         static const int32_t from_iso_2022_jpOffs [] ={  3,   7,   7,   7,   7,   7,   7,   7,   7,    12   };
2327 
2328         /*iso-2022-cn*/
2329         static const uint8_t sampleTxt_iso_2022_cn[]={
2330             0x0f,   0x41,   0x44,
2331             0x1B,   0x24,   0x29,   0x47,
2332             0x0E,   0x40,   0x6c, /*unassigned*/
2333             0x0f,   0x42,
2334 
2335         };
2336         static const UChar iso_2022_cntoUnicode[]={    0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 };
2337         static const int32_t from_iso_2022_cnOffs [] ={  1,   2,   8,   8,   8,   8,   8,   8,   8,  8,    11   };
2338 
2339         /*iso-2022-kr*/
2340         static const uint8_t sampleTxt_iso_2022_kr[]={
2341           0x1b, 0x24, 0x29,  0x43,
2342           0x41,
2343           0x0E, 0x7f, 0x1E,
2344           0x0e, 0x25, 0x50,
2345           0x0f, 0x51,
2346           0x42, 0x43,
2347 
2348         };
2349         static const UChar iso_2022_krtoUnicode[]={     0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43};
2350         static const int32_t from_iso_2022_krOffs [] ={  4,   6,   6,   6,   6,   6,   6,   6,   6,    9,    12,   13  , 14 };
2351 
2352         /*hz*/
2353         static const uint8_t sampleTxt_hz[]={
2354             0x41,
2355             0x7e,   0x7b,   0x26,   0x30,
2356             0x7f,   0x1E, /*unassigned*/
2357             0x26,   0x30,
2358             0x7e,   0x7d,   0x42,
2359             0x7e,   0x7b,   0x7f,   0x1E,/*unassigned*/
2360             0x7e,   0x7d,   0x42,
2361         };
2362         static const UChar hztoUnicode[]={
2363             0x41,
2364             0x03a0,
2365             0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2366             0x03A0,
2367             0x42,
2368             0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2369             0x42,};
2370 
2371         static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18,  };
2372 
2373 
2374         /*iscii*/
2375         static const uint8_t sampleTxt_iscii[]={
2376             0x41,
2377             0x30,
2378             0xEB, /*unassigned*/
2379             0xa3,
2380             0x42,
2381             0xEC, /*unassigned*/
2382             0x42,
2383         };
2384         static const UChar isciitoUnicode[]={
2385             0x41,
2386             0x30,
2387             0x25,  0x58,  0x45, 0x42,
2388             0x0903,
2389             0x42,
2390             0x25,  0x58,  0x45, 0x43,
2391             0x42,};
2392 
2393         static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6  };
2394 #endif
2395 
2396         /*UTF8*/
2397         static const uint8_t sampleTxtUTF8[]={
2398             0x20, 0x64, 0x50,
2399             0xC2, 0x7E, /* truncated char */
2400             0x20,
2401             0xE0, 0xB5, 0x7E, /* truncated char */
2402             0x40,
2403         };
2404         static const UChar UTF8ToUnicode[]={
2405             0x0020, 0x0064, 0x0050,
2406             0x0025, 0x0058, 0x0043, 0x0032, 0x007E,  /* \xC2~ */
2407             0x0020,
2408             0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E,
2409             0x0040
2410         };
2411         static const int32_t fromUTF8[] = {
2412             0, 1, 2,
2413             3, 3, 3, 3, 4,
2414             5,
2415             6, 6, 6, 6, 6, 6, 6, 6, 8,
2416             9
2417         };
2418         static const UChar UTF8ToUnicodeXML_DEC[]={
2419             0x0020, 0x0064, 0x0050,
2420             0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E,  /* &#194;~ */
2421             0x0020,
2422             0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E,
2423             0x0040
2424         };
2425         static const int32_t fromUTF8XML_DEC[] = {
2426             0, 1, 2,
2427             3, 3, 3, 3, 3, 3, 4,
2428             5,
2429             6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8,
2430             9
2431         };
2432 
2433 
2434 #if !UCONFIG_NO_LEGACY_CONVERSION
2435         if(!testConvertToUnicode(sampleTxtToU, UPRV_LENGTHOF(sampleTxtToU),
2436                  IBM_943toUnicode, UPRV_LENGTHOF(IBM_943toUnicode),"ibm-943",
2437                 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 ))
2438             log_err("ibm-943->u with substitute with value did not match.\n");
2439 
2440         if(!testConvertToUnicode(sampleTxt_EUC_JP, UPRV_LENGTHOF(sampleTxt_EUC_JP),
2441                  EUC_JPtoUnicode, UPRV_LENGTHOF(EUC_JPtoUnicode),"IBM-eucJP",
2442                 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0))
2443             log_err("euc-jp->u with substitute with value did not match.\n");
2444 
2445         if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
2446                  euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
2447                 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0))
2448             log_err("euc-tw->u with substitute with value did not match.\n");
2449 
2450         if(!testConvertToUnicode(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2451                  iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
2452                 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0))
2453             log_err("iso-2022-jp->u with substitute with value did not match.\n");
2454 
2455         if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2456                  iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
2457                 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR))
2458             log_err("iso-2022-jp->u with substitute with value did not match.\n");
2459 
2460         {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */
2461             {
2462                 static const UChar iso_2022_jptoUnicodeDec[]={
2463                                                   0x0041,
2464                                                   /*   &         #         5         8         ;   */
2465                                                   0x0026,   0x0023,   0x0035,   0x0038,   0x003b,
2466                                                   0x0026,   0x0023,   0x0032,   0x0036,   0x003b,
2467                                                   0x0042 };
2468                 static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12,  };
2469                 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2470                      iso_2022_jptoUnicodeDec, UPRV_LENGTHOF(iso_2022_jptoUnicodeDec),"iso-2022-jp",
2471                     UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2472                 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n");
2473             }
2474             {
2475                 static const UChar iso_2022_jptoUnicodeHex[]={
2476                                                   0x0041,
2477                                                   /*   &       #       x       3       A       ;  */
2478                                                   0x0026, 0x0023, 0x0078, 0x0033, 0x0041, 0x003b,
2479                                                   0x0026, 0x0023, 0x0078, 0x0031, 0x0041, 0x003b,
2480                                                   0x0042 };
2481                 static const int32_t from_iso_2022_jpOffsHex [] ={  3,7,7,7,7,7,7,7,7,7,7,7,7,12   };
2482                 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2483                      iso_2022_jptoUnicodeHex, UPRV_LENGTHOF(iso_2022_jptoUnicodeHex),"iso-2022-jp",
2484                     UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR ))
2485                 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n");
2486             }
2487             {
2488                 static const UChar iso_2022_jptoUnicodeC[]={
2489                                                 0x0041,
2490                                                 0x005C, 0x0078, 0x0033, 0x0041,   /*  \x3A */
2491                                                 0x005C, 0x0078, 0x0031, 0x0041,   /*  \x1A */
2492                                                 0x0042 };
2493                 int32_t from_iso_2022_jpOffsC [] ={  3,7,7,7,7,7,7,7,7,12   };
2494                 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2495                      iso_2022_jptoUnicodeC, UPRV_LENGTHOF(iso_2022_jptoUnicodeC),"iso-2022-jp",
2496                     UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2497                 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n");
2498             }
2499         }
2500         if(!testConvertToUnicode(sampleTxt_iso_2022_cn, UPRV_LENGTHOF(sampleTxt_iso_2022_cn),
2501                  iso_2022_cntoUnicode, UPRV_LENGTHOF(iso_2022_cntoUnicode),"iso-2022-cn",
2502                 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0))
2503             log_err("iso-2022-cn->u with substitute with value did not match.\n");
2504 
2505         if(!testConvertToUnicode(sampleTxt_iso_2022_kr, UPRV_LENGTHOF(sampleTxt_iso_2022_kr),
2506                  iso_2022_krtoUnicode, UPRV_LENGTHOF(iso_2022_krtoUnicode),"iso-2022-kr",
2507                 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0))
2508             log_err("iso-2022-kr->u with substitute with value did not match.\n");
2509 
2510          if(!testConvertToUnicode(sampleTxt_hz, UPRV_LENGTHOF(sampleTxt_hz),
2511                  hztoUnicode, UPRV_LENGTHOF(hztoUnicode),"HZ",
2512                 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0))
2513             log_err("hz->u with substitute with value did not match.\n");
2514 
2515          if(!testConvertToUnicode(sampleTxt_iscii, UPRV_LENGTHOF(sampleTxt_iscii),
2516                  isciitoUnicode, UPRV_LENGTHOF(isciitoUnicode),"ISCII,version=0",
2517                 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0))
2518             log_err("ISCII ->u with substitute with value did not match.\n");
2519 #endif
2520 
2521         if(!testConvertToUnicode(sampleTxtUTF8, UPRV_LENGTHOF(sampleTxtUTF8),
2522                 UTF8ToUnicode, UPRV_LENGTHOF(UTF8ToUnicode),"UTF-8",
2523                 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0))
2524             log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2525         if(!testConvertToUnicodeWithContext(sampleTxtUTF8, UPRV_LENGTHOF(sampleTxtUTF8),
2526                 UTF8ToUnicodeXML_DEC, UPRV_LENGTHOF(UTF8ToUnicodeXML_DEC),"UTF-8",
2527                 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR))
2528             log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2529     }
2530 }
2531 
2532 #if !UCONFIG_NO_LEGACY_CONVERSION
TestLegalAndOthers(int32_t inputsize,int32_t outputsize)2533 static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize)
2534 {
2535     static const UChar    legalText[] =  { 0x0000, 0xAC00, 0xAC01, 0xD700 };
2536     static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
2537     static const int32_t  to949legal[] = {0, 1, 1, 2, 2, 3, 3};
2538 
2539 
2540     static const uint8_t text943[] = {
2541         0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
2542     static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22,  0x5b57 };
2543     static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22,  0x5b57 };
2544     static const UChar toUnicode943stop[]= { 0x304b};
2545 
2546     static const int32_t  fromIBM943Offssub[]  = { 0, 2, 3, 4, 5, 7 };
2547     static const int32_t  fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 };
2548     static const int32_t  fromIBM943Offsstop[] = { 0};
2549 
2550     gInBufferSize = inputsize;
2551     gOutBufferSize = outputsize;
2552     /*checking with a legal value*/
2553     if(!testConvertFromUnicode(legalText, UPRV_LENGTHOF(legalText),
2554             templegal949, UPRV_LENGTHOF(templegal949), "ibm-949",
2555             UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 ))
2556         log_err("u-> ibm-949 with skip did not match.\n");
2557 
2558     /*checking illegal value for ibm-943 with substitute*/
2559     if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2560              toUnicode943sub, UPRV_LENGTHOF(toUnicode943sub),"ibm-943",
2561             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2562         log_err("ibm-943->u with subst did not match.\n");
2563     /*checking illegal value for ibm-943 with skip */
2564     if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2565              toUnicode943skip, UPRV_LENGTHOF(toUnicode943skip),"ibm-943",
2566             UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 ))
2567         log_err("ibm-943->u with skip did not match.\n");
2568 
2569     /*checking illegal value for ibm-943 with stop */
2570     if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2571              toUnicode943stop, UPRV_LENGTHOF(toUnicode943stop),"ibm-943",
2572             UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 ))
2573         log_err("ibm-943->u with stop did not match.\n");
2574 
2575 }
2576 
TestSingleByte(int32_t inputsize,int32_t outputsize)2577 static void TestSingleByte(int32_t inputsize, int32_t outputsize)
2578 {
2579     static const uint8_t sampleText[] = {
2580         0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
2581         0xff, 0x32, 0x33};
2582     static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 };
2583     static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 };
2584     /*checking illegal value for ibm-943 with substitute*/
2585     gInBufferSize = inputsize;
2586     gOutBufferSize = outputsize;
2587 
2588     if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
2589              toUnicode943sub, UPRV_LENGTHOF(toUnicode943sub),"ibm-943",
2590             UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2591         log_err("ibm-943->u with subst did not match.\n");
2592 }
2593 
TestEBCDIC_STATEFUL_Sub(int32_t inputsize,int32_t outputsize)2594 static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize)
2595 {
2596     /*EBCDIC_STATEFUL*/
2597     static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 };
2598     static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 };
2599     static const int32_t offset_930[]=     { 0,    1,    1,    1,    2,    2,    3,    4,    4,    4,    5,    5    };
2600 /*                              s     SO    doubl       SI    sng   s     SO    fe    fe    SI    s    */
2601 
2602     /*EBCDIC_STATEFUL with subChar=3f*/
2603     static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 };
2604     static const int32_t offset_930_subvaried[]=     { 0,    1,    1,    1,    2,    2,    3,    4,    5    };
2605     static const char mySubChar[]={ 0x3f};
2606 
2607     gInBufferSize = inputsize;
2608     gOutBufferSize = outputsize;
2609 
2610     if(!testConvertFromUnicode(ebcdic_inputTest, UPRV_LENGTHOF(ebcdic_inputTest),
2611         toIBM930, UPRV_LENGTHOF(toIBM930), "ibm-930",
2612         UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 ))
2613             log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n");
2614 
2615     if(!testConvertFromUnicode(ebcdic_inputTest, UPRV_LENGTHOF(ebcdic_inputTest),
2616         toIBM930_subvaried, UPRV_LENGTHOF(toIBM930_subvaried), "ibm-930",
2617         UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 ))
2618             log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n");
2619 }
2620 #endif
2621 
testConvertFromUnicode(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UConverterFromUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len)2622 UBool testConvertFromUnicode(const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
2623                 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
2624                 const char *mySubChar, int8_t len)
2625 {
2626 
2627 
2628     UErrorCode status = U_ZERO_ERROR;
2629     UConverter *conv = 0;
2630     char junkout[NEW_MAX_BUFFER]; /* FIX */
2631     int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2632     const UChar *src;
2633     char *end;
2634     char *targ;
2635     int32_t *offs;
2636     int i;
2637     int32_t  realBufferSize;
2638     char *realBufferEnd;
2639     const UChar *realSourceEnd;
2640     const UChar *sourceLimit;
2641     UBool checkOffsets = true;
2642     UBool doFlush;
2643     char junk[9999];
2644     char offset_str[9999];
2645     char *p;
2646     UConverterFromUCallback oldAction = NULL;
2647     const void* oldContext = NULL;
2648 
2649 
2650     for(i=0;i<NEW_MAX_BUFFER;i++)
2651         junkout[i] = (char)0xF0;
2652     for(i=0;i<NEW_MAX_BUFFER;i++)
2653         junokout[i] = 0xFF;
2654     setNuConvTestName(codepage, "FROM");
2655 
2656     log_verbose("\nTesting========= %s  FROM \n  inputbuffer= %d   outputbuffer= %d\n", codepage, gInBufferSize,
2657             gOutBufferSize);
2658 
2659     conv = ucnv_open(codepage, &status);
2660     if(U_FAILURE(status))
2661     {
2662         log_data_err("Couldn't open converter %s\n",codepage);
2663         return true;
2664     }
2665 
2666     log_verbose("Converter opened..\n");
2667 
2668     /*----setting the callback routine----*/
2669     ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2670     if (U_FAILURE(status))
2671     {
2672         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2673     }
2674     /*------------------------*/
2675     /*setting the subChar*/
2676     if(mySubChar != NULL){
2677         ucnv_setSubstChars(conv, mySubChar, len, &status);
2678         if (U_FAILURE(status))  {
2679             log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2680         }
2681     }
2682     /*------------*/
2683 
2684     src = source;
2685     targ = junkout;
2686     offs = junokout;
2687 
2688     realBufferSize = UPRV_LENGTHOF(junkout);
2689     realBufferEnd = junkout + realBufferSize;
2690     realSourceEnd = source + sourceLen;
2691 
2692     if ( gOutBufferSize != realBufferSize )
2693       checkOffsets = false;
2694 
2695     if( gInBufferSize != NEW_MAX_BUFFER )
2696       checkOffsets = false;
2697 
2698     do
2699     {
2700         end = nct_min(targ + gOutBufferSize, realBufferEnd);
2701         sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
2702 
2703         doFlush = (UBool)(sourceLimit == realSourceEnd);
2704 
2705         if(targ == realBufferEnd)
2706         {
2707             log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
2708             return false;
2709         }
2710         log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"true":"false");
2711 
2712 
2713         status = U_ZERO_ERROR;
2714 
2715         ucnv_fromUnicode (conv,
2716                   (char **)&targ,
2717                   (const char *)end,
2718                   &src,
2719                   sourceLimit,
2720                   checkOffsets ? offs : NULL,
2721                   doFlush, /* flush if we're at the end of the input data */
2722                   &status);
2723     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
2724 
2725 
2726     if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2727         UChar errChars[50]; /* should be sufficient */
2728         int8_t errLen = 50;
2729         UErrorCode err = U_ZERO_ERROR;
2730         const UChar* start= NULL;
2731         ucnv_getInvalidUChars(conv,errChars, &errLen, &err);
2732         if(U_FAILURE(err)){
2733             log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err));
2734         }
2735         /* length of in invalid chars should be equal to returned length*/
2736         start = src - errLen;
2737         if(u_strncmp(errChars,start,errLen)!=0){
2738             log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2739         }
2740     }
2741     /* allow failure codes for the stop callback */
2742     if(U_FAILURE(status) &&
2743        (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND)))
2744     {
2745         log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2746         return false;
2747     }
2748 
2749     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
2750         sourceLen, targ-junkout);
2751     if(getTestOption(VERBOSITY_OPTION))
2752     {
2753 
2754         junk[0] = 0;
2755         offset_str[0] = 0;
2756         for(p = junkout;p<targ;p++)
2757         {
2758             sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
2759             sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
2760         }
2761 
2762         log_verbose(junk);
2763         printSeq(expect, expectLen);
2764         if ( checkOffsets )
2765         {
2766             log_verbose("\nOffsets:");
2767             log_verbose(offset_str);
2768         }
2769         log_verbose("\n");
2770     }
2771     ucnv_close(conv);
2772 
2773 
2774     if(expectLen != targ-junkout)
2775     {
2776         log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2777         log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2778         printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2779         printSeqErr(expect, expectLen);
2780         return false;
2781     }
2782 
2783     if (checkOffsets && (expectOffsets != 0) )
2784     {
2785         log_verbose("comparing %d offsets..\n", targ-junkout);
2786         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
2787             log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2788             log_err("Got Output : ");
2789             printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2790             log_err("Got Offsets:      ");
2791             for(p=junkout;p<targ;p++)
2792                 log_err("%d,", junokout[p-junkout]);
2793             log_err("\n");
2794             log_err("Expected Offsets: ");
2795             for(i=0; i<(targ-junkout); i++)
2796                 log_err("%d,", expectOffsets[i]);
2797             log_err("\n");
2798             return false;
2799         }
2800     }
2801 
2802     if(!memcmp(junkout, expect, expectLen))
2803     {
2804         log_verbose("String matches! %s\n", gNuConvTestName);
2805         return true;
2806     }
2807     else
2808     {
2809         log_err("String does not match. %s\n", gNuConvTestName);
2810         log_err("source: ");
2811         printUSeqErr(source, sourceLen);
2812         log_err("Got:      ");
2813         printSeqErr((const uint8_t *)junkout, expectLen);
2814         log_err("Expected: ");
2815         printSeqErr(expect, expectLen);
2816         return false;
2817     }
2818 }
2819 
testConvertToUnicode(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UConverterToUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len)2820 UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
2821                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
2822                const char *mySubChar, int8_t len)
2823 {
2824     UErrorCode status = U_ZERO_ERROR;
2825     UConverter *conv = 0;
2826     UChar   junkout[NEW_MAX_BUFFER]; /* FIX */
2827     int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2828     const char *src;
2829     const char *realSourceEnd;
2830     const char *srcLimit;
2831     UChar *targ;
2832     UChar *end;
2833     int32_t *offs;
2834     int i;
2835     UBool   checkOffsets = true;
2836     char junk[9999];
2837     char offset_str[9999];
2838     UChar *p;
2839     UConverterToUCallback oldAction = NULL;
2840     const void* oldContext = NULL;
2841 
2842     int32_t   realBufferSize;
2843     UChar *realBufferEnd;
2844 
2845 
2846     for(i=0;i<NEW_MAX_BUFFER;i++)
2847         junkout[i] = 0xFFFE;
2848 
2849     for(i=0;i<NEW_MAX_BUFFER;i++)
2850         junokout[i] = -1;
2851 
2852     setNuConvTestName(codepage, "TO");
2853 
2854     log_verbose("\n=========  %s\n", gNuConvTestName);
2855 
2856     conv = ucnv_open(codepage, &status);
2857     if(U_FAILURE(status))
2858     {
2859         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
2860         return true;
2861     }
2862 
2863     log_verbose("Converter opened..\n");
2864 
2865     src = (const char *)source;
2866     targ = junkout;
2867     offs = junokout;
2868 
2869     realBufferSize = UPRV_LENGTHOF(junkout);
2870     realBufferEnd = junkout + realBufferSize;
2871     realSourceEnd = src + sourcelen;
2872     /*----setting the callback routine----*/
2873     ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2874     if (U_FAILURE(status))
2875     {
2876         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2877     }
2878     /*-------------------------------------*/
2879     /*setting the subChar*/
2880     if(mySubChar != NULL){
2881         ucnv_setSubstChars(conv, mySubChar, len, &status);
2882         if (U_FAILURE(status))  {
2883             log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2884         }
2885     }
2886     /*------------*/
2887 
2888 
2889     if ( gOutBufferSize != realBufferSize )
2890         checkOffsets = false;
2891 
2892     if( gInBufferSize != NEW_MAX_BUFFER )
2893         checkOffsets = false;
2894 
2895     do
2896     {
2897         end = nct_min( targ + gOutBufferSize, realBufferEnd);
2898         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
2899 
2900         if(targ == realBufferEnd)
2901         {
2902             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
2903             return false;
2904         }
2905         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
2906 
2907 
2908 
2909         status = U_ZERO_ERROR;
2910 
2911         ucnv_toUnicode (conv,
2912                 &targ,
2913                 end,
2914                 (const char **)&src,
2915                 (const char *)srcLimit,
2916                 checkOffsets ? offs : NULL,
2917                 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
2918                 &status);
2919     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
2920 
2921     if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2922         char errChars[50]; /* should be sufficient */
2923         int8_t errLen = 50;
2924         UErrorCode err = U_ZERO_ERROR;
2925         const char* start= NULL;
2926         ucnv_getInvalidChars(conv,errChars, &errLen, &err);
2927         if(U_FAILURE(err)){
2928             log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err));
2929         }
2930         /* length of in invalid chars should be equal to returned length*/
2931         start = src - errLen;
2932         if(uprv_strncmp(errChars,start,errLen)!=0){
2933             log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2934         }
2935     }
2936     /* allow failure codes for the stop callback */
2937     if(U_FAILURE(status) &&
2938        (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND)))
2939     {
2940         log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2941         return false;
2942     }
2943 
2944     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
2945         sourcelen, targ-junkout);
2946     if(getTestOption(VERBOSITY_OPTION))
2947     {
2948 
2949         junk[0] = 0;
2950         offset_str[0] = 0;
2951 
2952         for(p = junkout;p<targ;p++)
2953         {
2954             sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
2955             sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
2956         }
2957 
2958         log_verbose(junk);
2959         printUSeq(expect, expectlen);
2960         if ( checkOffsets )
2961         {
2962             log_verbose("\nOffsets:");
2963             log_verbose(offset_str);
2964         }
2965         log_verbose("\n");
2966     }
2967     ucnv_close(conv);
2968 
2969     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
2970 
2971     if (checkOffsets && (expectOffsets != 0))
2972     {
2973         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
2974         {
2975             log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2976             log_err("Got offsets:      ");
2977             for(p=junkout;p<targ;p++)
2978                 log_err("  %2d,", junokout[p-junkout]);
2979             log_err("\n");
2980             log_err("Expected offsets: ");
2981             for(i=0; i<(targ-junkout); i++)
2982                 log_err("  %2d,", expectOffsets[i]);
2983             log_err("\n");
2984             log_err("Got output:       ");
2985             for(i=0; i<(targ-junkout); i++)
2986                 log_err("0x%04x,", junkout[i]);
2987             log_err("\n");
2988             log_err("From source:      ");
2989             for(i=0; i<(src-(const char *)source); i++)
2990                 log_err("  0x%02x,", (unsigned char)source[i]);
2991             log_err("\n");
2992         }
2993     }
2994 
2995     if(!memcmp(junkout, expect, expectlen*2))
2996     {
2997         log_verbose("Matches!\n");
2998         return true;
2999     }
3000     else
3001     {
3002         log_err("String does not match. %s\n", gNuConvTestName);
3003         log_verbose("String does not match. %s\n", gNuConvTestName);
3004         log_err("Got:      ");
3005         printUSeqErr(junkout, expectlen);
3006         log_err("Expected: ");
3007         printUSeqErr(expect, expectlen);
3008         log_err("\n");
3009         return false;
3010     }
3011 }
3012 
testConvertFromUnicodeWithContext(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UConverterFromUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len,const void * context,UErrorCode expectedError)3013 UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
3014                 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
3015                 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3016 {
3017 
3018 
3019     UErrorCode status = U_ZERO_ERROR;
3020     UConverter *conv = 0;
3021     char junkout[NEW_MAX_BUFFER]; /* FIX */
3022     int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3023     const UChar *src;
3024     char *end;
3025     char *targ;
3026     int32_t *offs;
3027     int i;
3028     int32_t  realBufferSize;
3029     char *realBufferEnd;
3030     const UChar *realSourceEnd;
3031     const UChar *sourceLimit;
3032     UBool checkOffsets = true;
3033     UBool doFlush;
3034     char junk[9999];
3035     char offset_str[9999];
3036     char *p;
3037     UConverterFromUCallback oldAction = NULL;
3038     const void* oldContext = NULL;
3039 
3040 
3041     for(i=0;i<NEW_MAX_BUFFER;i++)
3042         junkout[i] = (char)0xF0;
3043     for(i=0;i<NEW_MAX_BUFFER;i++)
3044         junokout[i] = 0xFF;
3045     setNuConvTestName(codepage, "FROM");
3046 
3047     log_verbose("\nTesting========= %s  FROM \n  inputbuffer= %d   outputbuffer= %d\n", codepage, gInBufferSize,
3048             gOutBufferSize);
3049 
3050     conv = ucnv_open(codepage, &status);
3051     if(U_FAILURE(status))
3052     {
3053         log_data_err("Couldn't open converter %s\n",codepage);
3054         return true; /* Because the err has already been logged. */
3055     }
3056 
3057     log_verbose("Converter opened..\n");
3058 
3059     /*----setting the callback routine----*/
3060     ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3061     if (U_FAILURE(status))
3062     {
3063         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3064     }
3065     /*------------------------*/
3066     /*setting the subChar*/
3067     if(mySubChar != NULL){
3068         ucnv_setSubstChars(conv, mySubChar, len, &status);
3069         if (U_FAILURE(status))  {
3070             log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status));
3071         }
3072     }
3073     /*------------*/
3074 
3075     src = source;
3076     targ = junkout;
3077     offs = junokout;
3078 
3079     realBufferSize = UPRV_LENGTHOF(junkout);
3080     realBufferEnd = junkout + realBufferSize;
3081     realSourceEnd = source + sourceLen;
3082 
3083     if ( gOutBufferSize != realBufferSize )
3084       checkOffsets = false;
3085 
3086     if( gInBufferSize != NEW_MAX_BUFFER )
3087       checkOffsets = false;
3088 
3089     do
3090     {
3091         end = nct_min(targ + gOutBufferSize, realBufferEnd);
3092         sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
3093 
3094         doFlush = (UBool)(sourceLimit == realSourceEnd);
3095 
3096         if(targ == realBufferEnd)
3097         {
3098             log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
3099             return false;
3100         }
3101         log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"true":"false");
3102 
3103 
3104         status = U_ZERO_ERROR;
3105 
3106         ucnv_fromUnicode (conv,
3107                   (char **)&targ,
3108                   (const char *)end,
3109                   &src,
3110                   sourceLimit,
3111                   checkOffsets ? offs : NULL,
3112                   doFlush, /* flush if we're at the end of the input data */
3113                   &status);
3114     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
3115 
3116     /* allow failure codes for the stop callback */
3117     if(U_FAILURE(status) && status != expectedError)
3118     {
3119         log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3120         return false;
3121     }
3122 
3123     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
3124         sourceLen, targ-junkout);
3125     if(getTestOption(VERBOSITY_OPTION))
3126     {
3127 
3128         junk[0] = 0;
3129         offset_str[0] = 0;
3130         for(p = junkout;p<targ;p++)
3131         {
3132             sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
3133             sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
3134         }
3135 
3136         log_verbose(junk);
3137         printSeq(expect, expectLen);
3138         if ( checkOffsets )
3139         {
3140             log_verbose("\nOffsets:");
3141             log_verbose(offset_str);
3142         }
3143         log_verbose("\n");
3144     }
3145     ucnv_close(conv);
3146 
3147 
3148     if(expectLen != targ-junkout)
3149     {
3150         log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3151         log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3152         printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3153         printSeqErr(expect, expectLen);
3154         return false;
3155     }
3156 
3157     if (checkOffsets && (expectOffsets != 0) )
3158     {
3159         log_verbose("comparing %d offsets..\n", targ-junkout);
3160         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
3161             log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3162             log_err("Got Output : ");
3163             printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3164             log_err("Got Offsets:      ");
3165             for(p=junkout;p<targ;p++)
3166                 log_err("%d,", junokout[p-junkout]);
3167             log_err("\n");
3168             log_err("Expected Offsets: ");
3169             for(i=0; i<(targ-junkout); i++)
3170                 log_err("%d,", expectOffsets[i]);
3171             log_err("\n");
3172             return false;
3173         }
3174     }
3175 
3176     if(!memcmp(junkout, expect, expectLen))
3177     {
3178         log_verbose("String matches! %s\n", gNuConvTestName);
3179         return true;
3180     }
3181     else
3182     {
3183         log_err("String does not match. %s\n", gNuConvTestName);
3184         log_err("source: ");
3185         printUSeqErr(source, sourceLen);
3186         log_err("Got:      ");
3187         printSeqErr((const uint8_t *)junkout, expectLen);
3188         log_err("Expected: ");
3189         printSeqErr(expect, expectLen);
3190         return false;
3191     }
3192 }
testConvertToUnicodeWithContext(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UConverterToUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len,const void * context,UErrorCode expectedError)3193 UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
3194                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
3195                const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3196 {
3197     UErrorCode status = U_ZERO_ERROR;
3198     UConverter *conv = 0;
3199     UChar   junkout[NEW_MAX_BUFFER]; /* FIX */
3200     int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3201     const char *src;
3202     const char *realSourceEnd;
3203     const char *srcLimit;
3204     UChar *targ;
3205     UChar *end;
3206     int32_t *offs;
3207     int i;
3208     UBool   checkOffsets = true;
3209     char junk[9999];
3210     char offset_str[9999];
3211     UChar *p;
3212     UConverterToUCallback oldAction = NULL;
3213     const void* oldContext = NULL;
3214 
3215     int32_t   realBufferSize;
3216     UChar *realBufferEnd;
3217 
3218 
3219     for(i=0;i<NEW_MAX_BUFFER;i++)
3220         junkout[i] = 0xFFFE;
3221 
3222     for(i=0;i<NEW_MAX_BUFFER;i++)
3223         junokout[i] = -1;
3224 
3225     setNuConvTestName(codepage, "TO");
3226 
3227     log_verbose("\n=========  %s\n", gNuConvTestName);
3228 
3229     conv = ucnv_open(codepage, &status);
3230     if(U_FAILURE(status))
3231     {
3232         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
3233         return true;
3234     }
3235 
3236     log_verbose("Converter opened..\n");
3237 
3238     src = (const char *)source;
3239     targ = junkout;
3240     offs = junokout;
3241 
3242     realBufferSize = UPRV_LENGTHOF(junkout);
3243     realBufferEnd = junkout + realBufferSize;
3244     realSourceEnd = src + sourcelen;
3245     /*----setting the callback routine----*/
3246     ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3247     if (U_FAILURE(status))
3248     {
3249         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3250     }
3251     /*-------------------------------------*/
3252     /*setting the subChar*/
3253     if(mySubChar != NULL){
3254         ucnv_setSubstChars(conv, mySubChar, len, &status);
3255         if (U_FAILURE(status))  {
3256             log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3257         }
3258     }
3259     /*------------*/
3260 
3261 
3262     if ( gOutBufferSize != realBufferSize )
3263         checkOffsets = false;
3264 
3265     if( gInBufferSize != NEW_MAX_BUFFER )
3266         checkOffsets = false;
3267 
3268     do
3269     {
3270         end = nct_min( targ + gOutBufferSize, realBufferEnd);
3271         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
3272 
3273         if(targ == realBufferEnd)
3274         {
3275             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
3276             return false;
3277         }
3278         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
3279 
3280 
3281 
3282         status = U_ZERO_ERROR;
3283 
3284         ucnv_toUnicode (conv,
3285                 &targ,
3286                 end,
3287                 (const char **)&src,
3288                 (const char *)srcLimit,
3289                 checkOffsets ? offs : NULL,
3290                 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
3291                 &status);
3292     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
3293 
3294     /* allow failure codes for the stop callback */
3295     if(U_FAILURE(status) && status!=expectedError)
3296     {
3297         log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3298         return false;
3299     }
3300 
3301     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
3302         sourcelen, targ-junkout);
3303     if(getTestOption(VERBOSITY_OPTION))
3304     {
3305 
3306         junk[0] = 0;
3307         offset_str[0] = 0;
3308 
3309         for(p = junkout;p<targ;p++)
3310         {
3311             sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
3312             sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
3313         }
3314 
3315         log_verbose(junk);
3316         printUSeq(expect, expectlen);
3317         if ( checkOffsets )
3318         {
3319             log_verbose("\nOffsets:");
3320             log_verbose(offset_str);
3321         }
3322         log_verbose("\n");
3323     }
3324     ucnv_close(conv);
3325 
3326     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
3327 
3328     if (checkOffsets && (expectOffsets != 0))
3329     {
3330         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
3331         {
3332             log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3333             log_err("Got offsets:      ");
3334             for(p=junkout;p<targ;p++)
3335                 log_err("  %2d,", junokout[p-junkout]);
3336             log_err("\n");
3337             log_err("Expected offsets: ");
3338             for(i=0; i<(targ-junkout); i++)
3339                 log_err("  %2d,", expectOffsets[i]);
3340             log_err("\n");
3341             log_err("Got output:       ");
3342             for(i=0; i<(targ-junkout); i++)
3343                 log_err("0x%04x,", junkout[i]);
3344             log_err("\n");
3345             log_err("From source:      ");
3346             for(i=0; i<(src-(const char *)source); i++)
3347                 log_err("  0x%02x,", (unsigned char)source[i]);
3348             log_err("\n");
3349         }
3350     }
3351 
3352     if(!memcmp(junkout, expect, expectlen*2))
3353     {
3354         log_verbose("Matches!\n");
3355         return true;
3356     }
3357     else
3358     {
3359         log_err("String does not match. %s\n", gNuConvTestName);
3360         log_verbose("String does not match. %s\n", gNuConvTestName);
3361         log_err("Got:      ");
3362         printUSeqErr(junkout, expectlen);
3363         log_err("Expected: ");
3364         printUSeqErr(expect, expectlen);
3365         log_err("\n");
3366         return false;
3367     }
3368 }
3369 
TestCallBackFailure(void)3370 static void TestCallBackFailure(void) {
3371     UErrorCode status = U_USELESS_COLLATOR_ERROR;
3372     ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status);
3373     if (status != U_USELESS_COLLATOR_ERROR) {
3374         log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n");
3375     }
3376     ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status);
3377     if (status != U_USELESS_COLLATOR_ERROR) {
3378         log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n");
3379     }
3380     ucnv_cbFromUWriteSub(NULL, -1, &status);
3381     if (status != U_USELESS_COLLATOR_ERROR) {
3382         log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n");
3383     }
3384     ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status);
3385     if (status != U_USELESS_COLLATOR_ERROR) {
3386         log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n");
3387     }
3388 }
3389