1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2013, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /*
7 ********************************************************************************
8 * File NCCBTST.C
9 *
10 * Modification History:
11 * Name Description
12 * Madhu Katragadda 7/21/1999 Testing error callback routines
13 ********************************************************************************
14 */
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <ctype.h>
19 #include "cstring.h"
20 #include "unicode/uloc.h"
21 #include "unicode/ucnv.h"
22 #include "unicode/ucnv_err.h"
23 #include "cintltst.h"
24 #include "unicode/utypes.h"
25 #include "unicode/ustring.h"
26 #include "nccbtst.h"
27 #include "unicode/ucnv_cb.h"
28 #include "unicode/utf16.h"
29
30 #define NEW_MAX_BUFFER 999
31
32 #define nct_min(x,y) ((x<y) ? x : y)
33 #define ARRAY_LENGTH(array) (sizeof(array)/sizeof((array)[0]))
34
35 static int32_t gInBufferSize = 0;
36 static int32_t gOutBufferSize = 0;
37 static char gNuConvTestName[1024];
38
/*
 * Passes the first len bytes of a to log_verbose as a brace-enclosed,
 * comma-separated list of two-digit uppercase hex values.
 * Nothing is listed between the braces when len is zero or negative.
 */
static void printSeq(const uint8_t* a, int len)
{
    int idx;

    log_verbose("\n{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02X, ", a[idx]);
    }
    log_verbose("}\n");
}
47
/*
 * Passes the first len UChar units of a to log_verbose as a brace-enclosed,
 * comma-separated list of four-digit lowercase hex values.
 * Nothing is listed between the braces when len is zero or negative.
 */
static void printUSeq(const UChar* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose(" 0x%04x, ", a[idx]);
    }
    log_verbose("}\n");
}
56
/*
 * Writes the first len bytes of a to stderr as a brace-enclosed,
 * comma-separated list of two-digit lowercase hex values.
 * Nothing is listed between the braces when len is zero or negative.
 */
static void printSeqErr(const uint8_t* a, int len)
{
    int idx;

    fputs("{", stderr);
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, " 0x%02x, ", a[idx]);
    }
    fputs("}\n", stderr);
}
65
/*
 * Writes the first len UChar units of a to stderr as a brace-enclosed,
 * comma-separated list of four-digit lowercase hex values.
 * Nothing is listed between the braces when len is zero or negative.
 */
static void printUSeqErr(const UChar* a, int len)
{
    int idx;

    fputs("{", stderr);
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%04x, ", a[idx]);
    }
    fputs("}\n", stderr);
}
74
setNuConvTestName(const char * codepage,const char * direction)75 static void setNuConvTestName(const char *codepage, const char *direction)
76 {
77 sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
78 codepage,
79 direction,
80 (int)gInBufferSize,
81 (int)gOutBufferSize);
82 }
83
84
85 static void TestCallBackFailure(void);
86
87 void addTestConvertErrorCallBack(TestNode** root);
88
/*
 * Registers this file's converter error-callback tests under the
 * "tsconv/nccbtst/" path by calling addTest once per test function.
 *
 * root - test tree handle, forwarded unchanged to every addTest call
 *
 * TestLegalAndOtherCallBack and TestSingleByteCallBack are registered only
 * when UCONFIG_NO_LEGACY_CONVERSION is not set.
 */
void addTestConvertErrorCallBack(TestNode** root)
{
    addTest(root, &TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack");
    addTest(root, &TestStopCallBack, "tsconv/nccbtst/TestStopCallBack");
    addTest(root, &TestSubCallBack, "tsconv/nccbtst/TestSubCallBack");
    /* BEGIN android-removed
    To save space, Android does not build complete CJK conversion tables.
    We skip the test here.
    addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack");
    END android-removed */

    /* These two tests are compiled in only together with legacy conversion support. */
#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, &TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOtherCallBack");
    addTest(root, &TestSingleByteCallBack, "tsconv/nccbtst/TestSingleByteCallBack");
#endif

    addTest(root, &TestCallBackFailure, "tsconv/nccbtst/TestCallBackFailure");
}
107
TestSkipCallBack()108 static void TestSkipCallBack()
109 {
110 TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
111 TestSkip(1,NEW_MAX_BUFFER);
112 TestSkip(1,1);
113 TestSkip(NEW_MAX_BUFFER, 1);
114 }
115
TestStopCallBack()116 static void TestStopCallBack()
117 {
118 TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
119 TestStop(1,NEW_MAX_BUFFER);
120 TestStop(1,1);
121 TestStop(NEW_MAX_BUFFER, 1);
122 }
123
TestSubCallBack()124 static void TestSubCallBack()
125 {
126 TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
127 TestSub(1,NEW_MAX_BUFFER);
128 TestSub(1,1);
129 TestSub(NEW_MAX_BUFFER, 1);
130
131 #if !UCONFIG_NO_LEGACY_CONVERSION
132 TestEBCDIC_STATEFUL_Sub(1, 1);
133 TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER);
134 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1);
135 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
136 #endif
137 }
138
TestSubWithValueCallBack()139 static void TestSubWithValueCallBack()
140 {
141 TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
142 TestSubWithValue(1,NEW_MAX_BUFFER);
143 TestSubWithValue(1,1);
144 TestSubWithValue(NEW_MAX_BUFFER, 1);
145 }
146
147 #if !UCONFIG_NO_LEGACY_CONVERSION
TestLegalAndOtherCallBack()148 static void TestLegalAndOtherCallBack()
149 {
150 TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
151 TestLegalAndOthers(1,NEW_MAX_BUFFER);
152 TestLegalAndOthers(1,1);
153 TestLegalAndOthers(NEW_MAX_BUFFER, 1);
154 }
155
TestSingleByteCallBack()156 static void TestSingleByteCallBack()
157 {
158 TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
159 TestSingleByte(1,NEW_MAX_BUFFER);
160 TestSingleByte(1,1);
161 TestSingleByte(NEW_MAX_BUFFER, 1);
162 }
163 #endif
164
TestSkip(int32_t inputsize,int32_t outputsize)165 static void TestSkip(int32_t inputsize, int32_t outputsize)
166 {
167 static const uint8_t expskipIBM_949[]= {
168 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
169
170 static const uint8_t expskipIBM_943[] = {
171 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 };
172
173 static const uint8_t expskipIBM_930[] = {
174 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f };
175
176 gInBufferSize = inputsize;
177 gOutBufferSize = outputsize;
178
179 /*From Unicode*/
180 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n");
181
182 #if !UCONFIG_NO_LEGACY_CONVERSION
183 {
184 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
185 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
186
187 static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 };
188 static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 };
189
190 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
191 expskipIBM_949, sizeof(expskipIBM_949), "ibm-949",
192 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 ))
193 log_err("u-> ibm-949 with skip did not match.\n");
194 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
195 expskipIBM_943, sizeof(expskipIBM_943), "ibm-943",
196 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 ))
197 log_err("u-> ibm-943 with skip did not match.\n");
198 }
199
200 {
201 static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 };
202 static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f };
203 static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 };
204
205 /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */
206 if(!testConvertFromUnicode(fromU, sizeof(fromU)/U_SIZEOF_UCHAR,
207 fromUBytes, sizeof(fromUBytes),
208 "ibm-930",
209 UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets,
210 NULL, 0)
211 ) {
212 log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n");
213 }
214 }
215 #endif
216
217 {
218 static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
219 static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 };
220 static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 };
221
222 static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
223 static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 };
224 static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 };
225
226 /* US-ASCII */
227 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR,
228 usasciiFromUBytes, sizeof(usasciiFromUBytes),
229 "US-ASCII",
230 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
231 NULL, 0)
232 ) {
233 log_err("u->US-ASCII with skip did not match.\n");
234 }
235
236 #if !UCONFIG_NO_LEGACY_CONVERSION
237 /* SBCS NLTC codepage 367 for US-ASCII */
238 if(!testConvertFromUnicode(usasciiFromU, sizeof(usasciiFromU)/U_SIZEOF_UCHAR,
239 usasciiFromUBytes, sizeof(usasciiFromUBytes),
240 "ibm-367",
241 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
242 NULL, 0)
243 ) {
244 log_err("u->ibm-367 with skip did not match.\n");
245 }
246 #endif
247
248 /* ISO-Latin-1 */
249 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR,
250 latin1FromUBytes, sizeof(latin1FromUBytes),
251 "LATIN_1",
252 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
253 NULL, 0)
254 ) {
255 log_err("u->LATIN_1 with skip did not match.\n");
256 }
257
258 #if !UCONFIG_NO_LEGACY_CONVERSION
259 /* windows-1252 */
260 if(!testConvertFromUnicode(latin1FromU, sizeof(latin1FromU)/U_SIZEOF_UCHAR,
261 latin1FromUBytes, sizeof(latin1FromUBytes),
262 "windows-1252",
263 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
264 NULL, 0)
265 ) {
266 log_err("u->windows-1252 with skip did not match.\n");
267 }
268 }
269
270 {
271 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
272 static const uint8_t toIBM943[]= { 0x61, 0x61 };
273 static const int32_t offset[]= {0, 4};
274
275 /* EUC_JP*/
276 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
277 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
278 0x61, 0x8e, 0xe0,
279 };
280 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7};
281
282 /*EUC_TW*/
283 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
284 static const uint8_t to_euc_tw[]={
285 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
286 0x61, 0xe6, 0xca, 0x8a,
287 };
288 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,};
289
290 /*ISO-2022-JP*/
291 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, };
292 static const uint8_t to_iso_2022_jp[]={
293 0x41,
294 0x42,
295
296 };
297 static const int32_t from_iso_2022_jpOffs [] ={0,2};
298
299 /*ISO-2022-JP*/
300 UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
301 static const uint8_t to_iso_2022_jp2[]={
302 0x41,
303 0x43,
304
305 };
306 static const int32_t from_iso_2022_jpOffs2 [] ={0,2};
307
308 /*ISO-2022-cn*/
309 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
310 static const uint8_t to_iso_2022_cn[]={
311 0x41, 0x42
312 };
313 static const int32_t from_iso_2022_cnOffs [] ={
314 0, 2
315 };
316
317 /*ISO-2022-CN*/
318 static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
319 static const uint8_t to_iso_2022_cn1[]={
320 0x41, 0x43
321
322 };
323 static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 };
324
325 /*ISO-2022-kr*/
326 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
327 static const uint8_t to_iso_2022_kr[]={
328 0x1b, 0x24, 0x29, 0x43,
329 0x41,
330 0x0e, 0x25, 0x50,
331 0x25, 0x50,
332 0x0f, 0x42,
333 };
334 static const int32_t from_iso_2022_krOffs [] ={
335 -1,-1,-1,-1,
336 0,
337 1,1,1,
338 3,3,
339 4,4
340 };
341
342 /*ISO-2022-kr*/
343 static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
344 static const uint8_t to_iso_2022_kr1[]={
345 0x1b, 0x24, 0x29, 0x43,
346 0x41,
347 0x0e, 0x25, 0x50,
348 0x25, 0x50,
349
350 };
351 static const int32_t from_iso_2022_krOffs1 [] ={
352 -1,-1,-1,-1,
353 0,
354 1,1,1,
355 3,3,
356
357 };
358 /* HZ encoding */
359 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
360
361 static const uint8_t to_hz[]={
362 0x7e, 0x7d, 0x41,
363 0x7e, 0x7b, 0x26, 0x30,
364 0x26, 0x30,
365 0x7e, 0x7d, 0x42,
366
367 };
368 static const int32_t from_hzOffs [] ={
369 0,0,0,
370 1,1,1,1,
371 3,3,
372 4,4,4,4
373 };
374
375 static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
376
377 static const uint8_t to_hz1[]={
378 0x7e, 0x7d, 0x41,
379 0x7e, 0x7b, 0x26, 0x30,
380 0x26, 0x30,
381
382
383 };
384 static const int32_t from_hzOffs1 [] ={
385 0,0,0,
386 1,1,1,1,
387 3,3,
388
389 };
390
391 #endif
392
393 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
394
395 static const uint8_t to_SCSU[]={
396 0x41,
397 0x42
398
399
400 };
401 static const int32_t from_SCSUOffs [] ={
402 0,
403 2,
404
405 };
406
407 #if !UCONFIG_NO_LEGACY_CONVERSION
408 /* ISCII */
409 static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
410 static const uint8_t to_iscii[]={
411 0x41,
412 0x42,
413 };
414 static const int32_t from_isciiOffs [] ={
415 0,2,
416
417 };
418 /*ISCII*/
419 static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
420 static const uint8_t to_iscii1[]={
421 0x44,
422 0x43,
423
424 };
425 static const int32_t from_isciiOffs1 [] ={0,2};
426
427 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
428 toIBM943, sizeof(toIBM943), "ibm-943",
429 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 ))
430 log_err("u-> ibm-943 with skip did not match.\n");
431
432 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
433 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
434 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 ))
435 log_err("u-> euc-jp with skip did not match.\n");
436
437 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
438 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
439 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 ))
440 log_err("u-> euc-tw with skip did not match.\n");
441
442 /*iso_2022_jp*/
443 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
444 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
445 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 ))
446 log_err("u-> iso-2022-jp with skip did not match.\n");
447
448 /* with context */
449 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]),
450 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp",
451 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
452 log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
453
454 /*iso_2022_cn*/
455 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
456 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
457 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 ))
458 log_err("u-> iso-2022-cn with skip did not match.\n");
459 /*with context*/
460 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, sizeof(iso_2022_cn_inputText1)/sizeof(iso_2022_cn_inputText1[0]),
461 to_iso_2022_cn1, sizeof(to_iso_2022_cn1), "iso-2022-cn",
462 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
463 log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
464
465 /*iso_2022_kr*/
466 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
467 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
468 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 ))
469 log_err("u-> iso-2022-kr with skip did not match.\n");
470 /*with context*/
471 if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, sizeof(iso_2022_kr_inputText1)/sizeof(iso_2022_kr_inputText1[0]),
472 to_iso_2022_kr1, sizeof(to_iso_2022_kr1), "iso-2022-kr",
473 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
474 log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
475
476 /*hz*/
477 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
478 to_hz, sizeof(to_hz), "HZ",
479 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 ))
480 log_err("u-> HZ with skip did not match.\n");
481 /*with context*/
482 if(!testConvertFromUnicodeWithContext(hz_inputText1, sizeof(hz_inputText1)/sizeof(hz_inputText1[0]),
483 to_hz1, sizeof(to_hz1), "hz",
484 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
485 log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
486 #endif
487
488 /*SCSU*/
489 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
490 to_SCSU, sizeof(to_SCSU), "SCSU",
491 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 ))
492 log_err("u-> SCSU with skip did not match.\n");
493
494 #if !UCONFIG_NO_LEGACY_CONVERSION
495 /*ISCII*/
496 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
497 to_iscii, sizeof(to_iscii), "ISCII,version=0",
498 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 ))
499 log_err("u-> iscii with skip did not match.\n");
500 /*with context*/
501 if(!testConvertFromUnicodeWithContext(iscii_inputText1, sizeof(iscii_inputText1)/sizeof(iscii_inputText1[0]),
502 to_iscii1, sizeof(to_iscii1), "ISCII,version=0",
503 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
504 log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
505 #endif
506 }
507
508 log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
509 {
510 static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */
511 0xFB, 0xEE, 0x28, /* from source offset 0 */
512 0x24, 0x1E, 0x52,
513 0xB2,
514 0x20,
515 0xB3,
516 0xB1,
517 0x0D,
518 0x0A,
519
520 0x20, /* from 8 */
521 0x00,
522 0xD0, 0x6C,
523 0xB6,
524 0xD8, 0xA5,
525 0x20,
526 0x68,
527 0x59,
528
529 0xF9, 0x28, /* from 16 */
530 0x6D,
531 0x20,
532 0x73,
533 0xE0, 0x2D,
534 0xDE, 0x43,
535 0xD0, 0x33,
536 0x20,
537
538 0xFA, 0x83, /* from 24 */
539 0x25, 0x01,
540 0xFB, 0x16, 0x87,
541 0x4B, 0x16,
542 0x20,
543 0xE6, 0xBD,
544 0xEB, 0x5B,
545 0x4B, 0xCC,
546
547 0xF9, 0xA2, /* from 32 */
548 0xFC, 0x10, 0x3E,
549 0xFE, 0x16, 0x3A, 0x8C,
550 0x20,
551 0xFC, 0x03, 0xAC,
552
553 0x01, /* from 41 */
554 0xDE, 0x83,
555 0x20,
556 0x09
557 };
558 static const UChar expected[]={
559 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */
560 0x0063, 0x0061, 0x000D, 0x000A,
561
562 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */
563 0x0930, 0x0020, 0x0918, 0x0909,
564
565 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */
566 0x4000, 0x4E00, 0x7777, 0x0020,
567
568 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */
569 0x0020, 0xD7A3, 0xDC00, 0xD800,
570
571 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */
572 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
573
574 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */
575 0x0009
576 };
577 static const int32_t offsets[]={
578 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7,
579 8, 9, 10, 10, 11, 12, 12, 13, 14, 15,
580 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23,
581 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31,
582 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39,
583 41, 42, 42, 43, 44
584 };
585
586 /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */
587 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
588 sampleText, sizeof(sampleText),
589 "BOCU-1",
590 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
591 ) {
592 log_err("u->BOCU-1 with skip did not match.\n");
593 }
594 }
595
596 log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
597 {
598 const uint8_t sampleText[]={
599 0x61, /* 'a' */
600 0xc4, 0xb5, /* U+0135 */
601 0xed, 0x80, 0xa0, /* Hangul U+d020 */
602 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */
603 0xee, 0x80, 0x80, /* PUA U+e000 */
604 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc01 */
605 0x62, /* 'b' */
606 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d801 */
607 0xd0, 0x80 /* U+0400 */
608 };
609 UChar expected[]={
610 0x0061,
611 0x0135,
612 0xd020,
613 0xd801, 0xdc01,
614 0xe000,
615 0xdc01,
616 0x0062,
617 0xd801,
618 0x0400
619 };
620 int32_t offsets[]={
621 0,
622 1, 1,
623 2, 2, 2,
624 3, 3, 3, 4, 4, 4,
625 5, 5, 5,
626 6, 6, 6,
627 7,
628 8, 8, 8,
629 9, 9
630 };
631
632 /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */
633
634 /* without offsets */
635 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
636 sampleText, sizeof(sampleText),
637 "CESU-8",
638 UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0)
639 ) {
640 log_err("u->CESU-8 with skip did not match.\n");
641 }
642
643 /* with offsets */
644 if(!testConvertFromUnicode(expected, ARRAY_LENGTH(expected),
645 sampleText, sizeof(sampleText),
646 "CESU-8",
647 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
648 ) {
649 log_err("u->CESU-8 with skip did not match.\n");
650 }
651 }
652
653 /*to Unicode*/
654 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n");
655
656 #if !UCONFIG_NO_LEGACY_CONVERSION
657 {
658
659 static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 };
660 static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
661 static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
662
663 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5};
664 static const int32_t fromIBM943Offs [] = { 0, 2, 4};
665 static const int32_t fromIBM930Offs [] = { 1, 3, 5};
666
667 if(!testConvertToUnicode(expskipIBM_949, sizeof(expskipIBM_949),
668 IBM_949skiptoUnicode, sizeof(IBM_949skiptoUnicode)/sizeof(IBM_949skiptoUnicode),"ibm-949",
669 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 ))
670 log_err("ibm-949->u with skip did not match.\n");
671 if(!testConvertToUnicode(expskipIBM_943, sizeof(expskipIBM_943),
672 IBM_943skiptoUnicode, sizeof(IBM_943skiptoUnicode)/sizeof(IBM_943skiptoUnicode[0]),"ibm-943",
673 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 ))
674 log_err("ibm-943->u with skip did not match.\n");
675
676
677 if(!testConvertToUnicode(expskipIBM_930, sizeof(expskipIBM_930),
678 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930",
679 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 ))
680 log_err("ibm-930->u with skip did not match.\n");
681
682
683 if(!testConvertToUnicodeWithContext(expskipIBM_930, sizeof(expskipIBM_930),
684 IBM_930skiptoUnicode, sizeof(IBM_930skiptoUnicode)/sizeof(IBM_930skiptoUnicode[0]),"ibm-930",
685 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
686 log_err("ibm-930->u with skip did not match.\n");
687 }
688 #endif
689
690 {
691 static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 };
692 static const UChar usasciiToU[] = { 0x61, 0x31 };
693 static const int32_t usasciiToUOffsets[] = { 0, 2 };
694
695 static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 };
696 static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 };
697 static const int32_t latin1ToUOffsets[] = { 0, 1, 2 };
698
699 /* US-ASCII */
700 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes),
701 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR,
702 "US-ASCII",
703 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
704 NULL, 0)
705 ) {
706 log_err("US-ASCII->u with skip did not match.\n");
707 }
708
709 #if !UCONFIG_NO_LEGACY_CONVERSION
710 /* SBCS NLTC codepage 367 for US-ASCII */
711 if(!testConvertToUnicode(usasciiToUBytes, sizeof(usasciiToUBytes),
712 usasciiToU, sizeof(usasciiToU)/U_SIZEOF_UCHAR,
713 "ibm-367",
714 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
715 NULL, 0)
716 ) {
717 log_err("ibm-367->u with skip did not match.\n");
718 }
719 #endif
720
721 /* ISO-Latin-1 */
722 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes),
723 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR,
724 "LATIN_1",
725 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
726 NULL, 0)
727 ) {
728 log_err("LATIN_1->u with skip did not match.\n");
729 }
730
731 #if !UCONFIG_NO_LEGACY_CONVERSION
732 /* windows-1252 */
733 if(!testConvertToUnicode(latin1ToUBytes, sizeof(latin1ToUBytes),
734 latin1ToU, sizeof(latin1ToU)/U_SIZEOF_UCHAR,
735 "windows-1252",
736 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
737 NULL, 0)
738 ) {
739 log_err("windows-1252->u with skip did not match.\n");
740 }
741 #endif
742 }
743
744 #if !UCONFIG_NO_LEGACY_CONVERSION
745 {
746 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
747 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
748 };
749 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0x03b4
750 };
751 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5};
752
753
754 /* euc-jp*/
755 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
756 0x8f, 0xda, 0xa1, /*unassigned*/
757 0x8e, 0xe0,
758 };
759 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2};
760 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9};
761
762 /*EUC_TW*/
763 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
764 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
765 0xe6, 0xca, 0x8a,
766 };
767 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, };
768 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13};
769 /*iso-2022-jp*/
770 static const uint8_t sampleTxt_iso_2022_jp[]={
771 0x41,
772 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/
773 0x1b, 0x28, 0x42, 0x42,
774
775 };
776 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x42 };
777 static const int32_t from_iso_2022_jpOffs [] ={ 0,9 };
778
779 /*iso-2022-cn*/
780 static const uint8_t sampleTxt_iso_2022_cn[]={
781 0x0f, 0x41, 0x44,
782 0x1B, 0x24, 0x29, 0x47,
783 0x0E, 0x40, 0x6f, /*unassigned*/
784 0x0f, 0x42,
785
786 };
787
788 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x42 };
789 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 11 };
790
791 /*iso-2022-kr*/
792 static const uint8_t sampleTxt_iso_2022_kr[]={
793 0x1b, 0x24, 0x29, 0x43,
794 0x41,
795 0x0E, 0x7f, 0x1E,
796 0x0e, 0x25, 0x50,
797 0x0f, 0x51,
798 0x42, 0x43,
799
800 };
801 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x03A0,0x51, 0x42,0x43};
802 static const int32_t from_iso_2022_krOffs [] ={ 4, 9, 12, 13 , 14 };
803
804 /*hz*/
805 static const uint8_t sampleTxt_hz[]={
806 0x41,
807 0x7e, 0x7b, 0x26, 0x30,
808 0x7f, 0x1E, /*unassigned*/
809 0x26, 0x30,
810 0x7e, 0x7d, 0x42,
811 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
812 0x7e, 0x7d, 0x42,
813 };
814 static const UChar hztoUnicode[]={
815 0x41,
816 0x03a0,
817 0x03A0,
818 0x42,
819 0x42,};
820
821 static const int32_t from_hzOffs [] ={0,3,7,11,18, };
822
823 /*ISCII*/
824 static const uint8_t sampleTxt_iscii[]={
825 0x41,
826 0xa1,
827 0xEB, /*unassigned*/
828 0x26,
829 0x30,
830 0xa2,
831 0xEC, /*unassigned*/
832 0x42,
833 };
834 static const UChar isciitoUnicode[]={
835 0x41,
836 0x0901,
837 0x26,
838 0x30,
839 0x0902,
840 0x42,
841 };
842
843 static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 };
844
845 /*LMBCS*/
846 static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50,
847 0x12, 0x92, 0xa0, /*unassigned*/
848 0x12, 0x92, 0xA1,
849 };
850 static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4};
851 static const int32_t fromLMBCS[] = {0, 6};
852
853 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
854 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
855 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
856 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
857
858 if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
859 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
860 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
861 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
862
863 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
864 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
865 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0))
866 log_err("euc-jp->u with skip did not match.\n");
867
868
869
870 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
871 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
872 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0))
873 log_err("euc-tw->u with skip did not match.\n");
874
875
876 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
877 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
878 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0))
879 log_err("iso-2022-jp->u with skip did not match.\n");
880
881 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn),
882 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn",
883 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0))
884 log_err("iso-2022-cn->u with skip did not match.\n");
885
886 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr),
887 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr",
888 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0))
889 log_err("iso-2022-kr->u with skip did not match.\n");
890
891 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz),
892 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ",
893 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0))
894 log_err("HZ->u with skip did not match.\n");
895
896 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii),
897 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0",
898 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0))
899 log_err("iscii->u with skip did not match.\n");
900
901 if(!testConvertToUnicode(sampleTxtLMBCS, sizeof(sampleTxtLMBCS),
902 LMBCSToUnicode, sizeof(LMBCSToUnicode)/sizeof(LMBCSToUnicode[0]),"LMBCS-1",
903 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0))
904 log_err("LMBCS->u with skip did not match.\n");
905
906 }
907 #endif
908
909 log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n");
910 {
911 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
912 0xe0, 0x80, 0x61,};
913 UChar expected1[] = { 0x0031, 0x4e8c, 0x0061};
914 int32_t offsets1[] = { 0x0000, 0x0001, 0x0006};
915
916 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
917 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
918 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
919 log_err("utf8->u with skip did not match.\n");;
920 }
921
922 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n");
923 {
924 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
925 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfffe};
926 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
927
928 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
929 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
930 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
931 log_err("scsu->u with skip did not match.\n");
932 }
933
934 log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
935 {
936 const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */
937 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */
938 0x24, 0x1E, 0x52, /* 3 */
939 0xB2, /* 6 */
940 0x20, /* 7 */
941 0x40, 0x07, /* 8 - wrong trail byte */
942 0xB3, /* 10 */
943 0xB1, /* 11 */
944 0xD0, 0x20, /* 12 - wrong trail byte */
945 0x0D, /* 14 */
946 0x0A, /* 15 */
947 0x20, /* 16 */
948 0x00, /* 17 */
949 0xD0, 0x6C, /* 18 */
950 0xB6, /* 20 */
951 0xD8, 0xA5, /* 21 */
952 0x20, /* 23 */
953 0x68, /* 24 */
954 0x59, /* 25 */
955 0xF9, 0x28, /* 26 */
956 0x6D, /* 28 */
957 0x20, /* 29 */
958 0x73, /* 30 */
959 0xE0, 0x2D, /* 31 */
960 0xDE, 0x43, /* 33 */
961 0xD0, 0x33, /* 35 */
962 0x20, /* 37 */
963 0xFA, 0x83, /* 38 */
964 0x25, 0x01, /* 40 */
965 0xFB, 0x16, 0x87, /* 42 */
966 0x4B, 0x16, /* 45 */
967 0x20, /* 47 */
968 0xE6, 0xBD, /* 48 */
969 0xEB, 0x5B, /* 50 */
970 0x4B, 0xCC, /* 52 */
971 0xF9, 0xA2, /* 54 */
972 0xFC, 0x10, 0x3E, /* 56 */
973 0xFE, 0x16, 0x3A, 0x8C, /* 59 */
974 0x20, /* 63 */
975 0xFC, 0x03, 0xAC, /* 64 */
976 0xFF, /* 67 - FF just resets the state without encoding anything */
977 0x01, /* 68 */
978 0xDE, 0x83, /* 69 */
979 0x20, /* 71 */
980 0x09 /* 72 */
981 };
982 UChar expected[]={
983 0xFEFF, 0x0061, 0x0062, 0x0020,
984 0x0063, 0x0061, 0x000D, 0x000A,
985 0x0020, 0x0000, 0x00DF, 0x00E6,
986 0x0930, 0x0020, 0x0918, 0x0909,
987 0x3086, 0x304D, 0x0020, 0x3053,
988 0x4000, 0x4E00, 0x7777, 0x0020,
989 0x9FA5, 0x4E00, 0xAC00, 0xBCDE,
990 0x0020, 0xD7A3, 0xDC00, 0xD800,
991 0xD800, 0xDC00, 0xD845, 0xDDDD,
992 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
993 0xDFFF, 0x0001, 0x0E40, 0x0020,
994 0x0009
995 };
996 int32_t offsets[]={
997 0, 3, 6, 7, /* skip 8, */
998 10, 11, /* skip 12, */
999 14, 15, 16, 17, 18,
1000 20, 21, 23, 24, 25, 26, 28, 29,
1001 30, 31, 33, 35, 37, 38,
1002 40, 42, 45, 47, 48,
1003 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59,
1004 63, 64, /* trail */ 64, /* reset only 67, */
1005 68, 69,
1006 71, 72
1007 };
1008
1009 if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1010 expected, ARRAY_LENGTH(expected), "BOCU-1",
1011 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1012 ) {
1013 log_err("BOCU-1->u with skip did not match.\n");
1014 }
1015 }
1016
1017 log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
1018 {
1019 const uint8_t sampleText[]={
1020 0x61, /* 0 'a' */
1021 0xc0, 0x80, /* 1 non-shortest form */
1022 0xc4, 0xb5, /* 3 U+0135 */
1023 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */
1024 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401 */
1025 0xee, 0x80, 0x80, /* 14 PUA U+e000 */
1026 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U+dc01 */
1027 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+10000 */
1028 0x62, /* 24 'b' */
1029 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+d801 */
1030 0xed, 0xa0, /* 28 incomplete sequence */
1031 0xd0, 0x80 /* 30 U+0400 */
1032 };
1033 UChar expected[]={
1034 0x0061,
1035 /* skip */
1036 0x0135,
1037 0xd020,
1038 0xd801, 0xdc01,
1039 0xe000,
1040 0xdc01,
1041 /* skip */
1042 0x0062,
1043 0xd801,
1044 0x0400
1045 };
1046 int32_t offsets[]={
1047 0,
1048 /* skip 1, */
1049 3,
1050 5,
1051 8, 11,
1052 14,
1053 17,
1054 /* skip 20, 20, */
1055 24,
1056 25,
1057 /* skip 28 */
1058 30
1059 };
1060
1061 /* without offsets */
1062 if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1063 expected, ARRAY_LENGTH(expected), "CESU-8",
1064 UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0)
1065 ) {
1066 log_err("CESU-8->u with skip did not match.\n");
1067 }
1068
1069 /* with offsets */
1070 if(!testConvertToUnicode(sampleText, sizeof(sampleText),
1071 expected, ARRAY_LENGTH(expected), "CESU-8",
1072 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1073 ) {
1074 log_err("CESU-8->u with skip did not match.\n");
1075 }
1076 }
1077 }
1078
TestStop(int32_t inputsize,int32_t outputsize)1079 static void TestStop(int32_t inputsize, int32_t outputsize)
1080 {
1081 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1082 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1083
1084 static const uint8_t expstopIBM_949[]= {
1085 0x00, 0xb0, 0xa1, 0xb0, 0xa2};
1086
1087 static const uint8_t expstopIBM_943[] = {
1088 0x9f, 0xaf, 0x9f, 0xb1};
1089
1090 static const uint8_t expstopIBM_930[] = {
1091 0x0e, 0x5d, 0x5f, 0x5d, 0x63};
1092
1093 static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01};
1094 static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64};
1095 static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64};
1096
1097
1098 static const int32_t toIBM949Offsstop [] = { 0, 1, 1, 2, 2};
1099 static const int32_t toIBM943Offsstop [] = { 0, 0, 1, 1};
1100 static const int32_t toIBM930Offsstop [] = { 0, 0, 0, 1, 1};
1101
1102 static const int32_t fromIBM949Offs [] = { 0, 1, 3};
1103 static const int32_t fromIBM943Offs [] = { 0, 2};
1104 static const int32_t fromIBM930Offs [] = { 1, 3};
1105
1106 gInBufferSize = inputsize;
1107 gOutBufferSize = outputsize;
1108
1109 /*From Unicode*/
1110
1111 #if !UCONFIG_NO_LEGACY_CONVERSION
1112 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1113 expstopIBM_949, sizeof(expstopIBM_949), "ibm-949",
1114 UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 ))
1115 log_err("u-> ibm-949 with stop did not match.\n");
1116 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1117 expstopIBM_943, sizeof(expstopIBM_943), "ibm-943",
1118 UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0))
1119 log_err("u-> ibm-943 with stop did not match.\n");
1120 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1121 expstopIBM_930, sizeof(expstopIBM_930), "ibm-930",
1122 UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 ))
1123 log_err("u-> ibm-930 with stop did not match.\n");
1124
1125 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n");
1126 {
1127 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1128 static const uint8_t toIBM943[]= { 0x61,};
1129 static const int32_t offset[]= {0,} ;
1130
1131 /*EUC_JP*/
1132 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1133 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,};
1134 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,};
1135
1136 /*EUC_TW*/
1137 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1138 static const uint8_t to_euc_tw[]={
1139 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,};
1140 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,};
1141
1142 /*ISO-2022-JP*/
1143 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, };
1144 static const uint8_t to_iso_2022_jp[]={
1145 0x41,
1146
1147 };
1148 static const int32_t from_iso_2022_jpOffs [] ={0,};
1149
1150 /*ISO-2022-cn*/
1151 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1152 static const uint8_t to_iso_2022_cn[]={
1153 0x41,
1154
1155 };
1156 static const int32_t from_iso_2022_cnOffs [] ={
1157 0,0,
1158 2,2,
1159 };
1160
1161 /*ISO-2022-kr*/
1162 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
1163 static const uint8_t to_iso_2022_kr[]={
1164 0x1b, 0x24, 0x29, 0x43,
1165 0x41,
1166 0x0e, 0x25, 0x50,
1167 };
1168 static const int32_t from_iso_2022_krOffs [] ={
1169 -1,-1,-1,-1,
1170 0,
1171 1,1,1,
1172 };
1173
1174 /* HZ encoding */
1175 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1176
1177 static const uint8_t to_hz[]={
1178 0x7e, 0x7d, 0x41,
1179 0x7e, 0x7b, 0x26, 0x30,
1180
1181 };
1182 static const int32_t from_hzOffs [] ={
1183 0, 0,0,
1184 1,1,1,1,
1185 };
1186
1187 /*ISCII*/
1188 static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, };
1189 static const uint8_t to_iscii[]={
1190 0x41,
1191 };
1192 static const int32_t from_isciiOffs [] ={
1193 0,
1194 };
1195
1196 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
1197 toIBM943, sizeof(toIBM943), "ibm-943",
1198 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 ))
1199 log_err("u-> ibm-943 with stop did not match.\n");
1200
1201 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
1202 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
1203 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 ))
1204 log_err("u-> euc-jp with stop did not match.\n");
1205
1206 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
1207 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
1208 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1209 log_err("u-> euc-tw with stop did not match.\n");
1210
1211 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
1212 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
1213 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1214 log_err("u-> iso-2022-jp with stop did not match.\n");
1215
1216 if(!testConvertFromUnicode(iso_2022_jp_inputText, sizeof(iso_2022_jp_inputText)/sizeof(iso_2022_jp_inputText[0]),
1217 to_iso_2022_jp, sizeof(to_iso_2022_jp), "iso-2022-jp",
1218 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1219 log_err("u-> iso-2022-jp with stop did not match.\n");
1220
1221 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
1222 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
1223 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 ))
1224 log_err("u-> iso-2022-cn with stop did not match.\n");
1225
1226 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
1227 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
1228 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 ))
1229 log_err("u-> iso-2022-kr with stop did not match.\n");
1230
1231 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
1232 to_hz, sizeof(to_hz), "HZ",
1233 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 ))
1234 log_err("u-> HZ with stop did not match.\n");\
1235
1236 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
1237 to_iscii, sizeof(to_iscii), "ISCII,version=0",
1238 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 ))
1239 log_err("u-> iscii with stop did not match.\n");
1240
1241
1242 }
1243 #endif
1244
1245 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n");
1246 {
1247 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1248
1249 static const uint8_t to_SCSU[]={
1250 0x41,
1251
1252 };
1253 int32_t from_SCSUOffs [] ={
1254 0,
1255
1256 };
1257 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1258 to_SCSU, sizeof(to_SCSU), "SCSU",
1259 UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 ))
1260 log_err("u-> SCSU with skip did not match.\n");
1261
1262 }
1263
1264 /*to Unicode*/
1265
1266 #if !UCONFIG_NO_LEGACY_CONVERSION
1267 if(!testConvertToUnicode(expstopIBM_949, sizeof(expstopIBM_949),
1268 IBM_949stoptoUnicode, sizeof(IBM_949stoptoUnicode)/sizeof(IBM_949stoptoUnicode[0]),"ibm-949",
1269 UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 ))
1270 log_err("ibm-949->u with stop did not match.\n");
1271 if(!testConvertToUnicode(expstopIBM_943, sizeof(expstopIBM_943),
1272 IBM_943stoptoUnicode, sizeof(IBM_943stoptoUnicode)/sizeof(IBM_943stoptoUnicode[0]),"ibm-943",
1273 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 ))
1274 log_err("ibm-943->u with stop did not match.\n");
1275 if(!testConvertToUnicode(expstopIBM_930, sizeof(expstopIBM_930),
1276 IBM_930stoptoUnicode, sizeof(IBM_930stoptoUnicode)/sizeof(IBM_930stoptoUnicode[0]),"ibm-930",
1277 UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 ))
1278 log_err("ibm-930->u with stop did not match.\n");
1279
1280 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n");
1281 {
1282
1283 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1284 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1285 };
1286 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63 };
1287 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1};
1288
1289
1290 /*EUC-JP*/
1291 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1292 0x8f, 0xda, 0xa1, /*unassigned*/
1293 0x8e, 0xe0,
1294 };
1295 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec};
1296 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3};
1297
1298 /*EUC_TW*/
1299 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1300 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1301 0xe6, 0xca, 0x8a,
1302 };
1303 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2};
1304 int32_t from_euc_twOffs [] ={ 0, 1, 3};
1305
1306
1307
1308 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
1309 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
1310 UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1311 log_err("EBCIDIC_STATEFUL->u with stop did not match.\n");
1312
1313 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
1314 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
1315 UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0))
1316 log_err("euc-jp->u with stop did not match.\n");
1317
1318 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
1319 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
1320 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1321 log_err("euc-tw->u with stop did not match.\n");
1322 }
1323 #endif
1324
1325 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n");
1326 {
1327 static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1328 0xe0, 0x80, 0x61,};
1329 static const UChar expected1[] = { 0x0031, 0x4e8c,};
1330 static const int32_t offsets1[] = { 0x0000, 0x0001};
1331
1332 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1333 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
1334 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1335 log_err("utf8->u with stop did not match.\n");;
1336 }
1337 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n");
1338 {
1339 static const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04};
1340 static const UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061};
1341 static const int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003};
1342
1343 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1344 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
1345 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1346 log_err("scsu->u with stop did not match.\n");;
1347 }
1348
1349 }
1350
TestSub(int32_t inputsize,int32_t outputsize)1351 static void TestSub(int32_t inputsize, int32_t outputsize)
1352 {
1353 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1354 static const UChar sampleText2[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1355
1356 static const uint8_t expsubIBM_949[] =
1357 { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 };
1358
1359 static const uint8_t expsubIBM_943[] = {
1360 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 };
1361
1362 static const uint8_t expsubIBM_930[] = {
1363 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f };
1364
1365 static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 };
1366 static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1367 static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1368
1369 static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1370 static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 };
1371 static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 };
1372
1373 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 };
1374 static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 };
1375 static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 };
1376
1377 gInBufferSize = inputsize;
1378 gOutBufferSize = outputsize;
1379
1380 /*from unicode*/
1381
1382 #if !UCONFIG_NO_LEGACY_CONVERSION
1383 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1384 expsubIBM_949, sizeof(expsubIBM_949), "ibm-949",
1385 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 ))
1386 log_err("u-> ibm-949 with subst did not match.\n");
1387 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1388 expsubIBM_943, sizeof(expsubIBM_943), "ibm-943",
1389 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0))
1390 log_err("u-> ibm-943 with subst did not match.\n");
1391 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1392 expsubIBM_930, sizeof(expsubIBM_930), "ibm-930",
1393 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 ))
1394 log_err("u-> ibm-930 with subst did not match.\n");
1395
1396 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1397 {
1398 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1399 static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 };
1400 static const int32_t offset[]= {0, 1, 1, 3, 3, 4};
1401
1402
1403 /* EUC_JP*/
1404 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1405 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1406 0xf4, 0xfe, 0xf4, 0xfe,
1407 0x61, 0x8e, 0xe0,
1408 };
1409 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7};
1410
1411 /*EUC_TW*/
1412 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1413 static const uint8_t to_euc_tw[]={
1414 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1415 0xfd, 0xfe, 0xfd, 0xfe,
1416 0x61, 0xe6, 0xca, 0x8a,
1417 };
1418
1419 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,};
1420
1421 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
1422 toIBM943, sizeof(toIBM943), "ibm-943",
1423 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 ))
1424 log_err("u-> ibm-943 with substitute did not match.\n");
1425
1426 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
1427 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
1428 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 ))
1429 log_err("u-> euc-jp with substitute did not match.\n");
1430
1431 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
1432 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
1433 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1434 log_err("u-> euc-tw with substitute did not match.\n");
1435 }
1436 #endif
1437
1438 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1439 {
1440 UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1441
1442 const uint8_t to_SCSU[]={
1443 0x41,
1444 0x0e, 0xff,0xfd,
1445 0x42
1446
1447
1448 };
1449 int32_t from_SCSUOffs [] ={
1450 0,
1451 1,1,1,
1452 2,
1453
1454 };
1455 const uint8_t to_SCSU_1[]={
1456 0x41,
1457
1458 };
1459 int32_t from_SCSUOffs_1 [] ={
1460 0,
1461
1462 };
1463 if(!testConvertFromUnicode(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1464 to_SCSU, sizeof(to_SCSU), "SCSU",
1465 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 ))
1466 log_err("u-> SCSU with substitute did not match.\n");
1467
1468 if(!testConvertFromUnicodeWithContext(SCSU_inputText, sizeof(SCSU_inputText)/sizeof(SCSU_inputText[0]),
1469 to_SCSU_1, sizeof(to_SCSU_1), "SCSU",
1470 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
1471 log_err("u-> SCSU with substitute did not match.\n");
1472 }
1473
1474 log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1475 {
1476 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,};
1477 static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac,
1478 0xf0, 0x90, 0x90, 0x81,
1479 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
1480 0xef, 0xbf, 0xbf, 0x61,
1481
1482 };
1483 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 };
1484 if(!testConvertFromUnicode(testinput, sizeof(testinput)/sizeof(testinput[0]),
1485 expectedUTF8, sizeof(expectedUTF8), "utf8",
1486 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) {
1487 log_err("u-> utf8 with stop did not match.\n");
1488 }
1489 }
1490
1491 log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1492 {
1493 static const UChar in[]={ 0x0041, 0xfeff };
1494
1495 static const uint8_t out[]={
1496 #if U_IS_BIG_ENDIAN
1497 0xfe, 0xff,
1498 0x00, 0x41,
1499 0xfe, 0xff
1500 #else
1501 0xff, 0xfe,
1502 0x41, 0x00,
1503 0xff, 0xfe
1504 #endif
1505 };
1506 static const int32_t offsets[]={
1507 -1, -1, 0, 0, 1, 1
1508 };
1509
1510 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in),
1511 out, sizeof(out), "UTF-16",
1512 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1513 ) {
1514 log_err("u->UTF-16 with substitute did not match.\n");
1515 }
1516 }
1517
1518 log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1519 {
1520 static const UChar in[]={ 0x0041, 0xfeff };
1521
1522 static const uint8_t out[]={
1523 #if U_IS_BIG_ENDIAN
1524 0x00, 0x00, 0xfe, 0xff,
1525 0x00, 0x00, 0x00, 0x41,
1526 0x00, 0x00, 0xfe, 0xff
1527 #else
1528 0xff, 0xfe, 0x00, 0x00,
1529 0x41, 0x00, 0x00, 0x00,
1530 0xff, 0xfe, 0x00, 0x00
1531 #endif
1532 };
1533 static const int32_t offsets[]={
1534 -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1
1535 };
1536
1537 if(!testConvertFromUnicode(in, ARRAY_LENGTH(in),
1538 out, sizeof(out), "UTF-32",
1539 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1540 ) {
1541 log_err("u->UTF-32 with substitute did not match.\n");
1542 }
1543 }
1544
1545 /*to unicode*/
1546
1547 #if !UCONFIG_NO_LEGACY_CONVERSION
1548 if(!testConvertToUnicode(expsubIBM_949, sizeof(expsubIBM_949),
1549 IBM_949subtoUnicode, sizeof(IBM_949subtoUnicode)/sizeof(IBM_949subtoUnicode[0]),"ibm-949",
1550 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 ))
1551 log_err("ibm-949->u with substitute did not match.\n");
1552 if(!testConvertToUnicode(expsubIBM_943, sizeof(expsubIBM_943),
1553 IBM_943subtoUnicode, sizeof(IBM_943subtoUnicode)/sizeof(IBM_943subtoUnicode[0]),"ibm-943",
1554 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 ))
1555 log_err("ibm-943->u with substitute did not match.\n");
1556 if(!testConvertToUnicode(expsubIBM_930, sizeof(expsubIBM_930),
1557 IBM_930subtoUnicode, sizeof(IBM_930subtoUnicode)/sizeof(IBM_930subtoUnicode[0]),"ibm-930",
1558 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 ))
1559 log_err("ibm-930->u with substitute did not match.\n");
1560
1561 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1562 {
1563
1564 const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1565 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1566 };
1567 UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0xfffd, 0x03b4
1568 };
1569 int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5};
1570
1571
1572 /* EUC_JP*/
1573 const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1574 0x8f, 0xda, 0xa1, /*unassigned*/
1575 0x8e, 0xe0, 0x8a
1576 };
1577 UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a };
1578 int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6, 9, 11 };
1579
1580 /*EUC_TW*/
1581 const uint8_t sampleTxt_euc_tw[]={
1582 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1583 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1584 0xe6, 0xca, 0x8a,
1585 };
1586 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, };
1587 int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13};
1588
1589
1590 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, sizeof(sampleTxtEBCIDIC_STATEFUL),
1591 EBCIDIC_STATEFUL_toUnicode, sizeof(EBCIDIC_STATEFUL_toUnicode)/sizeof(EBCIDIC_STATEFUL_toUnicode[0]),"ibm-930",
1592 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1593 log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n");
1594
1595
1596 if(!testConvertToUnicode(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
1597 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
1598 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ))
1599 log_err("euc-jp->u with substitute did not match.\n");
1600
1601
1602 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
1603 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
1604 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1605 log_err("euc-tw->u with substitute did not match.\n");
1606
1607
1608 if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, sizeof(sampleTxt_euc_jp),
1609 euc_jptoUnicode, sizeof(euc_jptoUnicode)/sizeof(euc_jptoUnicode[0]),"IBM-eucJP",
1610 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND))
1611 log_err("euc-jp->u with substitute did not match.\n");
1612 }
1613 #endif
1614
1615 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1616 {
1617 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1618 0xe0, 0x80, 0x61,};
1619 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0x0061};
1620 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0006};
1621
1622 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1623 expected1, sizeof(expected1)/sizeof(expected1[0]),"utf8",
1624 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1625 log_err("utf8->u with substitute did not match.\n");;
1626 }
1627 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1628 {
1629 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
1630 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfffd};
1631 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
1632
1633 if(!testConvertToUnicode(sampleText1, sizeof(sampleText1),
1634 expected1, sizeof(expected1)/sizeof(expected1[0]),"SCSU",
1635 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1636 log_err("scsu->u with stop did not match.\n");;
1637 }
1638
1639 #if !UCONFIG_NO_LEGACY_CONVERSION
1640 log_verbose("Testing ibm-930 subchar/subchar1\n");
1641 {
1642 static const UChar u1[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf };
1643 static const uint8_t s1[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f };
1644 static const int32_t offsets1[]={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1645
1646 static const UChar u2[]={ 0x6d63, 0x6d64, 0xfffd, 0x6d66, 0x1a };
1647 static const uint8_t s2[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 };
1648 static const int32_t offsets2[]={ 1, 3, 5, 7, 10 };
1649
1650 if(!testConvertFromUnicode(u1, ARRAY_LENGTH(u1), s1, ARRAY_LENGTH(s1), "ibm-930",
1651 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1652 ) {
1653 log_err("u->ibm-930 subchar/subchar1 did not match.\n");
1654 }
1655
1656 if(!testConvertToUnicode(s2, ARRAY_LENGTH(s2), u2, ARRAY_LENGTH(u2), "ibm-930",
1657 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1658 ) {
1659 log_err("ibm-930->u subchar/subchar1 did not match.\n");
1660 }
1661 }
1662
1663 log_verbose("Testing GB 18030 with substitute callbacks\n");
1664 {
1665 static const UChar u2[]={
1666 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff };
1667 static const uint8_t gb2[]={
1668 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 };
1669 static const int32_t offsets2[]={
1670 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 };
1671
1672 if(!testConvertToUnicode(gb2, ARRAY_LENGTH(gb2), u2, ARRAY_LENGTH(u2), "gb18030",
1673 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1674 ) {
1675 log_err("gb18030->u with substitute did not match.\n");
1676 }
1677 }
1678 #endif
1679
1680 log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n");
1681 {
1682 static const uint8_t utf7[]={
1683 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. */
1684 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e
1685 };
1686 static const UChar unicode[]={
1687 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xfffd, 0x2e
1688 };
1689 static const int32_t offsets[]={
1690 0, 1, 2, 4, 6, 7, 9, 11, 12, 14, 17, 19, 21, 22, 23, 24
1691 };
1692
1693 if(!testConvertToUnicode(utf7, ARRAY_LENGTH(utf7), unicode, ARRAY_LENGTH(unicode), "UTF-7",
1694 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1695 ) {
1696 log_err("UTF-7->u with substitute did not match.\n");
1697 }
1698 }
1699
1700 log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n");
1701 {
1702 static const uint8_t
1703 in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff },
1704 in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff },
1705 in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff };
1706
1707 static const UChar
1708 out1[]={ 0x4e00, 0xfeff },
1709 out2[]={ 0x004e, 0xfffe },
1710 out3[]={ 0xfefd, 0x4e00, 0xfeff };
1711
1712 static const int32_t
1713 offsets1[]={ 2, 4 },
1714 offsets2[]={ 2, 4 },
1715 offsets3[]={ 0, 2, 4 };
1716
1717 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-16",
1718 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1719 ) {
1720 log_err("UTF-16 (BE BOM)->u with substitute did not match.\n");
1721 }
1722
1723 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-16",
1724 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1725 ) {
1726 log_err("UTF-16 (LE BOM)->u with substitute did not match.\n");
1727 }
1728
1729 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-16",
1730 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1731 ) {
1732 log_err("UTF-16 (no BOM)->u with substitute did not match.\n");
1733 }
1734 }
1735
1736 log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n");
1737 {
1738 static const uint8_t
1739 in1[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff },
1740 in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 },
1741 in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 },
1742 in4[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 };
1743
1744 static const UChar
1745 out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff },
1746 out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe },
1747 out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd },
1748 out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 };
1749
1750 static const int32_t
1751 offsets1[]={ 4, 4, 8 },
1752 offsets2[]={ 4, 4, 8 },
1753 offsets3[]={ 0, 4, 4, 8, 12 },
1754 offsets4[]={ 0, 0, 4, 8 };
1755
1756 if(!testConvertToUnicode(in1, ARRAY_LENGTH(in1), out1, ARRAY_LENGTH(out1), "UTF-32",
1757 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1758 ) {
1759 log_err("UTF-32 (BE BOM)->u with substitute did not match.\n");
1760 }
1761
1762 if(!testConvertToUnicode(in2, ARRAY_LENGTH(in2), out2, ARRAY_LENGTH(out2), "UTF-32",
1763 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1764 ) {
1765 log_err("UTF-32 (LE BOM)->u with substitute did not match.\n");
1766 }
1767
1768 if(!testConvertToUnicode(in3, ARRAY_LENGTH(in3), out3, ARRAY_LENGTH(out3), "UTF-32",
1769 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1770 ) {
1771 log_err("UTF-32 (no BOM)->u with substitute did not match.\n");
1772 }
1773
1774 if(!testConvertToUnicode(in4, ARRAY_LENGTH(in4), out4, ARRAY_LENGTH(out4), "UTF-32",
1775 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0)
1776 ) {
1777 log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n");
1778 }
1779 }
1780 }
1781
TestSubWithValue(int32_t inputsize,int32_t outputsize)1782 static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
1783 {
1784 UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1785 UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1786
1787 const uint8_t expsubwvalIBM_949[]= {
1788 0x00, 0xb0, 0xa1, 0xb0, 0xa2,
1789 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 };
1790
1791 const uint8_t expsubwvalIBM_943[]= {
1792 0x9f, 0xaf, 0x9f, 0xb1,
1793 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 };
1794
1795 const uint8_t expsubwvalIBM_930[] = {
1796 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f };
1797
1798 int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 };
1799 int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 };
1800 int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */
1801
1802 gInBufferSize = inputsize;
1803 gOutBufferSize = outputsize;
1804
1805 /*from Unicode*/
1806
1807 #if !UCONFIG_NO_LEGACY_CONVERSION
1808 if(!testConvertFromUnicode(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
1809 expsubwvalIBM_949, sizeof(expsubwvalIBM_949), "ibm-949",
1810 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 ))
1811 log_err("u-> ibm-949 with subst with value did not match.\n");
1812
1813 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1814 expsubwvalIBM_943, sizeof(expsubwvalIBM_943), "ibm-943",
1815 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 ))
1816 log_err("u-> ibm-943 with sub with value did not match.\n");
1817
1818 if(!testConvertFromUnicode(sampleText2, sizeof(sampleText2)/sizeof(sampleText2[0]),
1819 expsubwvalIBM_930, sizeof(expsubwvalIBM_930), "ibm-930",
1820 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 ))
1821 log_err("u-> ibm-930 with subst with value did not match.\n");
1822
1823
1824 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n");
1825 {
1826 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1827 static const uint8_t toIBM943[]= { 0x61,
1828 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1829 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1830 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1831 0x61 };
1832 static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
1833
1834
1835 /* EUC_JP*/
1836 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, };
1837 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1838 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1839 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1840 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1841 0x61, 0x8e, 0xe0,
1842 };
1843 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,
1844 3, 3, 3, 3, 3, 3,
1845 3, 3, 3, 3, 3, 3,
1846 5, 5, 5, 5, 5, 5,
1847 6, 7, 7,
1848 };
1849
1850 /*EUC_TW*/
1851 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1852 static const uint8_t to_euc_tw[]={
1853 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1854 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1855 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1856 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1857 0x61, 0xe6, 0xca, 0x8a,
1858 };
1859 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,
1860 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5,
1861 6, 7, 7, 8,
1862 };
1863 /*ISO-2022-JP*/
1864 static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ;
1865 static const uint8_t to_iso_2022_jp1[]={
1866 0x1b, 0x24, 0x42, 0x21, 0x21,
1867 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1868 0x1b, 0x24, 0x42, 0x21, 0x22,
1869 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1870 0x42,
1871 };
1872
1873 static const int32_t from_iso_2022_jpOffs1 [] ={
1874 0,0,0,0,0,
1875 1,1,1,1,1,1,1,1,1,
1876 2,2,2,2,2,
1877 3,3,3,3,3,3,3,3,3,
1878 4,
1879 };
1880 /* surrogate pair*/
1881 static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ;
1882 static const uint8_t to_iso_2022_jp2[]={
1883 0x1b, 0x24, 0x42, 0x21, 0x21,
1884 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1885 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1886 0x1b, 0x24, 0x42, 0x21, 0x22,
1887 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1888 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1889 0x42,
1890 };
1891 static const int32_t from_iso_2022_jpOffs2 [] ={
1892 0,0,0,0,0,
1893 1,1,1,1,1,1,1,1,1,
1894 1,1,1,1,1,1,
1895 3,3,3,3,3,
1896 4,4,4,4,4,4,4,4,4,
1897 4,4,4,4,4,4,
1898 6,
1899 };
1900
1901 /*ISO-2022-cn*/
1902 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1903 static const uint8_t to_iso_2022_cn[]={
1904 0x41,
1905 0x25, 0x55, 0x33, 0x37, 0x31, 0x32,
1906 0x42,
1907 };
1908 static const int32_t from_iso_2022_cnOffs [] ={
1909 0,
1910 1,1,1,1,1,1,
1911 2,
1912 };
1913
1914 static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042};
1915
1916 static const uint8_t to_iso_2022_cn4[]={
1917 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
1918 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1919 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1920 0x0e, 0x21, 0x22,
1921 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1922 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1923 0x42,
1924 };
1925 static const int32_t from_iso_2022_cnOffs4 [] ={
1926 0,0,0,0,0,0,0,
1927 1,1,1,1,1,1,1,
1928 1,1,1,1,1,1,
1929 3,3,3,
1930 4,4,4,4,4,4,4,
1931 4,4,4,4,4,4,
1932 6
1933
1934 };
1935
1936 /*ISO-2022-kr*/
1937 static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
1938 static const uint8_t to_iso_2022_kr2[]={
1939 0x1b, 0x24, 0x29, 0x43,
1940 0x41,
1941 0x0e, 0x25, 0x50,
1942 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1943 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1944 0x0e, 0x25, 0x50,
1945 0x0f, 0x42,
1946 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1947 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1948 0x43
1949 };
1950 static const int32_t from_iso_2022_krOffs2 [] ={
1951 -1,-1,-1,-1,
1952 0,
1953 1,1,1,
1954 2,2,2,2,2,2,2,
1955 2,2,2,2,2,2,
1956 4,4,4,
1957 5,5,
1958 6,6,6,6,6,6,
1959 6,6,6,6,6,6,
1960 8,
1961 };
1962
1963 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 };
1964 static const uint8_t to_iso_2022_kr[]={
1965 0x1b, 0x24, 0x29, 0x43,
1966 0x41,
1967 0x0e, 0x25, 0x50,
1968 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1969 0x0e, 0x25, 0x50,
1970 0x0f, 0x42,
1971 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1972 0x43
1973 };
1974
1975
1976 static const int32_t from_iso_2022_krOffs [] ={
1977 -1,-1,-1,-1,
1978 0,
1979 1,1,1,
1980 2,2,2,2,2,2,2,
1981 3,3,3,
1982 4,4,
1983 5,5,5,5,5,5,
1984 6,
1985 };
1986 /* HZ encoding */
1987 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1988
1989 static const uint8_t to_hz[]={
1990 0x7e, 0x7d, 0x41,
1991 0x7e, 0x7b, 0x26, 0x30,
1992 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*unassigned*/
1993 0x7e, 0x7b, 0x26, 0x30,
1994 0x7e, 0x7d, 0x42,
1995
1996 };
1997 static const int32_t from_hzOffs [] ={
1998 0,0,0,
1999 1,1,1,1,
2000 2,2,2,2,2,2,2,2,
2001 3,3,3,3,
2002 4,4,4
2003 };
2004
2005 static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
2006 static const uint8_t to_hz2[]={
2007 0x7e, 0x7d, 0x41,
2008 0x7e, 0x7b, 0x26, 0x30,
2009 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
2010 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2011 0x7e, 0x7b, 0x26, 0x30,
2012 0x7e, 0x7d, 0x42,
2013 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
2014 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2015 0x43
2016 };
2017 static const int32_t from_hzOffs2 [] ={
2018 0,0,0,
2019 1,1,1,1,
2020 2,2,2,2,2,2,2,2,
2021 2,2,2,2,2,2,
2022 4,4,4,4,
2023 5,5,5,
2024 6,6,6,6,6,6,
2025 6,6,6,6,6,6,
2026 8,
2027 };
2028
2029 /*ISCII*/
2030 static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 };
2031 static const uint8_t to_iscii[]={
2032 0x41,
2033 0xef, 0x42, 0xa1,
2034 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
2035 0xa2,
2036 0x42,
2037 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
2038 0x43
2039 };
2040
2041
2042 static const int32_t from_isciiOffs [] ={
2043 0,
2044 1,1,1,
2045 2,2,2,2,2,2,
2046 3,
2047 4,
2048 5,5,5,5,5,5,
2049 6,
2050 };
2051
2052 if(!testConvertFromUnicode(inputTest, sizeof(inputTest)/sizeof(inputTest[0]),
2053 toIBM943, sizeof(toIBM943), "ibm-943",
2054 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 ))
2055 log_err("u-> ibm-943 with subst with value did not match.\n");
2056
2057 if(!testConvertFromUnicode(euc_jp_inputText, sizeof(euc_jp_inputText)/sizeof(euc_jp_inputText[0]),
2058 to_euc_jp, sizeof(to_euc_jp), "IBM-eucJP",
2059 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 ))
2060 log_err("u-> euc-jp with subst with value did not match.\n");
2061
2062 if(!testConvertFromUnicode(euc_tw_inputText, sizeof(euc_tw_inputText)/sizeof(euc_tw_inputText[0]),
2063 to_euc_tw, sizeof(to_euc_tw), "euc-tw",
2064 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 ))
2065 log_err("u-> euc-tw with subst with value did not match.\n");
2066
2067 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]),
2068 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp",
2069 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2070 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2071
2072 if(!testConvertFromUnicode(iso_2022_jp_inputText1, sizeof(iso_2022_jp_inputText1)/sizeof(iso_2022_jp_inputText1[0]),
2073 to_iso_2022_jp1, sizeof(to_iso_2022_jp1), "iso-2022-jp",
2074 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2075 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2076
2077 if(!testConvertFromUnicode(iso_2022_jp_inputText2, sizeof(iso_2022_jp_inputText2)/sizeof(iso_2022_jp_inputText2[0]),
2078 to_iso_2022_jp2, sizeof(to_iso_2022_jp2), "iso-2022-jp",
2079 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 ))
2080 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2081 /*ESCAPE OPTIONS*/
2082 {
2083 /* surrogate pair*/
2084 static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ;
2085 static const uint8_t to_iso_2022_jp3_v2[]={
2086 0x1b, 0x24, 0x42, 0x21, 0x21,
2087 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2088
2089 0x1b, 0x24, 0x42, 0x21, 0x22,
2090 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2091
2092 0x42,
2093 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b,
2094 };
2095
2096 static const int32_t from_iso_2022_jpOffs3_v2 [] ={
2097 0,0,0,0,0,
2098 1,1,1,1,1,1,1,1,1,1,1,1,
2099
2100 3,3,3,3,3,
2101 4,4,4,4,4,4,4,4,4,4,4,4,
2102
2103 6,
2104 7,7,7,7,7,7,7,7,7
2105 };
2106
2107 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, sizeof(iso_2022_jp_inputText3)/sizeof(iso_2022_jp_inputText3[0]),
2108 to_iso_2022_jp3_v2, sizeof(to_iso_2022_jp3_v2), "iso-2022-jp",
2109 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2110 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n");
2111 }
2112 {
2113 static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2114 static const uint8_t to_iso_2022_cn5_v2[]={
2115 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2116 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2117 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2118 0x0e, 0x21, 0x22,
2119 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2120 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2121 0x42,
2122 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32,
2123 };
2124 static const int32_t from_iso_2022_cnOffs5_v2 [] ={
2125 0,0,0,0,0,0,0,
2126 1,1,1,1,1,1,1,
2127 1,1,1,1,1,1,
2128 3,3,3,
2129 4,4,4,4,4,4,4,
2130 4,4,4,4,4,4,
2131 6,
2132 7,7,7,7,7,7
2133 };
2134 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, sizeof(iso_2022_cn_inputText5)/sizeof(iso_2022_cn_inputText5[0]),
2135 to_iso_2022_cn5_v2, sizeof(to_iso_2022_cn5_v2), "iso-2022-cn",
2136 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR ))
2137 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n");
2138
2139 }
2140 {
2141 static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2142 static const uint8_t to_iso_2022_cn6_v2[]={
2143 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2144 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2145 0x0e, 0x21, 0x22,
2146 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2147 0x42,
2148 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d
2149 };
2150 static const int32_t from_iso_2022_cnOffs6_v2 [] ={
2151 0, 0, 0, 0, 0, 0, 0,
2152 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2153 3, 3, 3,
2154 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2155 6,
2156 7, 7, 7, 7, 7, 7, 7, 7,
2157 };
2158 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, sizeof(iso_2022_cn_inputText6)/sizeof(iso_2022_cn_inputText6[0]),
2159 to_iso_2022_cn6_v2, sizeof(to_iso_2022_cn6_v2), "iso-2022-cn",
2160 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR ))
2161 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n");
2162
2163 }
2164 {
2165 static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2166 static const uint8_t to_iso_2022_cn7_v2[]={
2167 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2168 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2169 0x0e, 0x21, 0x22,
2170 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2171 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32,
2172 };
2173 static const int32_t from_iso_2022_cnOffs7_v2 [] ={
2174 0, 0, 0, 0, 0, 0, 0,
2175 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2176 3, 3, 3,
2177 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2178 6,
2179 7, 7, 7, 7, 7, 7,
2180 };
2181 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, sizeof(iso_2022_cn_inputText7)/sizeof(iso_2022_cn_inputText7[0]),
2182 to_iso_2022_cn7_v2, sizeof(to_iso_2022_cn7_v2), "iso-2022-cn",
2183 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR ))
2184 log_err("u-> iso-2022-cn with sub & K did not match.\n");
2185
2186 }
2187 {
2188 static const UChar iso_2022_cn_inputText8[]={
2189 0x3000,
2190 0xD84D, 0xDC56,
2191 0x3001,
2192 0xD84D, 0xDC56,
2193 0xDBFF, 0xDFFF,
2194 0x0042,
2195 0x0902};
2196 static const uint8_t to_iso_2022_cn8_v2[]={
2197 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2198 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20,
2199 0x0e, 0x21, 0x22,
2200 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20,
2201 0x5c, 0x31, 0x30, 0x46, 0x46, 0x46, 0x46, 0x20,
2202 0x42,
2203 0x5c, 0x39, 0x30, 0x32, 0x20
2204 };
2205 static const int32_t from_iso_2022_cnOffs8_v2 [] ={
2206 0, 0, 0, 0, 0, 0, 0,
2207 1, 1, 1, 1, 1, 1, 1, 1,
2208 3, 3, 3,
2209 4, 4, 4, 4, 4, 4, 4, 4,
2210 6, 6, 6, 6, 6, 6, 6, 6,
2211 8,
2212 9, 9, 9, 9, 9
2213 };
2214 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, sizeof(iso_2022_cn_inputText8)/sizeof(iso_2022_cn_inputText8[0]),
2215 to_iso_2022_cn8_v2, sizeof(to_iso_2022_cn8_v2), "iso-2022-cn",
2216 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR ))
2217 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n");
2218
2219 }
2220 {
2221 static const uint8_t to_iso_2022_cn4_v3[]={
2222 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2223 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2224 0x0e, 0x21, 0x22,
2225 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2226 0x42
2227 };
2228
2229
2230 static const int32_t from_iso_2022_cnOffs4_v3 [] ={
2231 0,0,0,0,0,0,0,
2232 1,1,1,1,1,1,1,1,1,1,1,
2233
2234 3,3,3,
2235 4,4,4,4,4,4,4,4,4,4,4,
2236
2237 6
2238
2239 };
2240 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]),
2241 to_iso_2022_cn4_v3, sizeof(to_iso_2022_cn4_v3), "iso-2022-cn",
2242 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2243 {
2244 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n");
2245 }
2246 }
2247 if(!testConvertFromUnicode(iso_2022_cn_inputText, sizeof(iso_2022_cn_inputText)/sizeof(iso_2022_cn_inputText[0]),
2248 to_iso_2022_cn, sizeof(to_iso_2022_cn), "iso-2022-cn",
2249 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 ))
2250 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2251
2252 if(!testConvertFromUnicode(iso_2022_cn_inputText4, sizeof(iso_2022_cn_inputText4)/sizeof(iso_2022_cn_inputText4[0]),
2253 to_iso_2022_cn4, sizeof(to_iso_2022_cn4), "iso-2022-cn",
2254 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 ))
2255 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2256 if(!testConvertFromUnicode(iso_2022_kr_inputText, sizeof(iso_2022_kr_inputText)/sizeof(iso_2022_kr_inputText[0]),
2257 to_iso_2022_kr, sizeof(to_iso_2022_kr), "iso-2022-kr",
2258 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 ))
2259 log_err("u-> iso_2022_kr with subst with value did not match.\n");
2260 if(!testConvertFromUnicode(iso_2022_kr_inputText2, sizeof(iso_2022_kr_inputText2)/sizeof(iso_2022_kr_inputText2[0]),
2261 to_iso_2022_kr2, sizeof(to_iso_2022_kr2), "iso-2022-kr",
2262 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 ))
2263 log_err("u-> iso_2022_kr2 with subst with value did not match.\n");
2264 if(!testConvertFromUnicode(hz_inputText, sizeof(hz_inputText)/sizeof(hz_inputText[0]),
2265 to_hz, sizeof(to_hz), "HZ",
2266 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 ))
2267 log_err("u-> hz with subst with value did not match.\n");
2268 if(!testConvertFromUnicode(hz_inputText2, sizeof(hz_inputText2)/sizeof(hz_inputText2[0]),
2269 to_hz2, sizeof(to_hz2), "HZ",
2270 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 ))
2271 log_err("u-> hz with subst with value did not match.\n");
2272
2273 if(!testConvertFromUnicode(iscii_inputText, sizeof(iscii_inputText)/sizeof(iscii_inputText[0]),
2274 to_iscii, sizeof(to_iscii), "ISCII,version=0",
2275 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 ))
2276 log_err("u-> iscii with subst with value did not match.\n");
2277 }
2278 #endif
2279
2280 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
2281 /*to Unicode*/
2282 {
2283 #if !UCONFIG_NO_LEGACY_CONVERSION
2284 static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
2285 0x81, 0xad, /*unassigned*/
2286 0x89, 0xd3 };
2287 static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
2288 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
2289 0x7B87};
2290 static const int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
2291
2292 /* EUC_JP*/
2293 static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
2294 0x8f, 0xda, 0xa1, /*unassigned*/
2295 0x8e, 0xe0,
2296 };
2297 static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec,
2298 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31,
2299 0x00a2 };
2300 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3,
2301 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2302 9,
2303 };
2304
2305 /*EUC_TW*/
2306 static const uint8_t sampleTxt_euc_tw[]={
2307 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
2308 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
2309 0xe6, 0xca, 0x8a,
2310 };
2311 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2,
2312 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43,
2313 0x8706, 0x8a, };
2314 static const int32_t from_euc_twOffs [] ={ 0, 1, 3,
2315 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2316 11, 13};
2317
2318 /*iso-2022-jp*/
2319 static const uint8_t sampleTxt_iso_2022_jp[]={
2320 0x1b, 0x28, 0x42, 0x41,
2321 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/
2322 0x1b, 0x28, 0x42, 0x42,
2323
2324 };
2325 /* A % X 3 A % X 1 A B */
2326 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x25,0x58,0x33,0x41,0x25,0x58,0x31,0x41, 0x42 };
2327 static const int32_t from_iso_2022_jpOffs [] ={ 3, 7, 7, 7, 7, 7, 7, 7, 7, 12 };
2328
2329 /*iso-2022-cn*/
2330 static const uint8_t sampleTxt_iso_2022_cn[]={
2331 0x0f, 0x41, 0x44,
2332 0x1B, 0x24, 0x29, 0x47,
2333 0x0E, 0x40, 0x6c, /*unassigned*/
2334 0x0f, 0x42,
2335
2336 };
2337 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 };
2338 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 8, 8, 8, 8, 8, 8, 8, 8, 11 };
2339
2340 /*iso-2022-kr*/
2341 static const uint8_t sampleTxt_iso_2022_kr[]={
2342 0x1b, 0x24, 0x29, 0x43,
2343 0x41,
2344 0x0E, 0x7f, 0x1E,
2345 0x0e, 0x25, 0x50,
2346 0x0f, 0x51,
2347 0x42, 0x43,
2348
2349 };
2350 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43};
2351 static const int32_t from_iso_2022_krOffs [] ={ 4, 6, 6, 6, 6, 6, 6, 6, 6, 9, 12, 13 , 14 };
2352
2353 /*hz*/
2354 static const uint8_t sampleTxt_hz[]={
2355 0x41,
2356 0x7e, 0x7b, 0x26, 0x30,
2357 0x7f, 0x1E, /*unassigned*/
2358 0x26, 0x30,
2359 0x7e, 0x7d, 0x42,
2360 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
2361 0x7e, 0x7d, 0x42,
2362 };
2363 static const UChar hztoUnicode[]={
2364 0x41,
2365 0x03a0,
2366 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2367 0x03A0,
2368 0x42,
2369 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2370 0x42,};
2371
2372 static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18, };
2373
2374
2375 /*iscii*/
2376 static const uint8_t sampleTxt_iscii[]={
2377 0x41,
2378 0x30,
2379 0xEB, /*unassigned*/
2380 0xa3,
2381 0x42,
2382 0xEC, /*unassigned*/
2383 0x42,
2384 };
2385 static const UChar isciitoUnicode[]={
2386 0x41,
2387 0x30,
2388 0x25, 0x58, 0x45, 0x42,
2389 0x0903,
2390 0x42,
2391 0x25, 0x58, 0x45, 0x43,
2392 0x42,};
2393
2394 static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6 };
2395 #endif
2396
2397 /*UTF8*/
2398 static const uint8_t sampleTxtUTF8[]={
2399 0x20, 0x64, 0x50,
2400 0xC2, 0x7E, /* truncated char */
2401 0x20,
2402 0xE0, 0xB5, 0x7E, /* truncated char */
2403 0x40,
2404 };
2405 static const UChar UTF8ToUnicode[]={
2406 0x0020, 0x0064, 0x0050,
2407 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */
2408 0x0020,
2409 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E,
2410 0x0040
2411 };
2412 static const int32_t fromUTF8[] = {
2413 0, 1, 2,
2414 3, 3, 3, 3, 4,
2415 5,
2416 6, 6, 6, 6, 6, 6, 6, 6, 8,
2417 9
2418 };
2419 static const UChar UTF8ToUnicodeXML_DEC[]={
2420 0x0020, 0x0064, 0x0050,
2421 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* Â~ */
2422 0x0020,
2423 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E,
2424 0x0040
2425 };
2426 static const int32_t fromUTF8XML_DEC[] = {
2427 0, 1, 2,
2428 3, 3, 3, 3, 3, 3, 4,
2429 5,
2430 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8,
2431 9
2432 };
2433
2434
2435 #if !UCONFIG_NO_LEGACY_CONVERSION
2436 if(!testConvertToUnicode(sampleTxtToU, sizeof(sampleTxtToU),
2437 IBM_943toUnicode, sizeof(IBM_943toUnicode)/sizeof(IBM_943toUnicode[0]),"ibm-943",
2438 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 ))
2439 log_err("ibm-943->u with substitute with value did not match.\n");
2440
2441 if(!testConvertToUnicode(sampleTxt_EUC_JP, sizeof(sampleTxt_EUC_JP),
2442 EUC_JPtoUnicode, sizeof(EUC_JPtoUnicode)/sizeof(EUC_JPtoUnicode[0]),"IBM-eucJP",
2443 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0))
2444 log_err("euc-jp->u with substitute with value did not match.\n");
2445
2446 if(!testConvertToUnicode(sampleTxt_euc_tw, sizeof(sampleTxt_euc_tw),
2447 euc_twtoUnicode, sizeof(euc_twtoUnicode)/sizeof(euc_twtoUnicode[0]),"euc-tw",
2448 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0))
2449 log_err("euc-tw->u with substitute with value did not match.\n");
2450
2451 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2452 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2453 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0))
2454 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2455
2456 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2457 iso_2022_jptoUnicode, sizeof(iso_2022_jptoUnicode)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2458 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR))
2459 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2460
2461 {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */
2462 {
2463 static const UChar iso_2022_jptoUnicodeDec[]={
2464 0x0041,
2465 /* & # 5 8 ; */
2466 0x0026, 0x0023, 0x0035, 0x0038, 0x003b,
2467 0x0026, 0x0023, 0x0032, 0x0036, 0x003b,
2468 0x0042 };
2469 static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12, };
2470 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2471 iso_2022_jptoUnicodeDec, sizeof(iso_2022_jptoUnicodeDec)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2472 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2473 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n");
2474 }
2475 {
2476 static const UChar iso_2022_jptoUnicodeHex[]={
2477 0x0041,
2478 /* & # x 3 A ; */
2479 0x0026, 0x0023, 0x0078, 0x0033, 0x0041, 0x003b,
2480 0x0026, 0x0023, 0x0078, 0x0031, 0x0041, 0x003b,
2481 0x0042 };
2482 static const int32_t from_iso_2022_jpOffsHex [] ={ 3,7,7,7,7,7,7,7,7,7,7,7,7,12 };
2483 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2484 iso_2022_jptoUnicodeHex, sizeof(iso_2022_jptoUnicodeHex)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2485 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR ))
2486 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n");
2487 }
2488 {
2489 static const UChar iso_2022_jptoUnicodeC[]={
2490 0x0041,
2491 0x005C, 0x0078, 0x0033, 0x0041, /* \x3A */
2492 0x005C, 0x0078, 0x0031, 0x0041, /* \x1A */
2493 0x0042 };
2494 int32_t from_iso_2022_jpOffsC [] ={ 3,7,7,7,7,7,7,7,7,12 };
2495 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, sizeof(sampleTxt_iso_2022_jp),
2496 iso_2022_jptoUnicodeC, sizeof(iso_2022_jptoUnicodeC)/sizeof(iso_2022_jptoUnicode[0]),"iso-2022-jp",
2497 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2498 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n");
2499 }
2500 }
2501 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, sizeof(sampleTxt_iso_2022_cn),
2502 iso_2022_cntoUnicode, sizeof(iso_2022_cntoUnicode)/sizeof(iso_2022_cntoUnicode[0]),"iso-2022-cn",
2503 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0))
2504 log_err("iso-2022-cn->u with substitute with value did not match.\n");
2505
2506 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, sizeof(sampleTxt_iso_2022_kr),
2507 iso_2022_krtoUnicode, sizeof(iso_2022_krtoUnicode)/sizeof(iso_2022_krtoUnicode[0]),"iso-2022-kr",
2508 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0))
2509 log_err("iso-2022-kr->u with substitute with value did not match.\n");
2510
2511 if(!testConvertToUnicode(sampleTxt_hz, sizeof(sampleTxt_hz),
2512 hztoUnicode, sizeof(hztoUnicode)/sizeof(hztoUnicode[0]),"HZ",
2513 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0))
2514 log_err("hz->u with substitute with value did not match.\n");
2515
2516 if(!testConvertToUnicode(sampleTxt_iscii, sizeof(sampleTxt_iscii),
2517 isciitoUnicode, sizeof(isciitoUnicode)/sizeof(isciitoUnicode[0]),"ISCII,version=0",
2518 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0))
2519 log_err("ISCII ->u with substitute with value did not match.\n");
2520 #endif
2521
2522 if(!testConvertToUnicode(sampleTxtUTF8, sizeof(sampleTxtUTF8),
2523 UTF8ToUnicode, sizeof(UTF8ToUnicode)/sizeof(UTF8ToUnicode[0]),"UTF-8",
2524 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0))
2525 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2526 if(!testConvertToUnicodeWithContext(sampleTxtUTF8, sizeof(sampleTxtUTF8),
2527 UTF8ToUnicodeXML_DEC, sizeof(UTF8ToUnicodeXML_DEC)/sizeof(UTF8ToUnicodeXML_DEC[0]),"UTF-8",
2528 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR))
2529 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2530 }
2531 }
2532
2533 #if !UCONFIG_NO_LEGACY_CONVERSION
TestLegalAndOthers(int32_t inputsize,int32_t outputsize)2534 static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize)
2535 {
2536 static const UChar legalText[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 };
2537 static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
2538 static const int32_t to949legal[] = {0, 1, 1, 2, 2, 3, 3};
2539
2540
2541 static const uint8_t text943[] = {
2542 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
2543 static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22, 0x5b57 };
2544 static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b57 };
2545 static const UChar toUnicode943stop[]= { 0x304b};
2546
2547 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7 };
2548 static const int32_t fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 };
2549 static const int32_t fromIBM943Offsstop[] = { 0};
2550
2551 gInBufferSize = inputsize;
2552 gOutBufferSize = outputsize;
2553 /*checking with a legal value*/
2554 if(!testConvertFromUnicode(legalText, sizeof(legalText)/sizeof(legalText[0]),
2555 templegal949, sizeof(templegal949), "ibm-949",
2556 UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 ))
2557 log_err("u-> ibm-949 with skip did not match.\n");
2558
2559 /*checking illegal value for ibm-943 with substitute*/
2560 if(!testConvertToUnicode(text943, sizeof(text943),
2561 toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943",
2562 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2563 log_err("ibm-943->u with subst did not match.\n");
2564 /*checking illegal value for ibm-943 with skip */
2565 if(!testConvertToUnicode(text943, sizeof(text943),
2566 toUnicode943skip, sizeof(toUnicode943skip)/sizeof(toUnicode943skip[0]),"ibm-943",
2567 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 ))
2568 log_err("ibm-943->u with skip did not match.\n");
2569
2570 /*checking illegal value for ibm-943 with stop */
2571 if(!testConvertToUnicode(text943, sizeof(text943),
2572 toUnicode943stop, sizeof(toUnicode943stop)/sizeof(toUnicode943stop[0]),"ibm-943",
2573 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 ))
2574 log_err("ibm-943->u with stop did not match.\n");
2575
2576 }
2577
TestSingleByte(int32_t inputsize,int32_t outputsize)2578 static void TestSingleByte(int32_t inputsize, int32_t outputsize)
2579 {
2580 static const uint8_t sampleText[] = {
2581 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
2582 0xff, 0x32, 0x33};
2583 static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 };
2584 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 };
2585 /*checking illegal value for ibm-943 with substitute*/
2586 gInBufferSize = inputsize;
2587 gOutBufferSize = outputsize;
2588
2589 if(!testConvertToUnicode(sampleText, sizeof(sampleText),
2590 toUnicode943sub, sizeof(toUnicode943sub)/sizeof(toUnicode943sub[0]),"ibm-943",
2591 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2592 log_err("ibm-943->u with subst did not match.\n");
2593 }
2594
TestEBCDIC_STATEFUL_Sub(int32_t inputsize,int32_t outputsize)2595 static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize)
2596 {
2597 /*EBCDIC_STATEFUL*/
2598 static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 };
2599 static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 };
2600 static const int32_t offset_930[]= { 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5 };
2601 /* s SO doubl SI sng s SO fe fe SI s */
2602
2603 /*EBCDIC_STATEFUL with subChar=3f*/
2604 static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 };
2605 static const int32_t offset_930_subvaried[]= { 0, 1, 1, 1, 2, 2, 3, 4, 5 };
2606 static const char mySubChar[]={ 0x3f};
2607
2608 gInBufferSize = inputsize;
2609 gOutBufferSize = outputsize;
2610
2611 if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]),
2612 toIBM930, sizeof(toIBM930), "ibm-930",
2613 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 ))
2614 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n");
2615
2616 if(!testConvertFromUnicode(ebcdic_inputTest, sizeof(ebcdic_inputTest)/sizeof(ebcdic_inputTest[0]),
2617 toIBM930_subvaried, sizeof(toIBM930_subvaried), "ibm-930",
2618 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 ))
2619 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n");
2620 }
2621 #endif
2622
testConvertFromUnicode(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UConverterFromUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len)2623 UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
2624 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
2625 const char *mySubChar, int8_t len)
2626 {
2627
2628
2629 UErrorCode status = U_ZERO_ERROR;
2630 UConverter *conv = 0;
2631 char junkout[NEW_MAX_BUFFER]; /* FIX */
2632 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2633 const UChar *src;
2634 char *end;
2635 char *targ;
2636 int32_t *offs;
2637 int i;
2638 int32_t realBufferSize;
2639 char *realBufferEnd;
2640 const UChar *realSourceEnd;
2641 const UChar *sourceLimit;
2642 UBool checkOffsets = TRUE;
2643 UBool doFlush;
2644 char junk[9999];
2645 char offset_str[9999];
2646 char *p;
2647 UConverterFromUCallback oldAction = NULL;
2648 const void* oldContext = NULL;
2649
2650
2651 for(i=0;i<NEW_MAX_BUFFER;i++)
2652 junkout[i] = (char)0xF0;
2653 for(i=0;i<NEW_MAX_BUFFER;i++)
2654 junokout[i] = 0xFF;
2655 setNuConvTestName(codepage, "FROM");
2656
2657 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
2658 gOutBufferSize);
2659
2660 conv = ucnv_open(codepage, &status);
2661 if(U_FAILURE(status))
2662 {
2663 log_data_err("Couldn't open converter %s\n",codepage);
2664 return TRUE;
2665 }
2666
2667 log_verbose("Converter opened..\n");
2668
2669 /*----setting the callback routine----*/
2670 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2671 if (U_FAILURE(status))
2672 {
2673 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2674 }
2675 /*------------------------*/
2676 /*setting the subChar*/
2677 if(mySubChar != NULL){
2678 ucnv_setSubstChars(conv, mySubChar, len, &status);
2679 if (U_FAILURE(status)) {
2680 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2681 }
2682 }
2683 /*------------*/
2684
2685 src = source;
2686 targ = junkout;
2687 offs = junokout;
2688
2689 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
2690 realBufferEnd = junkout + realBufferSize;
2691 realSourceEnd = source + sourceLen;
2692
2693 if ( gOutBufferSize != realBufferSize )
2694 checkOffsets = FALSE;
2695
2696 if( gInBufferSize != NEW_MAX_BUFFER )
2697 checkOffsets = FALSE;
2698
2699 do
2700 {
2701 end = nct_min(targ + gOutBufferSize, realBufferEnd);
2702 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
2703
2704 doFlush = (UBool)(sourceLimit == realSourceEnd);
2705
2706 if(targ == realBufferEnd)
2707 {
2708 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
2709 return FALSE;
2710 }
2711 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
2712
2713
2714 status = U_ZERO_ERROR;
2715
2716 ucnv_fromUnicode (conv,
2717 (char **)&targ,
2718 (const char *)end,
2719 &src,
2720 sourceLimit,
2721 checkOffsets ? offs : NULL,
2722 doFlush, /* flush if we're at the end of the input data */
2723 &status);
2724 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
2725
2726
2727 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2728 UChar errChars[50]; /* should be sufficient */
2729 int8_t errLen = 50;
2730 UErrorCode err = U_ZERO_ERROR;
2731 const UChar* start= NULL;
2732 ucnv_getInvalidUChars(conv,errChars, &errLen, &err);
2733 if(U_FAILURE(err)){
2734 log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err));
2735 }
2736 /* length of in invalid chars should be equal to returned length*/
2737 start = src - errLen;
2738 if(u_strncmp(errChars,start,errLen)!=0){
2739 log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2740 }
2741 }
2742 /* allow failure codes for the stop callback */
2743 if(U_FAILURE(status) &&
2744 (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND)))
2745 {
2746 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2747 return FALSE;
2748 }
2749
2750 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
2751 sourceLen, targ-junkout);
2752 if(getTestOption(VERBOSITY_OPTION))
2753 {
2754
2755 junk[0] = 0;
2756 offset_str[0] = 0;
2757 for(p = junkout;p<targ;p++)
2758 {
2759 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
2760 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
2761 }
2762
2763 log_verbose(junk);
2764 printSeq(expect, expectLen);
2765 if ( checkOffsets )
2766 {
2767 log_verbose("\nOffsets:");
2768 log_verbose(offset_str);
2769 }
2770 log_verbose("\n");
2771 }
2772 ucnv_close(conv);
2773
2774
2775 if(expectLen != targ-junkout)
2776 {
2777 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2778 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2779 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2780 printSeqErr(expect, expectLen);
2781 return FALSE;
2782 }
2783
2784 if (checkOffsets && (expectOffsets != 0) )
2785 {
2786 log_verbose("comparing %d offsets..\n", targ-junkout);
2787 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
2788 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2789 log_err("Got Output : ");
2790 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2791 log_err("Got Offsets: ");
2792 for(p=junkout;p<targ;p++)
2793 log_err("%d,", junokout[p-junkout]);
2794 log_err("\n");
2795 log_err("Expected Offsets: ");
2796 for(i=0; i<(targ-junkout); i++)
2797 log_err("%d,", expectOffsets[i]);
2798 log_err("\n");
2799 return FALSE;
2800 }
2801 }
2802
2803 if(!memcmp(junkout, expect, expectLen))
2804 {
2805 log_verbose("String matches! %s\n", gNuConvTestName);
2806 return TRUE;
2807 }
2808 else
2809 {
2810 log_err("String does not match. %s\n", gNuConvTestName);
2811 log_err("source: ");
2812 printUSeqErr(source, sourceLen);
2813 log_err("Got: ");
2814 printSeqErr((const uint8_t *)junkout, expectLen);
2815 log_err("Expected: ");
2816 printSeqErr(expect, expectLen);
2817 return FALSE;
2818 }
2819 }
2820
testConvertToUnicode(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UConverterToUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len)2821 UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
2822 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
2823 const char *mySubChar, int8_t len)
2824 {
2825 UErrorCode status = U_ZERO_ERROR;
2826 UConverter *conv = 0;
2827 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
2828 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2829 const char *src;
2830 const char *realSourceEnd;
2831 const char *srcLimit;
2832 UChar *targ;
2833 UChar *end;
2834 int32_t *offs;
2835 int i;
2836 UBool checkOffsets = TRUE;
2837 char junk[9999];
2838 char offset_str[9999];
2839 UChar *p;
2840 UConverterToUCallback oldAction = NULL;
2841 const void* oldContext = NULL;
2842
2843 int32_t realBufferSize;
2844 UChar *realBufferEnd;
2845
2846
2847 for(i=0;i<NEW_MAX_BUFFER;i++)
2848 junkout[i] = 0xFFFE;
2849
2850 for(i=0;i<NEW_MAX_BUFFER;i++)
2851 junokout[i] = -1;
2852
2853 setNuConvTestName(codepage, "TO");
2854
2855 log_verbose("\n========= %s\n", gNuConvTestName);
2856
2857 conv = ucnv_open(codepage, &status);
2858 if(U_FAILURE(status))
2859 {
2860 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
2861 return TRUE;
2862 }
2863
2864 log_verbose("Converter opened..\n");
2865
2866 src = (const char *)source;
2867 targ = junkout;
2868 offs = junokout;
2869
2870 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
2871 realBufferEnd = junkout + realBufferSize;
2872 realSourceEnd = src + sourcelen;
2873 /*----setting the callback routine----*/
2874 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2875 if (U_FAILURE(status))
2876 {
2877 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2878 }
2879 /*-------------------------------------*/
2880 /*setting the subChar*/
2881 if(mySubChar != NULL){
2882 ucnv_setSubstChars(conv, mySubChar, len, &status);
2883 if (U_FAILURE(status)) {
2884 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2885 }
2886 }
2887 /*------------*/
2888
2889
2890 if ( gOutBufferSize != realBufferSize )
2891 checkOffsets = FALSE;
2892
2893 if( gInBufferSize != NEW_MAX_BUFFER )
2894 checkOffsets = FALSE;
2895
2896 do
2897 {
2898 end = nct_min( targ + gOutBufferSize, realBufferEnd);
2899 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
2900
2901 if(targ == realBufferEnd)
2902 {
2903 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
2904 return FALSE;
2905 }
2906 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
2907
2908
2909
2910 status = U_ZERO_ERROR;
2911
2912 ucnv_toUnicode (conv,
2913 &targ,
2914 end,
2915 (const char **)&src,
2916 (const char *)srcLimit,
2917 checkOffsets ? offs : NULL,
2918 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
2919 &status);
2920 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
2921
2922 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2923 char errChars[50]; /* should be sufficient */
2924 int8_t errLen = 50;
2925 UErrorCode err = U_ZERO_ERROR;
2926 const char* start= NULL;
2927 ucnv_getInvalidChars(conv,errChars, &errLen, &err);
2928 if(U_FAILURE(err)){
2929 log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err));
2930 }
2931 /* length of in invalid chars should be equal to returned length*/
2932 start = src - errLen;
2933 if(uprv_strncmp(errChars,start,errLen)!=0){
2934 log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2935 }
2936 }
2937 /* allow failure codes for the stop callback */
2938 if(U_FAILURE(status) &&
2939 (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND)))
2940 {
2941 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2942 return FALSE;
2943 }
2944
2945 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
2946 sourcelen, targ-junkout);
2947 if(getTestOption(VERBOSITY_OPTION))
2948 {
2949
2950 junk[0] = 0;
2951 offset_str[0] = 0;
2952
2953 for(p = junkout;p<targ;p++)
2954 {
2955 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
2956 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
2957 }
2958
2959 log_verbose(junk);
2960 printUSeq(expect, expectlen);
2961 if ( checkOffsets )
2962 {
2963 log_verbose("\nOffsets:");
2964 log_verbose(offset_str);
2965 }
2966 log_verbose("\n");
2967 }
2968 ucnv_close(conv);
2969
2970 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
2971
2972 if (checkOffsets && (expectOffsets != 0))
2973 {
2974 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
2975 {
2976 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2977 log_err("Got offsets: ");
2978 for(p=junkout;p<targ;p++)
2979 log_err(" %2d,", junokout[p-junkout]);
2980 log_err("\n");
2981 log_err("Expected offsets: ");
2982 for(i=0; i<(targ-junkout); i++)
2983 log_err(" %2d,", expectOffsets[i]);
2984 log_err("\n");
2985 log_err("Got output: ");
2986 for(i=0; i<(targ-junkout); i++)
2987 log_err("0x%04x,", junkout[i]);
2988 log_err("\n");
2989 log_err("From source: ");
2990 for(i=0; i<(src-(const char *)source); i++)
2991 log_err(" 0x%02x,", (unsigned char)source[i]);
2992 log_err("\n");
2993 }
2994 }
2995
2996 if(!memcmp(junkout, expect, expectlen*2))
2997 {
2998 log_verbose("Matches!\n");
2999 return TRUE;
3000 }
3001 else
3002 {
3003 log_err("String does not match. %s\n", gNuConvTestName);
3004 log_verbose("String does not match. %s\n", gNuConvTestName);
3005 log_err("Got: ");
3006 printUSeqErr(junkout, expectlen);
3007 log_err("Expected: ");
3008 printUSeqErr(expect, expectlen);
3009 log_err("\n");
3010 return FALSE;
3011 }
3012 }
3013
testConvertFromUnicodeWithContext(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UConverterFromUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len,const void * context,UErrorCode expectedError)3014 UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
3015 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
3016 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3017 {
3018
3019
3020 UErrorCode status = U_ZERO_ERROR;
3021 UConverter *conv = 0;
3022 char junkout[NEW_MAX_BUFFER]; /* FIX */
3023 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3024 const UChar *src;
3025 char *end;
3026 char *targ;
3027 int32_t *offs;
3028 int i;
3029 int32_t realBufferSize;
3030 char *realBufferEnd;
3031 const UChar *realSourceEnd;
3032 const UChar *sourceLimit;
3033 UBool checkOffsets = TRUE;
3034 UBool doFlush;
3035 char junk[9999];
3036 char offset_str[9999];
3037 char *p;
3038 UConverterFromUCallback oldAction = NULL;
3039 const void* oldContext = NULL;
3040
3041
3042 for(i=0;i<NEW_MAX_BUFFER;i++)
3043 junkout[i] = (char)0xF0;
3044 for(i=0;i<NEW_MAX_BUFFER;i++)
3045 junokout[i] = 0xFF;
3046 setNuConvTestName(codepage, "FROM");
3047
3048 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
3049 gOutBufferSize);
3050
3051 conv = ucnv_open(codepage, &status);
3052 if(U_FAILURE(status))
3053 {
3054 log_data_err("Couldn't open converter %s\n",codepage);
3055 return TRUE; /* Because the err has already been logged. */
3056 }
3057
3058 log_verbose("Converter opened..\n");
3059
3060 /*----setting the callback routine----*/
3061 ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3062 if (U_FAILURE(status))
3063 {
3064 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3065 }
3066 /*------------------------*/
3067 /*setting the subChar*/
3068 if(mySubChar != NULL){
3069 ucnv_setSubstChars(conv, mySubChar, len, &status);
3070 if (U_FAILURE(status)) {
3071 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status));
3072 }
3073 }
3074 /*------------*/
3075
3076 src = source;
3077 targ = junkout;
3078 offs = junokout;
3079
3080 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
3081 realBufferEnd = junkout + realBufferSize;
3082 realSourceEnd = source + sourceLen;
3083
3084 if ( gOutBufferSize != realBufferSize )
3085 checkOffsets = FALSE;
3086
3087 if( gInBufferSize != NEW_MAX_BUFFER )
3088 checkOffsets = FALSE;
3089
3090 do
3091 {
3092 end = nct_min(targ + gOutBufferSize, realBufferEnd);
3093 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
3094
3095 doFlush = (UBool)(sourceLimit == realSourceEnd);
3096
3097 if(targ == realBufferEnd)
3098 {
3099 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
3100 return FALSE;
3101 }
3102 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
3103
3104
3105 status = U_ZERO_ERROR;
3106
3107 ucnv_fromUnicode (conv,
3108 (char **)&targ,
3109 (const char *)end,
3110 &src,
3111 sourceLimit,
3112 checkOffsets ? offs : NULL,
3113 doFlush, /* flush if we're at the end of the input data */
3114 &status);
3115 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
3116
3117 /* allow failure codes for the stop callback */
3118 if(U_FAILURE(status) && status != expectedError)
3119 {
3120 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3121 return FALSE;
3122 }
3123
3124 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
3125 sourceLen, targ-junkout);
3126 if(getTestOption(VERBOSITY_OPTION))
3127 {
3128
3129 junk[0] = 0;
3130 offset_str[0] = 0;
3131 for(p = junkout;p<targ;p++)
3132 {
3133 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
3134 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
3135 }
3136
3137 log_verbose(junk);
3138 printSeq(expect, expectLen);
3139 if ( checkOffsets )
3140 {
3141 log_verbose("\nOffsets:");
3142 log_verbose(offset_str);
3143 }
3144 log_verbose("\n");
3145 }
3146 ucnv_close(conv);
3147
3148
3149 if(expectLen != targ-junkout)
3150 {
3151 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3152 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3153 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3154 printSeqErr(expect, expectLen);
3155 return FALSE;
3156 }
3157
3158 if (checkOffsets && (expectOffsets != 0) )
3159 {
3160 log_verbose("comparing %d offsets..\n", targ-junkout);
3161 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
3162 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3163 log_err("Got Output : ");
3164 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3165 log_err("Got Offsets: ");
3166 for(p=junkout;p<targ;p++)
3167 log_err("%d,", junokout[p-junkout]);
3168 log_err("\n");
3169 log_err("Expected Offsets: ");
3170 for(i=0; i<(targ-junkout); i++)
3171 log_err("%d,", expectOffsets[i]);
3172 log_err("\n");
3173 return FALSE;
3174 }
3175 }
3176
3177 if(!memcmp(junkout, expect, expectLen))
3178 {
3179 log_verbose("String matches! %s\n", gNuConvTestName);
3180 return TRUE;
3181 }
3182 else
3183 {
3184 log_err("String does not match. %s\n", gNuConvTestName);
3185 log_err("source: ");
3186 printUSeqErr(source, sourceLen);
3187 log_err("Got: ");
3188 printSeqErr((const uint8_t *)junkout, expectLen);
3189 log_err("Expected: ");
3190 printSeqErr(expect, expectLen);
3191 return FALSE;
3192 }
3193 }
testConvertToUnicodeWithContext(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UConverterToUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len,const void * context,UErrorCode expectedError)3194 UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
3195 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
3196 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3197 {
3198 UErrorCode status = U_ZERO_ERROR;
3199 UConverter *conv = 0;
3200 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
3201 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3202 const char *src;
3203 const char *realSourceEnd;
3204 const char *srcLimit;
3205 UChar *targ;
3206 UChar *end;
3207 int32_t *offs;
3208 int i;
3209 UBool checkOffsets = TRUE;
3210 char junk[9999];
3211 char offset_str[9999];
3212 UChar *p;
3213 UConverterToUCallback oldAction = NULL;
3214 const void* oldContext = NULL;
3215
3216 int32_t realBufferSize;
3217 UChar *realBufferEnd;
3218
3219
3220 for(i=0;i<NEW_MAX_BUFFER;i++)
3221 junkout[i] = 0xFFFE;
3222
3223 for(i=0;i<NEW_MAX_BUFFER;i++)
3224 junokout[i] = -1;
3225
3226 setNuConvTestName(codepage, "TO");
3227
3228 log_verbose("\n========= %s\n", gNuConvTestName);
3229
3230 conv = ucnv_open(codepage, &status);
3231 if(U_FAILURE(status))
3232 {
3233 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
3234 return TRUE;
3235 }
3236
3237 log_verbose("Converter opened..\n");
3238
3239 src = (const char *)source;
3240 targ = junkout;
3241 offs = junokout;
3242
3243 realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
3244 realBufferEnd = junkout + realBufferSize;
3245 realSourceEnd = src + sourcelen;
3246 /*----setting the callback routine----*/
3247 ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3248 if (U_FAILURE(status))
3249 {
3250 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3251 }
3252 /*-------------------------------------*/
3253 /*setting the subChar*/
3254 if(mySubChar != NULL){
3255 ucnv_setSubstChars(conv, mySubChar, len, &status);
3256 if (U_FAILURE(status)) {
3257 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3258 }
3259 }
3260 /*------------*/
3261
3262
3263 if ( gOutBufferSize != realBufferSize )
3264 checkOffsets = FALSE;
3265
3266 if( gInBufferSize != NEW_MAX_BUFFER )
3267 checkOffsets = FALSE;
3268
3269 do
3270 {
3271 end = nct_min( targ + gOutBufferSize, realBufferEnd);
3272 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
3273
3274 if(targ == realBufferEnd)
3275 {
3276 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
3277 return FALSE;
3278 }
3279 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
3280
3281
3282
3283 status = U_ZERO_ERROR;
3284
3285 ucnv_toUnicode (conv,
3286 &targ,
3287 end,
3288 (const char **)&src,
3289 (const char *)srcLimit,
3290 checkOffsets ? offs : NULL,
3291 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
3292 &status);
3293 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
3294
3295 /* allow failure codes for the stop callback */
3296 if(U_FAILURE(status) && status!=expectedError)
3297 {
3298 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3299 return FALSE;
3300 }
3301
3302 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
3303 sourcelen, targ-junkout);
3304 if(getTestOption(VERBOSITY_OPTION))
3305 {
3306
3307 junk[0] = 0;
3308 offset_str[0] = 0;
3309
3310 for(p = junkout;p<targ;p++)
3311 {
3312 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
3313 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
3314 }
3315
3316 log_verbose(junk);
3317 printUSeq(expect, expectlen);
3318 if ( checkOffsets )
3319 {
3320 log_verbose("\nOffsets:");
3321 log_verbose(offset_str);
3322 }
3323 log_verbose("\n");
3324 }
3325 ucnv_close(conv);
3326
3327 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
3328
3329 if (checkOffsets && (expectOffsets != 0))
3330 {
3331 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
3332 {
3333 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3334 log_err("Got offsets: ");
3335 for(p=junkout;p<targ;p++)
3336 log_err(" %2d,", junokout[p-junkout]);
3337 log_err("\n");
3338 log_err("Expected offsets: ");
3339 for(i=0; i<(targ-junkout); i++)
3340 log_err(" %2d,", expectOffsets[i]);
3341 log_err("\n");
3342 log_err("Got output: ");
3343 for(i=0; i<(targ-junkout); i++)
3344 log_err("0x%04x,", junkout[i]);
3345 log_err("\n");
3346 log_err("From source: ");
3347 for(i=0; i<(src-(const char *)source); i++)
3348 log_err(" 0x%02x,", (unsigned char)source[i]);
3349 log_err("\n");
3350 }
3351 }
3352
3353 if(!memcmp(junkout, expect, expectlen*2))
3354 {
3355 log_verbose("Matches!\n");
3356 return TRUE;
3357 }
3358 else
3359 {
3360 log_err("String does not match. %s\n", gNuConvTestName);
3361 log_verbose("String does not match. %s\n", gNuConvTestName);
3362 log_err("Got: ");
3363 printUSeqErr(junkout, expectlen);
3364 log_err("Expected: ");
3365 printUSeqErr(expect, expectlen);
3366 log_err("\n");
3367 return FALSE;
3368 }
3369 }
3370
TestCallBackFailure(void)3371 static void TestCallBackFailure(void) {
3372 UErrorCode status = U_USELESS_COLLATOR_ERROR;
3373 ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status);
3374 if (status != U_USELESS_COLLATOR_ERROR) {
3375 log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n");
3376 }
3377 ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status);
3378 if (status != U_USELESS_COLLATOR_ERROR) {
3379 log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n");
3380 }
3381 ucnv_cbFromUWriteSub(NULL, -1, &status);
3382 if (status != U_USELESS_COLLATOR_ERROR) {
3383 log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n");
3384 }
3385 ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status);
3386 if (status != U_USELESS_COLLATOR_ERROR) {
3387 log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n");
3388 }
3389 }
3390