1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /*
9 ********************************************************************************
10 * File NCCBTST.C
11 *
12 * Modification History:
13 * Name Description
14 * Madhu Katragadda 7/21/1999 Testing error callback routines
15 ********************************************************************************
16 */
17 #include <ctype.h>
18 #include <stdbool.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include "cmemory.h"
23 #include "cstring.h"
24 #include "unicode/uloc.h"
25 #include "unicode/ucnv.h"
26 #include "unicode/ucnv_err.h"
27 #include "cintltst.h"
28 #include "unicode/utypes.h"
29 #include "unicode/ustring.h"
30 #include "nccbtst.h"
31 #include "unicode/ucnv_cb.h"
32 #include "unicode/utf16.h"
33
/* Largest buffer size passed to the callback test drivers (paired with 1 for the smallest). */
#define NEW_MAX_BUFFER 999

/*
 * Evaluates to the smaller of x and y (y when they compare equal).
 * Each argument use is parenthesized so that arguments containing
 * lower-precedence operators (e.g. nct_min(a & b, c)) are compared as a unit.
 * This is still a macro: an argument may be evaluated twice, so do not pass
 * expressions with side effects.
 */
#define nct_min(x,y) (((x) < (y)) ? (x) : (y))

/* Input buffer size of the test currently running; reported by setNuConvTestName(). */
static int32_t gInBufferSize = 0;
/* Output buffer size of the test currently running; reported by setNuConvTestName(). */
static int32_t gOutBufferSize = 0;
/* Human-readable label of the current conversion test, filled in by setNuConvTestName(). */
static char gNuConvTestName[1024];
41
/*
 * Writes the first len bytes of a to the verbose log as a brace-enclosed
 * list of two-digit uppercase hex values, preceded by a newline.
 * Nothing is listed between the braces when len is zero or negative.
 */
static void printSeq(const uint8_t* a, int len)
{
    int pos;

    log_verbose("\n{");
    for (pos = 0; pos < len; ++pos) {
        log_verbose("0x%02X, ", a[pos]);
    }
    log_verbose("}\n");
}
50
/*
 * Writes the first len UChar code units of a to the verbose log as a
 * brace-enclosed list of four-digit lowercase hex values.
 * Nothing is listed between the braces when len is zero or negative.
 */
static void printUSeq(const UChar* a, int len)
{
    int pos;

    log_verbose("{");
    for (pos = 0; pos < len; ++pos) {
        log_verbose(" 0x%04x, ", a[pos]);
    }
    log_verbose("}\n");
}
59
/*
 * Writes the first len bytes of a to stderr as a brace-enclosed list of
 * two-digit lowercase hex values.
 * Nothing is listed between the braces when len is zero or negative.
 */
static void printSeqErr(const uint8_t* a, int len)
{
    int pos;

    fputs("{", stderr);
    for (pos = 0; pos < len; ++pos) {
        fprintf(stderr, " 0x%02x, ", a[pos]);
    }
    fputs("}\n", stderr);
}
68
/*
 * Writes the first len UChar code units of a to stderr as a brace-enclosed
 * list of four-digit lowercase hex values.
 * Nothing is listed between the braces when len is zero or negative.
 */
static void printUSeqErr(const UChar* a, int len)
{
    int pos;

    fputs("{", stderr);
    for (pos = 0; pos < len; ++pos) {
        fprintf(stderr, "0x%04x, ", a[pos]);
    }
    fputs("}\n", stderr);
}
77
setNuConvTestName(const char * codepage,const char * direction)78 static void setNuConvTestName(const char *codepage, const char *direction)
79 {
80 snprintf(gNuConvTestName, sizeof(gNuConvTestName), "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
81 codepage,
82 direction,
83 (int)gInBufferSize,
84 (int)gOutBufferSize);
85 }
86
87
88 static void TestCallBackFailure(void);
89
90 void addTestConvertErrorCallBack(TestNode** root);
91
/*
 * Registers this file's converter error-callback tests with the test tree
 * rooted at root, each under a "tsconv/nccbtst/" path.
 */
void addTestConvertErrorCallBack(TestNode** root)
{
    /* Callback tests that are registered unconditionally. */
    addTest(root, TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack");
    addTest(root, TestStopCallBack, "tsconv/nccbtst/TestStopCallBack");
    addTest(root, TestSubCallBack,  "tsconv/nccbtst/TestSubCallBack");
    /* BEGIN android-removed
       To save space, Android does not build complete CJK conversion tables.
       We skip the test here.
    addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack");
       END android-removed */

#if !UCONFIG_NO_LEGACY_CONVERSION
    /* These two are only compiled when legacy conversion is enabled. */
    addTest(root, TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOtherCallBack");
    addTest(root, TestSingleByteCallBack,    "tsconv/nccbtst/TestSingleByteCallBack");
#endif

    addTest(root, TestCallBackFailure, "tsconv/nccbtst/TestCallBackFailure");
}
110
TestSkipCallBack(void)111 static void TestSkipCallBack(void)
112 {
113 TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
114 TestSkip(1,NEW_MAX_BUFFER);
115 TestSkip(1,1);
116 TestSkip(NEW_MAX_BUFFER, 1);
117 }
118
TestStopCallBack(void)119 static void TestStopCallBack(void)
120 {
121 TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
122 TestStop(1,NEW_MAX_BUFFER);
123 TestStop(1,1);
124 TestStop(NEW_MAX_BUFFER, 1);
125 }
126
TestSubCallBack(void)127 static void TestSubCallBack(void)
128 {
129 TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
130 TestSub(1,NEW_MAX_BUFFER);
131 TestSub(1,1);
132 TestSub(NEW_MAX_BUFFER, 1);
133
134 #if !UCONFIG_NO_LEGACY_CONVERSION
135 TestEBCDIC_STATEFUL_Sub(1, 1);
136 TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER);
137 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1);
138 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
139 #endif
140 }
141
TestSubWithValueCallBack(void)142 static void TestSubWithValueCallBack(void)
143 {
144 TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
145 TestSubWithValue(1,NEW_MAX_BUFFER);
146 TestSubWithValue(1,1);
147 TestSubWithValue(NEW_MAX_BUFFER, 1);
148 }
149
150 #if !UCONFIG_NO_LEGACY_CONVERSION
TestLegalAndOtherCallBack(void)151 static void TestLegalAndOtherCallBack(void)
152 {
153 TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
154 TestLegalAndOthers(1,NEW_MAX_BUFFER);
155 TestLegalAndOthers(1,1);
156 TestLegalAndOthers(NEW_MAX_BUFFER, 1);
157 }
158
TestSingleByteCallBack(void)159 static void TestSingleByteCallBack(void)
160 {
161 TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
162 TestSingleByte(1,NEW_MAX_BUFFER);
163 TestSingleByte(1,1);
164 TestSingleByte(NEW_MAX_BUFFER, 1);
165 }
166 #endif
167
TestSkip(int32_t inputsize,int32_t outputsize)168 static void TestSkip(int32_t inputsize, int32_t outputsize)
169 {
170 static const uint8_t expskipIBM_949[]= {
171 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
172
173 static const uint8_t expskipIBM_943[] = {
174 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 };
175
176 static const uint8_t expskipIBM_930[] = {
177 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f };
178
179 gInBufferSize = inputsize;
180 gOutBufferSize = outputsize;
181
182 /*From Unicode*/
183 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n");
184
185 #if !UCONFIG_NO_LEGACY_CONVERSION
186 {
187 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
188 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
189
190 static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 };
191 static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 };
192
193 if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
194 expskipIBM_949, UPRV_LENGTHOF(expskipIBM_949), "ibm-949",
195 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 ))
196 log_err("u-> ibm-949 with skip did not match.\n");
197 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
198 expskipIBM_943, UPRV_LENGTHOF(expskipIBM_943), "ibm-943",
199 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 ))
200 log_err("u-> ibm-943 with skip did not match.\n");
201 }
202
203 {
204 static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 };
205 static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f };
206 static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 };
207
208 /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */
209 if(!testConvertFromUnicode(fromU, UPRV_LENGTHOF(fromU),
210 fromUBytes, UPRV_LENGTHOF(fromUBytes),
211 "ibm-930",
212 UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets,
213 NULL, 0)
214 ) {
215 log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n");
216 }
217 }
218 #endif
219
220 {
221 static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
222 static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 };
223 static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 };
224
225 static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
226 static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 };
227 static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 };
228
229 /* US-ASCII */
230 if(!testConvertFromUnicode(usasciiFromU, UPRV_LENGTHOF(usasciiFromU),
231 usasciiFromUBytes, UPRV_LENGTHOF(usasciiFromUBytes),
232 "US-ASCII",
233 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
234 NULL, 0)
235 ) {
236 log_err("u->US-ASCII with skip did not match.\n");
237 }
238
239 #if !UCONFIG_NO_LEGACY_CONVERSION
240 /* SBCS NLTC codepage 367 for US-ASCII */
241 if(!testConvertFromUnicode(usasciiFromU, UPRV_LENGTHOF(usasciiFromU),
242 usasciiFromUBytes, UPRV_LENGTHOF(usasciiFromUBytes),
243 "ibm-367",
244 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
245 NULL, 0)
246 ) {
247 log_err("u->ibm-367 with skip did not match.\n");
248 }
249 #endif
250
251 /* ISO-Latin-1 */
252 if(!testConvertFromUnicode(latin1FromU, UPRV_LENGTHOF(latin1FromU),
253 latin1FromUBytes, UPRV_LENGTHOF(latin1FromUBytes),
254 "LATIN_1",
255 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
256 NULL, 0)
257 ) {
258 log_err("u->LATIN_1 with skip did not match.\n");
259 }
260
261 #if !UCONFIG_NO_LEGACY_CONVERSION
262 /* windows-1252 */
263 if(!testConvertFromUnicode(latin1FromU, UPRV_LENGTHOF(latin1FromU),
264 latin1FromUBytes, UPRV_LENGTHOF(latin1FromUBytes),
265 "windows-1252",
266 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
267 NULL, 0)
268 ) {
269 log_err("u->windows-1252 with skip did not match.\n");
270 }
271 }
272
273 {
274 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
275 static const uint8_t toIBM943[]= { 0x61, 0x61 };
276 static const int32_t offset[]= {0, 4};
277
278 /* EUC_JP*/
279 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
280 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
281 0x61, 0x8e, 0xe0,
282 };
283 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7};
284
285 /*EUC_TW*/
286 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
287 static const uint8_t to_euc_tw[]={
288 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
289 0x61, 0xe6, 0xca, 0x8a,
290 };
291 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,};
292
293 /*ISO-2022-JP*/
294 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, };
295 static const uint8_t to_iso_2022_jp[]={
296 0x41,
297 0x42,
298
299 };
300 static const int32_t from_iso_2022_jpOffs [] ={0,2};
301
302 /*ISO-2022-JP*/
303 UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
304 static const uint8_t to_iso_2022_jp2[]={
305 0x41,
306 0x43,
307
308 };
309 static const int32_t from_iso_2022_jpOffs2 [] ={0,2};
310
311 /*ISO-2022-cn*/
312 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
313 static const uint8_t to_iso_2022_cn[]={
314 0x41, 0x42
315 };
316 static const int32_t from_iso_2022_cnOffs [] ={
317 0, 2
318 };
319
320 /*ISO-2022-CN*/
321 static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
322 static const uint8_t to_iso_2022_cn1[]={
323 0x41, 0x43
324
325 };
326 static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 };
327
328 /*ISO-2022-kr*/
329 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
330 static const uint8_t to_iso_2022_kr[]={
331 0x1b, 0x24, 0x29, 0x43,
332 0x41,
333 0x0e, 0x25, 0x50,
334 0x25, 0x50,
335 0x0f, 0x42,
336 };
337 static const int32_t from_iso_2022_krOffs [] ={
338 -1,-1,-1,-1,
339 0,
340 1,1,1,
341 3,3,
342 4,4
343 };
344
345 /*ISO-2022-kr*/
346 static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
347 static const uint8_t to_iso_2022_kr1[]={
348 0x1b, 0x24, 0x29, 0x43,
349 0x41,
350 0x0e, 0x25, 0x50,
351 0x25, 0x50,
352
353 };
354 static const int32_t from_iso_2022_krOffs1 [] ={
355 -1,-1,-1,-1,
356 0,
357 1,1,1,
358 3,3,
359
360 };
361 /* HZ encoding */
362 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
363
364 static const uint8_t to_hz[]={
365 0x7e, 0x7d, 0x41,
366 0x7e, 0x7b, 0x26, 0x30,
367 0x26, 0x30,
368 0x7e, 0x7d, 0x42,
369
370 };
371 static const int32_t from_hzOffs [] ={
372 0,0,0,
373 1,1,1,1,
374 3,3,
375 4,4,4,4
376 };
377
378 static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
379
380 static const uint8_t to_hz1[]={
381 0x7e, 0x7d, 0x41,
382 0x7e, 0x7b, 0x26, 0x30,
383 0x26, 0x30,
384
385
386 };
387 static const int32_t from_hzOffs1 [] ={
388 0,0,0,
389 1,1,1,1,
390 3,3,
391
392 };
393
394 #endif
395
396 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
397
398 static const uint8_t to_SCSU[]={
399 0x41,
400 0x42
401
402
403 };
404 static const int32_t from_SCSUOffs [] ={
405 0,
406 2,
407
408 };
409
410 #if !UCONFIG_NO_LEGACY_CONVERSION
411 /* ISCII */
412 static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
413 static const uint8_t to_iscii[]={
414 0x41,
415 0x42,
416 };
417 static const int32_t from_isciiOffs [] ={
418 0,2,
419
420 };
421 /*ISCII*/
422 static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
423 static const uint8_t to_iscii1[]={
424 0x44,
425 0x43,
426
427 };
428 static const int32_t from_isciiOffs1 [] ={0,2};
429
430 if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
431 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
432 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 ))
433 log_err("u-> ibm-943 with skip did not match.\n");
434
435 if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
436 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
437 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 ))
438 log_err("u-> euc-jp with skip did not match.\n");
439
440 if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
441 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
442 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 ))
443 log_err("u-> euc-tw with skip did not match.\n");
444
445 /*iso_2022_jp*/
446 if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
447 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
448 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 ))
449 log_err("u-> iso-2022-jp with skip did not match.\n");
450
451 /* with context */
452 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, UPRV_LENGTHOF(iso_2022_jp_inputText2),
453 to_iso_2022_jp2, UPRV_LENGTHOF(to_iso_2022_jp2), "iso-2022-jp",
454 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
455 log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
456
457 /*iso_2022_cn*/
458 if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
459 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
460 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 ))
461 log_err("u-> iso-2022-cn with skip did not match.\n");
462 /*with context*/
463 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, UPRV_LENGTHOF(iso_2022_cn_inputText1),
464 to_iso_2022_cn1, UPRV_LENGTHOF(to_iso_2022_cn1), "iso-2022-cn",
465 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
466 log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
467
468 /*iso_2022_kr*/
469 if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
470 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
471 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 ))
472 log_err("u-> iso-2022-kr with skip did not match.\n");
473 /*with context*/
474 if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, UPRV_LENGTHOF(iso_2022_kr_inputText1),
475 to_iso_2022_kr1, UPRV_LENGTHOF(to_iso_2022_kr1), "iso-2022-kr",
476 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
477 log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
478
479 /*hz*/
480 if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
481 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
482 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 ))
483 log_err("u-> HZ with skip did not match.\n");
484 /*with context*/
485 if(!testConvertFromUnicodeWithContext(hz_inputText1, UPRV_LENGTHOF(hz_inputText1),
486 to_hz1, UPRV_LENGTHOF(to_hz1), "hz",
487 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
488 log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
489 #endif
490
491 /*SCSU*/
492 if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
493 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
494 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 ))
495 log_err("u-> SCSU with skip did not match.\n");
496
497 #if !UCONFIG_NO_LEGACY_CONVERSION
498 /*ISCII*/
499 if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
500 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
501 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 ))
502 log_err("u-> iscii with skip did not match.\n");
503 /*with context*/
504 if(!testConvertFromUnicodeWithContext(iscii_inputText1, UPRV_LENGTHOF(iscii_inputText1),
505 to_iscii1, UPRV_LENGTHOF(to_iscii1), "ISCII,version=0",
506 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
507 log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
508 #endif
509 }
510
511 log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
512 {
513 static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */
514 0xFB, 0xEE, 0x28, /* from source offset 0 */
515 0x24, 0x1E, 0x52,
516 0xB2,
517 0x20,
518 0xB3,
519 0xB1,
520 0x0D,
521 0x0A,
522
523 0x20, /* from 8 */
524 0x00,
525 0xD0, 0x6C,
526 0xB6,
527 0xD8, 0xA5,
528 0x20,
529 0x68,
530 0x59,
531
532 0xF9, 0x28, /* from 16 */
533 0x6D,
534 0x20,
535 0x73,
536 0xE0, 0x2D,
537 0xDE, 0x43,
538 0xD0, 0x33,
539 0x20,
540
541 0xFA, 0x83, /* from 24 */
542 0x25, 0x01,
543 0xFB, 0x16, 0x87,
544 0x4B, 0x16,
545 0x20,
546 0xE6, 0xBD,
547 0xEB, 0x5B,
548 0x4B, 0xCC,
549
550 0xF9, 0xA2, /* from 32 */
551 0xFC, 0x10, 0x3E,
552 0xFE, 0x16, 0x3A, 0x8C,
553 0x20,
554 0xFC, 0x03, 0xAC,
555
556 0x01, /* from 41 */
557 0xDE, 0x83,
558 0x20,
559 0x09
560 };
561 static const UChar expected[]={
562 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */
563 0x0063, 0x0061, 0x000D, 0x000A,
564
565 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */
566 0x0930, 0x0020, 0x0918, 0x0909,
567
568 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */
569 0x4000, 0x4E00, 0x7777, 0x0020,
570
571 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */
572 0x0020, 0xD7A3, 0xDC00, 0xD800,
573
574 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */
575 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
576
577 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */
578 0x0009
579 };
580 static const int32_t offsets[]={
581 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7,
582 8, 9, 10, 10, 11, 12, 12, 13, 14, 15,
583 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23,
584 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31,
585 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39,
586 41, 42, 42, 43, 44
587 };
588
589 /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */
590 if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
591 sampleText, UPRV_LENGTHOF(sampleText),
592 "BOCU-1",
593 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
594 ) {
595 log_err("u->BOCU-1 with skip did not match.\n");
596 }
597 }
598
599 log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
600 {
601 const uint8_t sampleText[]={
602 0x61, /* 'a' */
603 0xc4, 0xb5, /* U+0135 */
604 0xed, 0x80, 0xa0, /* Hangul U+d020 */
605 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */
606 0xee, 0x80, 0x80, /* PUA U+e000 */
607 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc01 */
608 0x62, /* 'b' */
609 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d801 */
610 0xd0, 0x80 /* U+0400 */
611 };
612 UChar expected[]={
613 0x0061,
614 0x0135,
615 0xd020,
616 0xd801, 0xdc01,
617 0xe000,
618 0xdc01,
619 0x0062,
620 0xd801,
621 0x0400
622 };
623 int32_t offsets[]={
624 0,
625 1, 1,
626 2, 2, 2,
627 3, 3, 3, 4, 4, 4,
628 5, 5, 5,
629 6, 6, 6,
630 7,
631 8, 8, 8,
632 9, 9
633 };
634
635 /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */
636
637 /* without offsets */
638 if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
639 sampleText, UPRV_LENGTHOF(sampleText),
640 "CESU-8",
641 UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0)
642 ) {
643 log_err("u->CESU-8 with skip did not match.\n");
644 }
645
646 /* with offsets */
647 if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
648 sampleText, UPRV_LENGTHOF(sampleText),
649 "CESU-8",
650 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
651 ) {
652 log_err("u->CESU-8 with skip did not match.\n");
653 }
654 }
655
656 /*to Unicode*/
657 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n");
658
659 #if !UCONFIG_NO_LEGACY_CONVERSION
660 {
661
662 static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 };
663 static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
664 static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
665
666 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5};
667 static const int32_t fromIBM943Offs [] = { 0, 2, 4};
668 static const int32_t fromIBM930Offs [] = { 1, 3, 5};
669
670 if(!testConvertToUnicode(expskipIBM_949, UPRV_LENGTHOF(expskipIBM_949),
671 IBM_949skiptoUnicode, UPRV_LENGTHOF(IBM_949skiptoUnicode),"ibm-949",
672 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 ))
673 log_err("ibm-949->u with skip did not match.\n");
674 if(!testConvertToUnicode(expskipIBM_943, UPRV_LENGTHOF(expskipIBM_943),
675 IBM_943skiptoUnicode, UPRV_LENGTHOF(IBM_943skiptoUnicode),"ibm-943",
676 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 ))
677 log_err("ibm-943->u with skip did not match.\n");
678
679
680 if(!testConvertToUnicode(expskipIBM_930, UPRV_LENGTHOF(expskipIBM_930),
681 IBM_930skiptoUnicode, UPRV_LENGTHOF(IBM_930skiptoUnicode),"ibm-930",
682 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 ))
683 log_err("ibm-930->u with skip did not match.\n");
684
685
686 if(!testConvertToUnicodeWithContext(expskipIBM_930, UPRV_LENGTHOF(expskipIBM_930),
687 IBM_930skiptoUnicode, UPRV_LENGTHOF(IBM_930skiptoUnicode),"ibm-930",
688 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
689 log_err("ibm-930->u with skip did not match.\n");
690 }
691 #endif
692
693 {
694 static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 };
695 static const UChar usasciiToU[] = { 0x61, 0x31 };
696 static const int32_t usasciiToUOffsets[] = { 0, 2 };
697
698 static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 };
699 static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 };
700 static const int32_t latin1ToUOffsets[] = { 0, 1, 2 };
701
702 /* US-ASCII */
703 if(!testConvertToUnicode(usasciiToUBytes, UPRV_LENGTHOF(usasciiToUBytes),
704 usasciiToU, UPRV_LENGTHOF(usasciiToU),
705 "US-ASCII",
706 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
707 NULL, 0)
708 ) {
709 log_err("US-ASCII->u with skip did not match.\n");
710 }
711
712 #if !UCONFIG_NO_LEGACY_CONVERSION
713 /* SBCS NLTC codepage 367 for US-ASCII */
714 if(!testConvertToUnicode(usasciiToUBytes, UPRV_LENGTHOF(usasciiToUBytes),
715 usasciiToU, UPRV_LENGTHOF(usasciiToU),
716 "ibm-367",
717 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
718 NULL, 0)
719 ) {
720 log_err("ibm-367->u with skip did not match.\n");
721 }
722 #endif
723
724 /* ISO-Latin-1 */
725 if(!testConvertToUnicode(latin1ToUBytes, UPRV_LENGTHOF(latin1ToUBytes),
726 latin1ToU, UPRV_LENGTHOF(latin1ToU),
727 "LATIN_1",
728 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
729 NULL, 0)
730 ) {
731 log_err("LATIN_1->u with skip did not match.\n");
732 }
733
734 #if !UCONFIG_NO_LEGACY_CONVERSION
735 /* windows-1252 */
736 if(!testConvertToUnicode(latin1ToUBytes, UPRV_LENGTHOF(latin1ToUBytes),
737 latin1ToU, UPRV_LENGTHOF(latin1ToU),
738 "windows-1252",
739 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
740 NULL, 0)
741 ) {
742 log_err("windows-1252->u with skip did not match.\n");
743 }
744 #endif
745 }
746
747 #if !UCONFIG_NO_LEGACY_CONVERSION
748 {
749 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
750 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
751 };
752 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0x03b4
753 };
754 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5};
755
756
757 /* euc-jp*/
758 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
759 0x8f, 0xda, 0xa1, /*unassigned*/
760 0x8e, 0xe0,
761 };
762 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2};
763 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9};
764
765 /*EUC_TW*/
766 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
767 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
768 0xe6, 0xca, 0x8a,
769 };
770 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, };
771 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13};
772 /*iso-2022-jp*/
773 static const uint8_t sampleTxt_iso_2022_jp[]={
774 0x41,
775 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/
776 0x1b, 0x28, 0x42, 0x42,
777
778 };
779 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x42 };
780 static const int32_t from_iso_2022_jpOffs [] ={ 0,9 };
781
782 /*iso-2022-cn*/
783 static const uint8_t sampleTxt_iso_2022_cn[]={
784 0x0f, 0x41, 0x44,
785 0x1B, 0x24, 0x29, 0x47,
786 0x0E, 0x40, 0x6f, /*unassigned*/
787 0x0f, 0x42,
788
789 };
790
791 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x42 };
792 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 11 };
793
794 /*iso-2022-kr*/
795 static const uint8_t sampleTxt_iso_2022_kr[]={
796 0x1b, 0x24, 0x29, 0x43,
797 0x41,
798 0x0E, 0x7f, 0x1E,
799 0x0e, 0x25, 0x50,
800 0x0f, 0x51,
801 0x42, 0x43,
802
803 };
804 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x03A0,0x51, 0x42,0x43};
805 static const int32_t from_iso_2022_krOffs [] ={ 4, 9, 12, 13 , 14 };
806
807 /*hz*/
808 static const uint8_t sampleTxt_hz[]={
809 0x41,
810 0x7e, 0x7b, 0x26, 0x30,
811 0x7f, 0x1E, /*unassigned*/
812 0x26, 0x30,
813 0x7e, 0x7d, 0x42,
814 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
815 0x7e, 0x7d, 0x42,
816 };
817 static const UChar hztoUnicode[]={
818 0x41,
819 0x03a0,
820 0x03A0,
821 0x42,
822 0x42,};
823
824 static const int32_t from_hzOffs [] ={0,3,7,11,18, };
825
826 /*ISCII*/
827 static const uint8_t sampleTxt_iscii[]={
828 0x41,
829 0xa1,
830 0xEB, /*unassigned*/
831 0x26,
832 0x30,
833 0xa2,
834 0xEC, /*unassigned*/
835 0x42,
836 };
837 static const UChar isciitoUnicode[]={
838 0x41,
839 0x0901,
840 0x26,
841 0x30,
842 0x0902,
843 0x42,
844 };
845
846 static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 };
847
848 /*LMBCS*/
849 static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50,
850 0x12, 0x92, 0xa0, /*unassigned*/
851 0x12, 0x92, 0xA1,
852 };
853 static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4};
854 static const int32_t fromLMBCS[] = {0, 6};
855
856 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
857 EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
858 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
859 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
860
861 if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
862 EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
863 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
864 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
865
866 if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
867 euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
868 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0))
869 log_err("euc-jp->u with skip did not match.\n");
870
871
872
873 if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
874 euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
875 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0))
876 log_err("euc-tw->u with skip did not match.\n");
877
878
879 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
880 iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
881 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0))
882 log_err("iso-2022-jp->u with skip did not match.\n");
883
884 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, UPRV_LENGTHOF(sampleTxt_iso_2022_cn),
885 iso_2022_cntoUnicode, UPRV_LENGTHOF(iso_2022_cntoUnicode),"iso-2022-cn",
886 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0))
887 log_err("iso-2022-cn->u with skip did not match.\n");
888
889 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, UPRV_LENGTHOF(sampleTxt_iso_2022_kr),
890 iso_2022_krtoUnicode, UPRV_LENGTHOF(iso_2022_krtoUnicode),"iso-2022-kr",
891 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0))
892 log_err("iso-2022-kr->u with skip did not match.\n");
893
894 if(!testConvertToUnicode(sampleTxt_hz, UPRV_LENGTHOF(sampleTxt_hz),
895 hztoUnicode, UPRV_LENGTHOF(hztoUnicode),"HZ",
896 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0))
897 log_err("HZ->u with skip did not match.\n");
898
899 if(!testConvertToUnicode(sampleTxt_iscii, UPRV_LENGTHOF(sampleTxt_iscii),
900 isciitoUnicode, UPRV_LENGTHOF(isciitoUnicode),"ISCII,version=0",
901 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0))
902 log_err("iscii->u with skip did not match.\n");
903
904 if(!testConvertToUnicode(sampleTxtLMBCS, UPRV_LENGTHOF(sampleTxtLMBCS),
905 LMBCSToUnicode, UPRV_LENGTHOF(LMBCSToUnicode),"LMBCS-1",
906 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0))
907 log_err("LMBCS->u with skip did not match.\n");
908
909 }
910 #endif
911
912 log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n");
913 {
914 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
915 0xe0, 0x80, 0x61,};
916 UChar expected1[] = { 0x0031, 0x4e8c, 0x0061};
917 int32_t offsets1[] = { 0x0000, 0x0001, 0x0006};
918
919 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
920 expected1, UPRV_LENGTHOF(expected1),"utf8",
921 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
922 log_err("utf8->u with skip did not match.\n");
923 }
924
925 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n");
926 {
927 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
928 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfffe};
929 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
930
931 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
932 expected1, UPRV_LENGTHOF(expected1),"SCSU",
933 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
934 log_err("scsu->u with skip did not match.\n");
935 }
936
937 log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
938 {
939 const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */
940 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */
941 0x24, 0x1E, 0x52, /* 3 */
942 0xB2, /* 6 */
943 0x20, /* 7 */
944 0x40, 0x07, /* 8 - wrong trail byte */
945 0xB3, /* 10 */
946 0xB1, /* 11 */
947 0xD0, 0x20, /* 12 - wrong trail byte */
948 0x0D, /* 14 */
949 0x0A, /* 15 */
950 0x20, /* 16 */
951 0x00, /* 17 */
952 0xD0, 0x6C, /* 18 */
953 0xB6, /* 20 */
954 0xD8, 0xA5, /* 21 */
955 0x20, /* 23 */
956 0x68, /* 24 */
957 0x59, /* 25 */
958 0xF9, 0x28, /* 26 */
959 0x6D, /* 28 */
960 0x20, /* 29 */
961 0x73, /* 30 */
962 0xE0, 0x2D, /* 31 */
963 0xDE, 0x43, /* 33 */
964 0xD0, 0x33, /* 35 */
965 0x20, /* 37 */
966 0xFA, 0x83, /* 38 */
967 0x25, 0x01, /* 40 */
968 0xFB, 0x16, 0x87, /* 42 */
969 0x4B, 0x16, /* 45 */
970 0x20, /* 47 */
971 0xE6, 0xBD, /* 48 */
972 0xEB, 0x5B, /* 50 */
973 0x4B, 0xCC, /* 52 */
974 0xF9, 0xA2, /* 54 */
975 0xFC, 0x10, 0x3E, /* 56 */
976 0xFE, 0x16, 0x3A, 0x8C, /* 59 */
977 0x20, /* 63 */
978 0xFC, 0x03, 0xAC, /* 64 */
979 0xFF, /* 67 - FF just resets the state without encoding anything */
980 0x01, /* 68 */
981 0xDE, 0x83, /* 69 */
982 0x20, /* 71 */
983 0x09 /* 72 */
984 };
985 UChar expected[]={
986 0xFEFF, 0x0061, 0x0062, 0x0020,
987 0x0063, 0x0061, 0x000D, 0x000A,
988 0x0020, 0x0000, 0x00DF, 0x00E6,
989 0x0930, 0x0020, 0x0918, 0x0909,
990 0x3086, 0x304D, 0x0020, 0x3053,
991 0x4000, 0x4E00, 0x7777, 0x0020,
992 0x9FA5, 0x4E00, 0xAC00, 0xBCDE,
993 0x0020, 0xD7A3, 0xDC00, 0xD800,
994 0xD800, 0xDC00, 0xD845, 0xDDDD,
995 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
996 0xDFFF, 0x0001, 0x0E40, 0x0020,
997 0x0009
998 };
999 int32_t offsets[]={
1000 0, 3, 6, 7, /* skip 8, */
1001 10, 11, /* skip 12, */
1002 14, 15, 16, 17, 18,
1003 20, 21, 23, 24, 25, 26, 28, 29,
1004 30, 31, 33, 35, 37, 38,
1005 40, 42, 45, 47, 48,
1006 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59,
1007 63, 64, /* trail */ 64, /* reset only 67, */
1008 68, 69,
1009 71, 72
1010 };
1011
1012 if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1013 expected, UPRV_LENGTHOF(expected), "BOCU-1",
1014 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1015 ) {
1016 log_err("BOCU-1->u with skip did not match.\n");
1017 }
1018 }
1019
1020 log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
1021 {
1022 const uint8_t sampleText[]={
1023 0x61, /* 0 'a' */
1024 0xc0, 0x80, /* 1 non-shortest form */
1025 0xc4, 0xb5, /* 3 U+0135 */
1026 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */
1027 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401 */
1028 0xee, 0x80, 0x80, /* 14 PUA U+e000 */
1029 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U+dc01 */
1030 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+10000 */
1031 0x62, /* 24 'b' */
1032 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+d801 */
1033 0xed, 0xa0, /* 28 incomplete sequence */
1034 0xd0, 0x80 /* 30 U+0400 */
1035 };
1036 UChar expected[]={
1037 0x0061,
1038 /* skip */
1039 0x0135,
1040 0xd020,
1041 0xd801, 0xdc01,
1042 0xe000,
1043 0xdc01,
1044 /* skip */
1045 0x0062,
1046 0xd801,
1047 0x0400
1048 };
1049 int32_t offsets[]={
1050 0,
1051 /* skip 1, */
1052 3,
1053 5,
1054 8, 11,
1055 14,
1056 17,
1057 /* skip 20, 20, */
1058 24,
1059 25,
1060 /* skip 28 */
1061 30
1062 };
1063
1064 /* without offsets */
1065 if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1066 expected, UPRV_LENGTHOF(expected), "CESU-8",
1067 UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0)
1068 ) {
1069 log_err("CESU-8->u with skip did not match.\n");
1070 }
1071
1072 /* with offsets */
1073 if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1074 expected, UPRV_LENGTHOF(expected), "CESU-8",
1075 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1076 ) {
1077 log_err("CESU-8->u with skip did not match.\n");
1078 }
1079 }
1080 }
1081
TestStop(int32_t inputsize,int32_t outputsize)1082 static void TestStop(int32_t inputsize, int32_t outputsize)
1083 {
1084 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1085 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1086
1087 static const uint8_t expstopIBM_949[]= {
1088 0x00, 0xb0, 0xa1, 0xb0, 0xa2};
1089
1090 static const uint8_t expstopIBM_943[] = {
1091 0x9f, 0xaf, 0x9f, 0xb1};
1092
1093 static const uint8_t expstopIBM_930[] = {
1094 0x0e, 0x5d, 0x5f, 0x5d, 0x63};
1095
1096 static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01};
1097 static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64};
1098 static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64};
1099
1100
1101 static const int32_t toIBM949Offsstop [] = { 0, 1, 1, 2, 2};
1102 static const int32_t toIBM943Offsstop [] = { 0, 0, 1, 1};
1103 static const int32_t toIBM930Offsstop [] = { 0, 0, 0, 1, 1};
1104
1105 static const int32_t fromIBM949Offs [] = { 0, 1, 3};
1106 static const int32_t fromIBM943Offs [] = { 0, 2};
1107 static const int32_t fromIBM930Offs [] = { 1, 3};
1108
1109 gInBufferSize = inputsize;
1110 gOutBufferSize = outputsize;
1111
1112 /*From Unicode*/
1113
1114 #if !UCONFIG_NO_LEGACY_CONVERSION
1115 if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1116 expstopIBM_949, UPRV_LENGTHOF(expstopIBM_949), "ibm-949",
1117 UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 ))
1118 log_err("u-> ibm-949 with stop did not match.\n");
1119 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1120 expstopIBM_943, UPRV_LENGTHOF(expstopIBM_943), "ibm-943",
1121 UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0))
1122 log_err("u-> ibm-943 with stop did not match.\n");
1123 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1124 expstopIBM_930, UPRV_LENGTHOF(expstopIBM_930), "ibm-930",
1125 UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 ))
1126 log_err("u-> ibm-930 with stop did not match.\n");
1127
1128 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n");
1129 {
1130 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1131 static const uint8_t toIBM943[]= { 0x61,};
1132 static const int32_t offset[]= {0,} ;
1133
1134 /*EUC_JP*/
1135 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1136 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,};
1137 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,};
1138
1139 /*EUC_TW*/
1140 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1141 static const uint8_t to_euc_tw[]={
1142 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,};
1143 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,};
1144
1145 /*ISO-2022-JP*/
1146 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, };
1147 static const uint8_t to_iso_2022_jp[]={
1148 0x41,
1149
1150 };
1151 static const int32_t from_iso_2022_jpOffs [] ={0,};
1152
1153 /*ISO-2022-cn*/
1154 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1155 static const uint8_t to_iso_2022_cn[]={
1156 0x41,
1157
1158 };
1159 static const int32_t from_iso_2022_cnOffs [] ={
1160 0,0,
1161 2,2,
1162 };
1163
1164 /*ISO-2022-kr*/
1165 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
1166 static const uint8_t to_iso_2022_kr[]={
1167 0x1b, 0x24, 0x29, 0x43,
1168 0x41,
1169 0x0e, 0x25, 0x50,
1170 };
1171 static const int32_t from_iso_2022_krOffs [] ={
1172 -1,-1,-1,-1,
1173 0,
1174 1,1,1,
1175 };
1176
1177 /* HZ encoding */
1178 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1179
1180 static const uint8_t to_hz[]={
1181 0x7e, 0x7d, 0x41,
1182 0x7e, 0x7b, 0x26, 0x30,
1183
1184 };
1185 static const int32_t from_hzOffs [] ={
1186 0, 0,0,
1187 1,1,1,1,
1188 };
1189
1190 /*ISCII*/
1191 static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, };
1192 static const uint8_t to_iscii[]={
1193 0x41,
1194 };
1195 static const int32_t from_isciiOffs [] ={
1196 0,
1197 };
1198
1199 if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
1200 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
1201 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 ))
1202 log_err("u-> ibm-943 with stop did not match.\n");
1203
1204 if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
1205 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
1206 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 ))
1207 log_err("u-> euc-jp with stop did not match.\n");
1208
1209 if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
1210 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
1211 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1212 log_err("u-> euc-tw with stop did not match.\n");
1213
1214 if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
1215 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
1216 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1217 log_err("u-> iso-2022-jp with stop did not match.\n");
1218
1219 if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
1220 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
1221 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1222 log_err("u-> iso-2022-jp with stop did not match.\n");
1223
1224 if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
1225 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
1226 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 ))
1227 log_err("u-> iso-2022-cn with stop did not match.\n");
1228
1229 if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
1230 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
1231 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 ))
1232 log_err("u-> iso-2022-kr with stop did not match.\n");
1233
1234 if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
1235 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
1236 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 ))
1237 log_err("u-> HZ with stop did not match.\n");\
1238
1239 if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
1240 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
1241 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 ))
1242 log_err("u-> iscii with stop did not match.\n");
1243
1244
1245 }
1246 #endif
1247
1248 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n");
1249 {
1250 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1251
1252 static const uint8_t to_SCSU[]={
1253 0x41,
1254
1255 };
1256 int32_t from_SCSUOffs [] ={
1257 0,
1258
1259 };
1260 if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1261 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
1262 UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 ))
1263 log_err("u-> SCSU with skip did not match.\n");
1264
1265 }
1266
1267 /*to Unicode*/
1268
1269 #if !UCONFIG_NO_LEGACY_CONVERSION
1270 if(!testConvertToUnicode(expstopIBM_949, UPRV_LENGTHOF(expstopIBM_949),
1271 IBM_949stoptoUnicode, UPRV_LENGTHOF(IBM_949stoptoUnicode),"ibm-949",
1272 UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 ))
1273 log_err("ibm-949->u with stop did not match.\n");
1274 if(!testConvertToUnicode(expstopIBM_943, UPRV_LENGTHOF(expstopIBM_943),
1275 IBM_943stoptoUnicode, UPRV_LENGTHOF(IBM_943stoptoUnicode),"ibm-943",
1276 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 ))
1277 log_err("ibm-943->u with stop did not match.\n");
1278 if(!testConvertToUnicode(expstopIBM_930, UPRV_LENGTHOF(expstopIBM_930),
1279 IBM_930stoptoUnicode, UPRV_LENGTHOF(IBM_930stoptoUnicode),"ibm-930",
1280 UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 ))
1281 log_err("ibm-930->u with stop did not match.\n");
1282
1283 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n");
1284 {
1285
1286 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1287 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1288 };
1289 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63 };
1290 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1};
1291
1292
1293 /*EUC-JP*/
1294 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1295 0x8f, 0xda, 0xa1, /*unassigned*/
1296 0x8e, 0xe0,
1297 };
1298 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec};
1299 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3};
1300
1301 /*EUC_TW*/
1302 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1303 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1304 0xe6, 0xca, 0x8a,
1305 };
1306 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2};
1307 int32_t from_euc_twOffs [] ={ 0, 1, 3};
1308
1309
1310
1311 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
1312 EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
1313 UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1314 log_err("EBCIDIC_STATEFUL->u with stop did not match.\n");
1315
1316 if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1317 euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1318 UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0))
1319 log_err("euc-jp->u with stop did not match.\n");
1320
1321 if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
1322 euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
1323 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1324 log_err("euc-tw->u with stop did not match.\n");
1325 }
1326 #endif
1327
1328 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n");
1329 {
1330 static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1331 0xe0, 0x80, 0x61,};
1332 static const UChar expected1[] = { 0x0031, 0x4e8c,};
1333 static const int32_t offsets1[] = { 0x0000, 0x0001};
1334
1335 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1336 expected1, UPRV_LENGTHOF(expected1),"utf8",
1337 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1338 log_err("utf8->u with stop did not match.\n");
1339 }
1340 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n");
1341 {
1342 static const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04};
1343 static const UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061};
1344 static const int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003};
1345
1346 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1347 expected1, UPRV_LENGTHOF(expected1),"SCSU",
1348 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1349 log_err("scsu->u with stop did not match.\n");
1350 }
1351
1352 }
1353
TestSub(int32_t inputsize,int32_t outputsize)1354 static void TestSub(int32_t inputsize, int32_t outputsize)
1355 {
1356 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1357 static const UChar sampleText2[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1358
1359 static const uint8_t expsubIBM_949[] =
1360 { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 };
1361
1362 static const uint8_t expsubIBM_943[] = {
1363 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 };
1364
1365 static const uint8_t expsubIBM_930[] = {
1366 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f };
1367
1368 static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 };
1369 static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1370 static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1371
1372 static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1373 static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 };
1374 static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 };
1375
1376 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 };
1377 static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 };
1378 static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 };
1379
1380 gInBufferSize = inputsize;
1381 gOutBufferSize = outputsize;
1382
1383 /*from unicode*/
1384
1385 #if !UCONFIG_NO_LEGACY_CONVERSION
1386 if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1387 expsubIBM_949, UPRV_LENGTHOF(expsubIBM_949), "ibm-949",
1388 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 ))
1389 log_err("u-> ibm-949 with subst did not match.\n");
1390 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1391 expsubIBM_943, UPRV_LENGTHOF(expsubIBM_943), "ibm-943",
1392 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0))
1393 log_err("u-> ibm-943 with subst did not match.\n");
1394 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1395 expsubIBM_930, UPRV_LENGTHOF(expsubIBM_930), "ibm-930",
1396 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 ))
1397 log_err("u-> ibm-930 with subst did not match.\n");
1398
1399 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1400 {
1401 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1402 static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 };
1403 static const int32_t offset[]= {0, 1, 1, 3, 3, 4};
1404
1405
1406 /* EUC_JP*/
1407 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1408 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1409 0xf4, 0xfe, 0xf4, 0xfe,
1410 0x61, 0x8e, 0xe0,
1411 };
1412 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7};
1413
1414 /*EUC_TW*/
1415 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1416 static const uint8_t to_euc_tw[]={
1417 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1418 0xfd, 0xfe, 0xfd, 0xfe,
1419 0x61, 0xe6, 0xca, 0x8a,
1420 };
1421
1422 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,};
1423
1424 if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
1425 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
1426 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 ))
1427 log_err("u-> ibm-943 with substitute did not match.\n");
1428
1429 if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
1430 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
1431 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 ))
1432 log_err("u-> euc-jp with substitute did not match.\n");
1433
1434 if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
1435 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
1436 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1437 log_err("u-> euc-tw with substitute did not match.\n");
1438 }
1439 #endif
1440
1441 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1442 {
1443 UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1444
1445 const uint8_t to_SCSU[]={
1446 0x41,
1447 0x0e, 0xff,0xfd,
1448 0x42
1449
1450
1451 };
1452 int32_t from_SCSUOffs [] ={
1453 0,
1454 1,1,1,
1455 2,
1456
1457 };
1458 const uint8_t to_SCSU_1[]={
1459 0x41,
1460
1461 };
1462 int32_t from_SCSUOffs_1 [] ={
1463 0,
1464
1465 };
1466 if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1467 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
1468 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 ))
1469 log_err("u-> SCSU with substitute did not match.\n");
1470
1471 if(!testConvertFromUnicodeWithContext(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1472 to_SCSU_1, UPRV_LENGTHOF(to_SCSU_1), "SCSU",
1473 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
1474 log_err("u-> SCSU with substitute did not match.\n");
1475 }
1476
1477 log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1478 {
1479 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,};
1480 static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac,
1481 0xf0, 0x90, 0x90, 0x81,
1482 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
1483 0xef, 0xbf, 0xbf, 0x61,
1484
1485 };
1486 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 };
1487 if(!testConvertFromUnicode(testinput, UPRV_LENGTHOF(testinput),
1488 expectedUTF8, UPRV_LENGTHOF(expectedUTF8), "utf8",
1489 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) {
1490 log_err("u-> utf8 with substitute did not match.\n");
1491 }
1492 }
1493
1494 log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1495 {
1496 static const UChar in[]={ 0x0041, 0xfeff };
1497
1498 static const uint8_t out[]={
1499 #if U_IS_BIG_ENDIAN
1500 0xfe, 0xff,
1501 0x00, 0x41,
1502 0xfe, 0xff
1503 #else
1504 0xff, 0xfe,
1505 0x41, 0x00,
1506 0xff, 0xfe
1507 #endif
1508 };
1509 static const int32_t offsets[]={
1510 -1, -1, 0, 0, 1, 1
1511 };
1512
1513 if(!testConvertFromUnicode(in, UPRV_LENGTHOF(in),
1514 out, UPRV_LENGTHOF(out), "UTF-16",
1515 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1516 ) {
1517 log_err("u->UTF-16 with substitute did not match.\n");
1518 }
1519 }
1520
1521 log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1522 {
1523 static const UChar in[]={ 0x0041, 0xfeff };
1524
1525 static const uint8_t out[]={
1526 #if U_IS_BIG_ENDIAN
1527 0x00, 0x00, 0xfe, 0xff,
1528 0x00, 0x00, 0x00, 0x41,
1529 0x00, 0x00, 0xfe, 0xff
1530 #else
1531 0xff, 0xfe, 0x00, 0x00,
1532 0x41, 0x00, 0x00, 0x00,
1533 0xff, 0xfe, 0x00, 0x00
1534 #endif
1535 };
1536 static const int32_t offsets[]={
1537 -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1
1538 };
1539
1540 if(!testConvertFromUnicode(in, UPRV_LENGTHOF(in),
1541 out, UPRV_LENGTHOF(out), "UTF-32",
1542 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1543 ) {
1544 log_err("u->UTF-32 with substitute did not match.\n");
1545 }
1546 }
1547
1548 /*to unicode*/
1549
1550 #if !UCONFIG_NO_LEGACY_CONVERSION
1551 if(!testConvertToUnicode(expsubIBM_949, UPRV_LENGTHOF(expsubIBM_949),
1552 IBM_949subtoUnicode, UPRV_LENGTHOF(IBM_949subtoUnicode),"ibm-949",
1553 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 ))
1554 log_err("ibm-949->u with substitute did not match.\n");
1555 if(!testConvertToUnicode(expsubIBM_943, UPRV_LENGTHOF(expsubIBM_943),
1556 IBM_943subtoUnicode, UPRV_LENGTHOF(IBM_943subtoUnicode),"ibm-943",
1557 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 ))
1558 log_err("ibm-943->u with substitute did not match.\n");
1559 if(!testConvertToUnicode(expsubIBM_930, UPRV_LENGTHOF(expsubIBM_930),
1560 IBM_930subtoUnicode, UPRV_LENGTHOF(IBM_930subtoUnicode),"ibm-930",
1561 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 ))
1562 log_err("ibm-930->u with substitute did not match.\n");
1563
1564 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1565 {
1566
1567 const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1568 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1569 };
1570 UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0xfffd, 0x03b4
1571 };
1572 int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5};
1573
1574
1575 /* EUC_JP*/
1576 const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1577 0x8f, 0xda, 0xa1, /*unassigned*/
1578 0x8e, 0xe0, 0x8a
1579 };
1580 UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a };
1581 int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6, 9, 11 };
1582
1583 /*EUC_TW*/
1584 const uint8_t sampleTxt_euc_tw[]={
1585 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1586 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1587 0xe6, 0xca, 0x8a,
1588 };
1589 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, };
1590 int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13};
1591
1592
1593 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
1594 EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
1595 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1596 log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n");
1597
1598
1599 if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1600 euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1601 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ))
1602 log_err("euc-jp->u with substitute did not match.\n");
1603
1604
1605 if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
1606 euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
1607 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1608 log_err("euc-tw->u with substitute did not match.\n");
1609
1610
1611 if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1612 euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1613 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND))
1614 log_err("euc-jp->u with substitute did not match.\n");
1615 }
1616 #endif
1617
1618 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1619 {
1620 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1621 0xe0, 0x80, 0x61,};
1622 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0xfffd, 0x0061};
1623 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0005, 0x0006};
1624
1625 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1626 expected1, UPRV_LENGTHOF(expected1),"utf8",
1627 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1628 log_err("utf8->u with substitute did not match.\n");
1629 }
1630 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1631 {
1632 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
1633 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfffd};
1634 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
1635
1636 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1637 expected1, UPRV_LENGTHOF(expected1),"SCSU",
1638 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1639 log_err("scsu->u with stop did not match.\n");
1640 }
1641
1642 #if !UCONFIG_NO_LEGACY_CONVERSION
1643 log_verbose("Testing ibm-930 subchar/subchar1\n");
1644 {
1645 static const UChar u1[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf };
1646 static const uint8_t s1[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f };
1647 static const int32_t offsets1[]={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1648
1649 static const UChar u2[]={ 0x6d63, 0x6d64, 0xfffd, 0x6d66, 0x1a };
1650 static const uint8_t s2[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 };
1651 static const int32_t offsets2[]={ 1, 3, 5, 7, 10 };
1652
1653 if(!testConvertFromUnicode(u1, UPRV_LENGTHOF(u1), s1, UPRV_LENGTHOF(s1), "ibm-930",
1654 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1655 ) {
1656 log_err("u->ibm-930 subchar/subchar1 did not match.\n");
1657 }
1658
1659 if(!testConvertToUnicode(s2, UPRV_LENGTHOF(s2), u2, UPRV_LENGTHOF(u2), "ibm-930",
1660 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1661 ) {
1662 log_err("ibm-930->u subchar/subchar1 did not match.\n");
1663 }
1664 }
1665
1666 log_verbose("Testing GB 18030 with substitute callbacks\n");
1667 {
1668 static const UChar u2[]={
1669 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff };
1670 static const uint8_t gb2[]={
1671 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 };
1672 static const int32_t offsets2[]={
1673 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 };
1674
1675 if(!testConvertToUnicode(gb2, UPRV_LENGTHOF(gb2), u2, UPRV_LENGTHOF(u2), "gb18030",
1676 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1677 ) {
1678 log_err("gb18030->u with substitute did not match.\n");
1679 }
1680 }
1681 #endif
1682
1683 log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n");
1684 {
1685 static const uint8_t utf7[]={
1686 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. */
1687 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e
1688 };
1689 static const UChar unicode[]={
1690 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xfffd, 0x2e
1691 };
1692 static const int32_t offsets[]={
1693 0, 1, 2, 4, 6, 7, 9, 11, 12, 14, 17, 19, 21, 22, 23, 24
1694 };
1695
1696 if(!testConvertToUnicode(utf7, UPRV_LENGTHOF(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7",
1697 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1698 ) {
1699 log_err("UTF-7->u with substitute did not match.\n");
1700 }
1701 }
1702
1703 log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n");
1704 {
1705 static const uint8_t
1706 in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff },
1707 in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff },
1708 in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff };
1709
1710 static const UChar
1711 out1[]={ 0x4e00, 0xfeff },
1712 out2[]={ 0x004e, 0xfffe },
1713 out3[]={ 0xfefd, 0x4e00, 0xfeff };
1714
1715 static const int32_t
1716 offsets1[]={ 2, 4 },
1717 offsets2[]={ 2, 4 },
1718 offsets3[]={ 0, 2, 4 };
1719
1720 if(!testConvertToUnicode(in1, UPRV_LENGTHOF(in1), out1, UPRV_LENGTHOF(out1), "UTF-16",
1721 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1722 ) {
1723 log_err("UTF-16 (BE BOM)->u with substitute did not match.\n");
1724 }
1725
1726 if(!testConvertToUnicode(in2, UPRV_LENGTHOF(in2), out2, UPRV_LENGTHOF(out2), "UTF-16",
1727 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1728 ) {
1729 log_err("UTF-16 (LE BOM)->u with substitute did not match.\n");
1730 }
1731
1732 if(!testConvertToUnicode(in3, UPRV_LENGTHOF(in3), out3, UPRV_LENGTHOF(out3), "UTF-16",
1733 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1734 ) {
1735 log_err("UTF-16 (no BOM)->u with substitute did not match.\n");
1736 }
1737 }
1738
1739 log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n");
1740 {
1741 static const uint8_t
1742 in1[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff },
1743 in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 },
1744 in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 },
1745 in4[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 };
1746
1747 static const UChar
1748 out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff },
1749 out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe },
1750 out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd },
1751 out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 };
1752
1753 static const int32_t
1754 offsets1[]={ 4, 4, 8 },
1755 offsets2[]={ 4, 4, 8 },
1756 offsets3[]={ 0, 4, 4, 8, 12 },
1757 offsets4[]={ 0, 0, 4, 8 };
1758
1759 if(!testConvertToUnicode(in1, UPRV_LENGTHOF(in1), out1, UPRV_LENGTHOF(out1), "UTF-32",
1760 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1761 ) {
1762 log_err("UTF-32 (BE BOM)->u with substitute did not match.\n");
1763 }
1764
1765 if(!testConvertToUnicode(in2, UPRV_LENGTHOF(in2), out2, UPRV_LENGTHOF(out2), "UTF-32",
1766 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1767 ) {
1768 log_err("UTF-32 (LE BOM)->u with substitute did not match.\n");
1769 }
1770
1771 if(!testConvertToUnicode(in3, UPRV_LENGTHOF(in3), out3, UPRV_LENGTHOF(out3), "UTF-32",
1772 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1773 ) {
1774 log_err("UTF-32 (no BOM)->u with substitute did not match.\n");
1775 }
1776
1777 if(!testConvertToUnicode(in4, UPRV_LENGTHOF(in4), out4, UPRV_LENGTHOF(out4), "UTF-32",
1778 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0)
1779 ) {
1780 log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n");
1781 }
1782 }
1783 }
1784
TestSubWithValue(int32_t inputsize,int32_t outputsize)1785 static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
1786 {
1787 UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1788 UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1789
1790 const uint8_t expsubwvalIBM_949[]= {
1791 0x00, 0xb0, 0xa1, 0xb0, 0xa2,
1792 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 };
1793
1794 const uint8_t expsubwvalIBM_943[]= {
1795 0x9f, 0xaf, 0x9f, 0xb1,
1796 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 };
1797
1798 const uint8_t expsubwvalIBM_930[] = {
1799 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f };
1800
1801 int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 };
1802 int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 };
1803 int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */
1804
1805 gInBufferSize = inputsize;
1806 gOutBufferSize = outputsize;
1807
1808 /*from Unicode*/
1809
1810 #if !UCONFIG_NO_LEGACY_CONVERSION
1811 if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1812 expsubwvalIBM_949, UPRV_LENGTHOF(expsubwvalIBM_949), "ibm-949",
1813 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 ))
1814 log_err("u-> ibm-949 with subst with value did not match.\n");
1815
1816 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1817 expsubwvalIBM_943, UPRV_LENGTHOF(expsubwvalIBM_943), "ibm-943",
1818 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 ))
1819 log_err("u-> ibm-943 with sub with value did not match.\n");
1820
1821 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1822 expsubwvalIBM_930, UPRV_LENGTHOF(expsubwvalIBM_930), "ibm-930",
1823 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 ))
1824 log_err("u-> ibm-930 with subst with value did not match.\n");
1825
1826
1827 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n");
1828 {
1829 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1830 static const uint8_t toIBM943[]= { 0x61,
1831 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1832 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1833 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1834 0x61 };
1835 static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
1836
1837
1838 /* EUC_JP*/
1839 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, };
1840 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1841 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1842 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1843 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1844 0x61, 0x8e, 0xe0,
1845 };
1846 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,
1847 3, 3, 3, 3, 3, 3,
1848 3, 3, 3, 3, 3, 3,
1849 5, 5, 5, 5, 5, 5,
1850 6, 7, 7,
1851 };
1852
1853 /*EUC_TW*/
1854 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1855 static const uint8_t to_euc_tw[]={
1856 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1857 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1858 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1859 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1860 0x61, 0xe6, 0xca, 0x8a,
1861 };
1862 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,
1863 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5,
1864 6, 7, 7, 8,
1865 };
1866 /*ISO-2022-JP*/
1867 static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ;
1868 static const uint8_t to_iso_2022_jp1[]={
1869 0x1b, 0x24, 0x42, 0x21, 0x21,
1870 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1871 0x1b, 0x24, 0x42, 0x21, 0x22,
1872 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1873 0x42,
1874 };
1875
1876 static const int32_t from_iso_2022_jpOffs1 [] ={
1877 0,0,0,0,0,
1878 1,1,1,1,1,1,1,1,1,
1879 2,2,2,2,2,
1880 3,3,3,3,3,3,3,3,3,
1881 4,
1882 };
1883 /* surrogate pair*/
1884 static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ;
1885 static const uint8_t to_iso_2022_jp2[]={
1886 0x1b, 0x24, 0x42, 0x21, 0x21,
1887 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1888 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1889 0x1b, 0x24, 0x42, 0x21, 0x22,
1890 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1891 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1892 0x42,
1893 };
1894 static const int32_t from_iso_2022_jpOffs2 [] ={
1895 0,0,0,0,0,
1896 1,1,1,1,1,1,1,1,1,
1897 1,1,1,1,1,1,
1898 3,3,3,3,3,
1899 4,4,4,4,4,4,4,4,4,
1900 4,4,4,4,4,4,
1901 6,
1902 };
1903
1904 /*ISO-2022-cn*/
1905 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1906 static const uint8_t to_iso_2022_cn[]={
1907 0x41,
1908 0x25, 0x55, 0x33, 0x37, 0x31, 0x32,
1909 0x42,
1910 };
1911 static const int32_t from_iso_2022_cnOffs [] ={
1912 0,
1913 1,1,1,1,1,1,
1914 2,
1915 };
1916
1917 static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042};
1918
1919 static const uint8_t to_iso_2022_cn4[]={
1920 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
1921 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1922 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1923 0x0e, 0x21, 0x22,
1924 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1925 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1926 0x42,
1927 };
1928 static const int32_t from_iso_2022_cnOffs4 [] ={
1929 0,0,0,0,0,0,0,
1930 1,1,1,1,1,1,1,
1931 1,1,1,1,1,1,
1932 3,3,3,
1933 4,4,4,4,4,4,4,
1934 4,4,4,4,4,4,
1935 6
1936
1937 };
1938
1939 /*ISO-2022-kr*/
1940 static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
1941 static const uint8_t to_iso_2022_kr2[]={
1942 0x1b, 0x24, 0x29, 0x43,
1943 0x41,
1944 0x0e, 0x25, 0x50,
1945 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1946 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1947 0x0e, 0x25, 0x50,
1948 0x0f, 0x42,
1949 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1950 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1951 0x43
1952 };
1953 static const int32_t from_iso_2022_krOffs2 [] ={
1954 -1,-1,-1,-1,
1955 0,
1956 1,1,1,
1957 2,2,2,2,2,2,2,
1958 2,2,2,2,2,2,
1959 4,4,4,
1960 5,5,
1961 6,6,6,6,6,6,
1962 6,6,6,6,6,6,
1963 8,
1964 };
1965
1966 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 };
1967 static const uint8_t to_iso_2022_kr[]={
1968 0x1b, 0x24, 0x29, 0x43,
1969 0x41,
1970 0x0e, 0x25, 0x50,
1971 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1972 0x0e, 0x25, 0x50,
1973 0x0f, 0x42,
1974 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1975 0x43
1976 };
1977
1978
1979 static const int32_t from_iso_2022_krOffs [] ={
1980 -1,-1,-1,-1,
1981 0,
1982 1,1,1,
1983 2,2,2,2,2,2,2,
1984 3,3,3,
1985 4,4,
1986 5,5,5,5,5,5,
1987 6,
1988 };
1989 /* HZ encoding */
1990 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1991
1992 static const uint8_t to_hz[]={
1993 0x7e, 0x7d, 0x41,
1994 0x7e, 0x7b, 0x26, 0x30,
1995 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*unassigned*/
1996 0x7e, 0x7b, 0x26, 0x30,
1997 0x7e, 0x7d, 0x42,
1998
1999 };
2000 static const int32_t from_hzOffs [] ={
2001 0,0,0,
2002 1,1,1,1,
2003 2,2,2,2,2,2,2,2,
2004 3,3,3,3,
2005 4,4,4
2006 };
2007
2008 static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
2009 static const uint8_t to_hz2[]={
2010 0x7e, 0x7d, 0x41,
2011 0x7e, 0x7b, 0x26, 0x30,
2012 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
2013 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2014 0x7e, 0x7b, 0x26, 0x30,
2015 0x7e, 0x7d, 0x42,
2016 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
2017 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2018 0x43
2019 };
2020 static const int32_t from_hzOffs2 [] ={
2021 0,0,0,
2022 1,1,1,1,
2023 2,2,2,2,2,2,2,2,
2024 2,2,2,2,2,2,
2025 4,4,4,4,
2026 5,5,5,
2027 6,6,6,6,6,6,
2028 6,6,6,6,6,6,
2029 8,
2030 };
2031
2032 /*ISCII*/
2033 static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 };
2034 static const uint8_t to_iscii[]={
2035 0x41,
2036 0xef, 0x42, 0xa1,
2037 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
2038 0xa2,
2039 0x42,
2040 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
2041 0x43
2042 };
2043
2044
2045 static const int32_t from_isciiOffs [] ={
2046 0,
2047 1,1,1,
2048 2,2,2,2,2,2,
2049 3,
2050 4,
2051 5,5,5,5,5,5,
2052 6,
2053 };
2054
2055 if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
2056 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
2057 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 ))
2058 log_err("u-> ibm-943 with subst with value did not match.\n");
2059
2060 if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
2061 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
2062 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 ))
2063 log_err("u-> euc-jp with subst with value did not match.\n");
2064
2065 if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
2066 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
2067 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 ))
2068 log_err("u-> euc-tw with subst with value did not match.\n");
2069
2070 if(!testConvertFromUnicode(iso_2022_jp_inputText1, UPRV_LENGTHOF(iso_2022_jp_inputText1),
2071 to_iso_2022_jp1, UPRV_LENGTHOF(to_iso_2022_jp1), "iso-2022-jp",
2072 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2073 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2074
2075 if(!testConvertFromUnicode(iso_2022_jp_inputText1, UPRV_LENGTHOF(iso_2022_jp_inputText1),
2076 to_iso_2022_jp1, UPRV_LENGTHOF(to_iso_2022_jp1), "iso-2022-jp",
2077 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2078 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2079
2080 if(!testConvertFromUnicode(iso_2022_jp_inputText2, UPRV_LENGTHOF(iso_2022_jp_inputText2),
2081 to_iso_2022_jp2, UPRV_LENGTHOF(to_iso_2022_jp2), "iso-2022-jp",
2082 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 ))
2083 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2084 /*ESCAPE OPTIONS*/
2085 {
2086 /* surrogate pair*/
2087 static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ;
2088 static const uint8_t to_iso_2022_jp3_v2[]={
2089 0x1b, 0x24, 0x42, 0x21, 0x21,
2090 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2091
2092 0x1b, 0x24, 0x42, 0x21, 0x22,
2093 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2094
2095 0x42,
2096 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b,
2097 };
2098
2099 static const int32_t from_iso_2022_jpOffs3_v2 [] ={
2100 0,0,0,0,0,
2101 1,1,1,1,1,1,1,1,1,1,1,1,
2102
2103 3,3,3,3,3,
2104 4,4,4,4,4,4,4,4,4,4,4,4,
2105
2106 6,
2107 7,7,7,7,7,7,7,7,7
2108 };
2109
2110 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, UPRV_LENGTHOF(iso_2022_jp_inputText3),
2111 to_iso_2022_jp3_v2, UPRV_LENGTHOF(to_iso_2022_jp3_v2), "iso-2022-jp",
2112 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2113 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n");
2114 }
2115 {
2116 static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2117 static const uint8_t to_iso_2022_cn5_v2[]={
2118 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2119 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2120 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2121 0x0e, 0x21, 0x22,
2122 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2123 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2124 0x42,
2125 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32,
2126 };
2127 static const int32_t from_iso_2022_cnOffs5_v2 [] ={
2128 0,0,0,0,0,0,0,
2129 1,1,1,1,1,1,1,
2130 1,1,1,1,1,1,
2131 3,3,3,
2132 4,4,4,4,4,4,4,
2133 4,4,4,4,4,4,
2134 6,
2135 7,7,7,7,7,7
2136 };
2137 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, UPRV_LENGTHOF(iso_2022_cn_inputText5),
2138 to_iso_2022_cn5_v2, UPRV_LENGTHOF(to_iso_2022_cn5_v2), "iso-2022-cn",
2139 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR ))
2140 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n");
2141
2142 }
2143 {
2144 static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2145 static const uint8_t to_iso_2022_cn6_v2[]={
2146 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2147 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2148 0x0e, 0x21, 0x22,
2149 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2150 0x42,
2151 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d
2152 };
2153 static const int32_t from_iso_2022_cnOffs6_v2 [] ={
2154 0, 0, 0, 0, 0, 0, 0,
2155 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2156 3, 3, 3,
2157 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2158 6,
2159 7, 7, 7, 7, 7, 7, 7, 7,
2160 };
2161 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, UPRV_LENGTHOF(iso_2022_cn_inputText6),
2162 to_iso_2022_cn6_v2, UPRV_LENGTHOF(to_iso_2022_cn6_v2), "iso-2022-cn",
2163 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR ))
2164 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n");
2165
2166 }
2167 {
2168 static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2169 static const uint8_t to_iso_2022_cn7_v2[]={
2170 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2171 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2172 0x0e, 0x21, 0x22,
2173 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2174 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32,
2175 };
2176 static const int32_t from_iso_2022_cnOffs7_v2 [] ={
2177 0, 0, 0, 0, 0, 0, 0,
2178 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2179 3, 3, 3,
2180 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2181 6,
2182 7, 7, 7, 7, 7, 7,
2183 };
2184 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, UPRV_LENGTHOF(iso_2022_cn_inputText7),
2185 to_iso_2022_cn7_v2, UPRV_LENGTHOF(to_iso_2022_cn7_v2), "iso-2022-cn",
2186 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR ))
2187 log_err("u-> iso-2022-cn with sub & K did not match.\n");
2188
2189 }
2190 {
2191 static const UChar iso_2022_cn_inputText8[]={
2192 0x3000,
2193 0xD84D, 0xDC56,
2194 0x3001,
2195 0xD84D, 0xDC56,
2196 0xDBFF, 0xDFFF,
2197 0x0042,
2198 0x0902};
2199 static const uint8_t to_iso_2022_cn8_v2[]={
2200 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2201 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20,
2202 0x0e, 0x21, 0x22,
2203 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20,
2204 0x5c, 0x31, 0x30, 0x46, 0x46, 0x46, 0x46, 0x20,
2205 0x42,
2206 0x5c, 0x39, 0x30, 0x32, 0x20
2207 };
2208 static const int32_t from_iso_2022_cnOffs8_v2 [] ={
2209 0, 0, 0, 0, 0, 0, 0,
2210 1, 1, 1, 1, 1, 1, 1, 1,
2211 3, 3, 3,
2212 4, 4, 4, 4, 4, 4, 4, 4,
2213 6, 6, 6, 6, 6, 6, 6, 6,
2214 8,
2215 9, 9, 9, 9, 9
2216 };
2217 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, UPRV_LENGTHOF(iso_2022_cn_inputText8),
2218 to_iso_2022_cn8_v2, UPRV_LENGTHOF(to_iso_2022_cn8_v2), "iso-2022-cn",
2219 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR ))
2220 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n");
2221
2222 }
2223 {
2224 static const uint8_t to_iso_2022_cn4_v3[]={
2225 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2226 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2227 0x0e, 0x21, 0x22,
2228 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2229 0x42
2230 };
2231
2232
2233 static const int32_t from_iso_2022_cnOffs4_v3 [] ={
2234 0,0,0,0,0,0,0,
2235 1,1,1,1,1,1,1,1,1,1,1,
2236
2237 3,3,3,
2238 4,4,4,4,4,4,4,4,4,4,4,
2239
2240 6
2241
2242 };
2243 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, UPRV_LENGTHOF(iso_2022_cn_inputText4),
2244 to_iso_2022_cn4_v3, UPRV_LENGTHOF(to_iso_2022_cn4_v3), "iso-2022-cn",
2245 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2246 {
2247 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n");
2248 }
2249 }
2250 if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
2251 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
2252 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 ))
2253 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2254
2255 if(!testConvertFromUnicode(iso_2022_cn_inputText4, UPRV_LENGTHOF(iso_2022_cn_inputText4),
2256 to_iso_2022_cn4, UPRV_LENGTHOF(to_iso_2022_cn4), "iso-2022-cn",
2257 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 ))
2258 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2259 if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
2260 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
2261 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 ))
2262 log_err("u-> iso_2022_kr with subst with value did not match.\n");
2263 if(!testConvertFromUnicode(iso_2022_kr_inputText2, UPRV_LENGTHOF(iso_2022_kr_inputText2),
2264 to_iso_2022_kr2, UPRV_LENGTHOF(to_iso_2022_kr2), "iso-2022-kr",
2265 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 ))
2266 log_err("u-> iso_2022_kr2 with subst with value did not match.\n");
2267 if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
2268 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
2269 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 ))
2270 log_err("u-> hz with subst with value did not match.\n");
2271 if(!testConvertFromUnicode(hz_inputText2, UPRV_LENGTHOF(hz_inputText2),
2272 to_hz2, UPRV_LENGTHOF(to_hz2), "HZ",
2273 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 ))
2274 log_err("u-> hz with subst with value did not match.\n");
2275
2276 if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
2277 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
2278 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 ))
2279 log_err("u-> iscii with subst with value did not match.\n");
2280 }
2281 #endif
2282
2283 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
2284 /*to Unicode*/
2285 {
2286 #if !UCONFIG_NO_LEGACY_CONVERSION
2287 static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
2288 0x81, 0xad, /*unassigned*/
2289 0x89, 0xd3 };
2290 static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
2291 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
2292 0x7B87};
2293 static const int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
2294
2295 /* EUC_JP*/
2296 static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
2297 0x8f, 0xda, 0xa1, /*unassigned*/
2298 0x8e, 0xe0,
2299 };
2300 static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec,
2301 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31,
2302 0x00a2 };
2303 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3,
2304 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2305 9,
2306 };
2307
2308 /*EUC_TW*/
2309 static const uint8_t sampleTxt_euc_tw[]={
2310 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
2311 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
2312 0xe6, 0xca, 0x8a,
2313 };
2314 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2,
2315 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43,
2316 0x8706, 0x8a, };
2317 static const int32_t from_euc_twOffs [] ={ 0, 1, 3,
2318 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2319 11, 13};
2320
2321 /*iso-2022-jp*/
2322 static const uint8_t sampleTxt_iso_2022_jp[]={
2323 0x1b, 0x28, 0x42, 0x41,
2324 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/
2325 0x1b, 0x28, 0x42, 0x42,
2326
2327 };
2328 /* A % X 3 A % X 1 A B */
2329 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x25,0x58,0x33,0x41,0x25,0x58,0x31,0x41, 0x42 };
2330 static const int32_t from_iso_2022_jpOffs [] ={ 3, 7, 7, 7, 7, 7, 7, 7, 7, 12 };
2331
2332 /*iso-2022-cn*/
2333 static const uint8_t sampleTxt_iso_2022_cn[]={
2334 0x0f, 0x41, 0x44,
2335 0x1B, 0x24, 0x29, 0x47,
2336 0x0E, 0x40, 0x6c, /*unassigned*/
2337 0x0f, 0x42,
2338
2339 };
2340 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 };
2341 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 8, 8, 8, 8, 8, 8, 8, 8, 11 };
2342
2343 /*iso-2022-kr*/
2344 static const uint8_t sampleTxt_iso_2022_kr[]={
2345 0x1b, 0x24, 0x29, 0x43,
2346 0x41,
2347 0x0E, 0x7f, 0x1E,
2348 0x0e, 0x25, 0x50,
2349 0x0f, 0x51,
2350 0x42, 0x43,
2351
2352 };
2353 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43};
2354 static const int32_t from_iso_2022_krOffs [] ={ 4, 6, 6, 6, 6, 6, 6, 6, 6, 9, 12, 13 , 14 };
2355
2356 /*hz*/
2357 static const uint8_t sampleTxt_hz[]={
2358 0x41,
2359 0x7e, 0x7b, 0x26, 0x30,
2360 0x7f, 0x1E, /*unassigned*/
2361 0x26, 0x30,
2362 0x7e, 0x7d, 0x42,
2363 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
2364 0x7e, 0x7d, 0x42,
2365 };
2366 static const UChar hztoUnicode[]={
2367 0x41,
2368 0x03a0,
2369 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2370 0x03A0,
2371 0x42,
2372 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2373 0x42,};
2374
2375 static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18, };
2376
2377
2378 /*iscii*/
2379 static const uint8_t sampleTxt_iscii[]={
2380 0x41,
2381 0x30,
2382 0xEB, /*unassigned*/
2383 0xa3,
2384 0x42,
2385 0xEC, /*unassigned*/
2386 0x42,
2387 };
2388 static const UChar isciitoUnicode[]={
2389 0x41,
2390 0x30,
2391 0x25, 0x58, 0x45, 0x42,
2392 0x0903,
2393 0x42,
2394 0x25, 0x58, 0x45, 0x43,
2395 0x42,};
2396
2397 static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6 };
2398 #endif
2399
2400 /*UTF8*/
2401 static const uint8_t sampleTxtUTF8[]={
2402 0x20, 0x64, 0x50,
2403 0xC2, 0x7E, /* truncated char */
2404 0x20,
2405 0xE0, 0xB5, 0x7E, /* truncated char */
2406 0x40,
2407 };
2408 static const UChar UTF8ToUnicode[]={
2409 0x0020, 0x0064, 0x0050,
2410 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */
2411 0x0020,
2412 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E,
2413 0x0040
2414 };
2415 static const int32_t fromUTF8[] = {
2416 0, 1, 2,
2417 3, 3, 3, 3, 4,
2418 5,
2419 6, 6, 6, 6, 6, 6, 6, 6, 8,
2420 9
2421 };
2422 static const UChar UTF8ToUnicodeXML_DEC[]={
2423 0x0020, 0x0064, 0x0050,
2424 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* Â~ */
2425 0x0020,
2426 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E,
2427 0x0040
2428 };
2429 static const int32_t fromUTF8XML_DEC[] = {
2430 0, 1, 2,
2431 3, 3, 3, 3, 3, 3, 4,
2432 5,
2433 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8,
2434 9
2435 };
2436
2437
2438 #if !UCONFIG_NO_LEGACY_CONVERSION
2439 if(!testConvertToUnicode(sampleTxtToU, UPRV_LENGTHOF(sampleTxtToU),
2440 IBM_943toUnicode, UPRV_LENGTHOF(IBM_943toUnicode),"ibm-943",
2441 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 ))
2442 log_err("ibm-943->u with substitute with value did not match.\n");
2443
2444 if(!testConvertToUnicode(sampleTxt_EUC_JP, UPRV_LENGTHOF(sampleTxt_EUC_JP),
2445 EUC_JPtoUnicode, UPRV_LENGTHOF(EUC_JPtoUnicode),"IBM-eucJP",
2446 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0))
2447 log_err("euc-jp->u with substitute with value did not match.\n");
2448
2449 if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
2450 euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
2451 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0))
2452 log_err("euc-tw->u with substitute with value did not match.\n");
2453
2454 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2455 iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
2456 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0))
2457 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2458
2459 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2460 iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
2461 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR))
2462 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2463
2464 {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */
2465 {
2466 static const UChar iso_2022_jptoUnicodeDec[]={
2467 0x0041,
2468 /* & # 5 8 ; */
2469 0x0026, 0x0023, 0x0035, 0x0038, 0x003b,
2470 0x0026, 0x0023, 0x0032, 0x0036, 0x003b,
2471 0x0042 };
2472 static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12, };
2473 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2474 iso_2022_jptoUnicodeDec, UPRV_LENGTHOF(iso_2022_jptoUnicodeDec),"iso-2022-jp",
2475 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2476 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n");
2477 }
2478 {
2479 static const UChar iso_2022_jptoUnicodeHex[]={
2480 0x0041,
2481 /* & # x 3 A ; */
2482 0x0026, 0x0023, 0x0078, 0x0033, 0x0041, 0x003b,
2483 0x0026, 0x0023, 0x0078, 0x0031, 0x0041, 0x003b,
2484 0x0042 };
2485 static const int32_t from_iso_2022_jpOffsHex [] ={ 3,7,7,7,7,7,7,7,7,7,7,7,7,12 };
2486 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2487 iso_2022_jptoUnicodeHex, UPRV_LENGTHOF(iso_2022_jptoUnicodeHex),"iso-2022-jp",
2488 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR ))
2489 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n");
2490 }
2491 {
2492 static const UChar iso_2022_jptoUnicodeC[]={
2493 0x0041,
2494 0x005C, 0x0078, 0x0033, 0x0041, /* \x3A */
2495 0x005C, 0x0078, 0x0031, 0x0041, /* \x1A */
2496 0x0042 };
2497 int32_t from_iso_2022_jpOffsC [] ={ 3,7,7,7,7,7,7,7,7,12 };
2498 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2499 iso_2022_jptoUnicodeC, UPRV_LENGTHOF(iso_2022_jptoUnicodeC),"iso-2022-jp",
2500 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2501 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n");
2502 }
2503 }
2504 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, UPRV_LENGTHOF(sampleTxt_iso_2022_cn),
2505 iso_2022_cntoUnicode, UPRV_LENGTHOF(iso_2022_cntoUnicode),"iso-2022-cn",
2506 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0))
2507 log_err("iso-2022-cn->u with substitute with value did not match.\n");
2508
2509 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, UPRV_LENGTHOF(sampleTxt_iso_2022_kr),
2510 iso_2022_krtoUnicode, UPRV_LENGTHOF(iso_2022_krtoUnicode),"iso-2022-kr",
2511 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0))
2512 log_err("iso-2022-kr->u with substitute with value did not match.\n");
2513
2514 if(!testConvertToUnicode(sampleTxt_hz, UPRV_LENGTHOF(sampleTxt_hz),
2515 hztoUnicode, UPRV_LENGTHOF(hztoUnicode),"HZ",
2516 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0))
2517 log_err("hz->u with substitute with value did not match.\n");
2518
2519 if(!testConvertToUnicode(sampleTxt_iscii, UPRV_LENGTHOF(sampleTxt_iscii),
2520 isciitoUnicode, UPRV_LENGTHOF(isciitoUnicode),"ISCII,version=0",
2521 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0))
2522 log_err("ISCII ->u with substitute with value did not match.\n");
2523 #endif
2524
2525 if(!testConvertToUnicode(sampleTxtUTF8, UPRV_LENGTHOF(sampleTxtUTF8),
2526 UTF8ToUnicode, UPRV_LENGTHOF(UTF8ToUnicode),"UTF-8",
2527 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0))
2528 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2529 if(!testConvertToUnicodeWithContext(sampleTxtUTF8, UPRV_LENGTHOF(sampleTxtUTF8),
2530 UTF8ToUnicodeXML_DEC, UPRV_LENGTHOF(UTF8ToUnicodeXML_DEC),"UTF-8",
2531 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR))
2532 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2533 }
2534 }
2535
2536 #if !UCONFIG_NO_LEGACY_CONVERSION
TestLegalAndOthers(int32_t inputsize,int32_t outputsize)2537 static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize)
2538 {
2539 static const UChar legalText[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 };
2540 static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
2541 static const int32_t to949legal[] = {0, 1, 1, 2, 2, 3, 3};
2542
2543
2544 static const uint8_t text943[] = {
2545 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
2546 static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22, 0x5b57 };
2547 static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b57 };
2548 static const UChar toUnicode943stop[]= { 0x304b};
2549
2550 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7 };
2551 static const int32_t fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 };
2552 static const int32_t fromIBM943Offsstop[] = { 0};
2553
2554 gInBufferSize = inputsize;
2555 gOutBufferSize = outputsize;
2556 /*checking with a legal value*/
2557 if(!testConvertFromUnicode(legalText, UPRV_LENGTHOF(legalText),
2558 templegal949, UPRV_LENGTHOF(templegal949), "ibm-949",
2559 UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 ))
2560 log_err("u-> ibm-949 with skip did not match.\n");
2561
2562 /*checking illegal value for ibm-943 with substitute*/
2563 if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2564 toUnicode943sub, UPRV_LENGTHOF(toUnicode943sub),"ibm-943",
2565 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2566 log_err("ibm-943->u with subst did not match.\n");
2567 /*checking illegal value for ibm-943 with skip */
2568 if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2569 toUnicode943skip, UPRV_LENGTHOF(toUnicode943skip),"ibm-943",
2570 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 ))
2571 log_err("ibm-943->u with skip did not match.\n");
2572
2573 /*checking illegal value for ibm-943 with stop */
2574 if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2575 toUnicode943stop, UPRV_LENGTHOF(toUnicode943stop),"ibm-943",
2576 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 ))
2577 log_err("ibm-943->u with stop did not match.\n");
2578
2579 }
2580
TestSingleByte(int32_t inputsize,int32_t outputsize)2581 static void TestSingleByte(int32_t inputsize, int32_t outputsize)
2582 {
2583 static const uint8_t sampleText[] = {
2584 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
2585 0xff, 0x32, 0x33};
2586 static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 };
2587 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 };
2588 /*checking illegal value for ibm-943 with substitute*/
2589 gInBufferSize = inputsize;
2590 gOutBufferSize = outputsize;
2591
2592 if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
2593 toUnicode943sub, UPRV_LENGTHOF(toUnicode943sub),"ibm-943",
2594 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2595 log_err("ibm-943->u with subst did not match.\n");
2596 }
2597
TestEBCDIC_STATEFUL_Sub(int32_t inputsize,int32_t outputsize)2598 static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize)
2599 {
2600 /*EBCDIC_STATEFUL*/
2601 static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 };
2602 static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 };
2603 static const int32_t offset_930[]= { 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5 };
2604 /* s SO doubl SI sng s SO fe fe SI s */
2605
2606 /*EBCDIC_STATEFUL with subChar=3f*/
2607 static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 };
2608 static const int32_t offset_930_subvaried[]= { 0, 1, 1, 1, 2, 2, 3, 4, 5 };
2609 static const char mySubChar[]={ 0x3f};
2610
2611 gInBufferSize = inputsize;
2612 gOutBufferSize = outputsize;
2613
2614 if(!testConvertFromUnicode(ebcdic_inputTest, UPRV_LENGTHOF(ebcdic_inputTest),
2615 toIBM930, UPRV_LENGTHOF(toIBM930), "ibm-930",
2616 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 ))
2617 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n");
2618
2619 if(!testConvertFromUnicode(ebcdic_inputTest, UPRV_LENGTHOF(ebcdic_inputTest),
2620 toIBM930_subvaried, UPRV_LENGTHOF(toIBM930_subvaried), "ibm-930",
2621 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 ))
2622 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n");
2623 }
2624 #endif
2625
testConvertFromUnicode(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UConverterFromUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len)2626 UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
2627 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
2628 const char *mySubChar, int8_t len)
2629 {
2630
2631
2632 UErrorCode status = U_ZERO_ERROR;
2633 UConverter *conv = 0;
2634 char junkout[NEW_MAX_BUFFER]; /* FIX */
2635 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2636 const UChar *src;
2637 char *end;
2638 char *targ;
2639 int32_t *offs;
2640 int i;
2641 int32_t realBufferSize;
2642 char *realBufferEnd;
2643 const UChar *realSourceEnd;
2644 const UChar *sourceLimit;
2645 UBool checkOffsets = true;
2646 UBool doFlush;
2647 char junk[9999];
2648 char offset_str[9999];
2649 char *p;
2650 UConverterFromUCallback oldAction = NULL;
2651 const void* oldContext = NULL;
2652
2653
2654 for(i=0;i<NEW_MAX_BUFFER;i++)
2655 junkout[i] = (char)0xF0;
2656 for(i=0;i<NEW_MAX_BUFFER;i++)
2657 junokout[i] = 0xFF;
2658 setNuConvTestName(codepage, "FROM");
2659
2660 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
2661 gOutBufferSize);
2662
2663 conv = ucnv_open(codepage, &status);
2664 if(U_FAILURE(status))
2665 {
2666 log_data_err("Couldn't open converter %s\n",codepage);
2667 return true;
2668 }
2669
2670 log_verbose("Converter opened..\n");
2671
2672 /*----setting the callback routine----*/
2673 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2674 if (U_FAILURE(status))
2675 {
2676 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2677 }
2678 /*------------------------*/
2679 /*setting the subChar*/
2680 if(mySubChar != NULL){
2681 ucnv_setSubstChars(conv, mySubChar, len, &status);
2682 if (U_FAILURE(status)) {
2683 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2684 }
2685 }
2686 /*------------*/
2687
2688 src = source;
2689 targ = junkout;
2690 offs = junokout;
2691
2692 realBufferSize = UPRV_LENGTHOF(junkout);
2693 realBufferEnd = junkout + realBufferSize;
2694 realSourceEnd = source + sourceLen;
2695
2696 if ( gOutBufferSize != realBufferSize )
2697 checkOffsets = false;
2698
2699 if( gInBufferSize != NEW_MAX_BUFFER )
2700 checkOffsets = false;
2701
2702 do
2703 {
2704 end = nct_min(targ + gOutBufferSize, realBufferEnd);
2705 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
2706
2707 doFlush = (UBool)(sourceLimit == realSourceEnd);
2708
2709 if(targ == realBufferEnd)
2710 {
2711 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
2712 return false;
2713 }
2714 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"true":"false");
2715
2716
2717 status = U_ZERO_ERROR;
2718
2719 ucnv_fromUnicode (conv,
2720 (char **)&targ,
2721 (const char *)end,
2722 &src,
2723 sourceLimit,
2724 checkOffsets ? offs : NULL,
2725 doFlush, /* flush if we're at the end of the input data */
2726 &status);
2727 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
2728
2729
2730 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2731 UChar errChars[50]; /* should be sufficient */
2732 int8_t errLen = 50;
2733 UErrorCode err = U_ZERO_ERROR;
2734 const UChar* start= NULL;
2735 ucnv_getInvalidUChars(conv,errChars, &errLen, &err);
2736 if(U_FAILURE(err)){
2737 log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err));
2738 }
2739 /* length of in invalid chars should be equal to returned length*/
2740 start = src - errLen;
2741 if(u_strncmp(errChars,start,errLen)!=0){
2742 log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2743 }
2744 }
2745 /* allow failure codes for the stop callback */
2746 if(U_FAILURE(status) &&
2747 (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND)))
2748 {
2749 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2750 return false;
2751 }
2752
2753 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
2754 sourceLen, targ-junkout);
2755 if(getTestOption(VERBOSITY_OPTION))
2756 {
2757
2758 junk[0] = 0;
2759 offset_str[0] = 0;
2760 for(p = junkout;p<targ;p++)
2761 {
2762 snprintf(junk + strlen(junk), sizeof(junk)-strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
2763 snprintf(offset_str + strlen(offset_str), sizeof(offset_str) - strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
2764 }
2765
2766 log_verbose(junk);
2767 printSeq(expect, expectLen);
2768 if ( checkOffsets )
2769 {
2770 log_verbose("\nOffsets:");
2771 log_verbose(offset_str);
2772 }
2773 log_verbose("\n");
2774 }
2775 ucnv_close(conv);
2776
2777
2778 if(expectLen != targ-junkout)
2779 {
2780 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2781 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2782 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2783 printSeqErr(expect, expectLen);
2784 return false;
2785 }
2786
2787 if (checkOffsets && (expectOffsets != 0) )
2788 {
2789 log_verbose("comparing %d offsets..\n", targ-junkout);
2790 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
2791 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2792 log_err("Got Output : ");
2793 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2794 log_err("Got Offsets: ");
2795 for(p=junkout;p<targ;p++)
2796 log_err("%d,", junokout[p-junkout]);
2797 log_err("\n");
2798 log_err("Expected Offsets: ");
2799 for(i=0; i<(targ-junkout); i++)
2800 log_err("%d,", expectOffsets[i]);
2801 log_err("\n");
2802 return false;
2803 }
2804 }
2805
2806 if(!memcmp(junkout, expect, expectLen))
2807 {
2808 log_verbose("String matches! %s\n", gNuConvTestName);
2809 return true;
2810 }
2811 else
2812 {
2813 log_err("String does not match. %s\n", gNuConvTestName);
2814 log_err("source: ");
2815 printUSeqErr(source, sourceLen);
2816 log_err("Got: ");
2817 printSeqErr((const uint8_t *)junkout, expectLen);
2818 log_err("Expected: ");
2819 printSeqErr(expect, expectLen);
2820 return false;
2821 }
2822 }
2823
testConvertToUnicode(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UConverterToUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len)2824 UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
2825 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
2826 const char *mySubChar, int8_t len)
2827 {
2828 UErrorCode status = U_ZERO_ERROR;
2829 UConverter *conv = 0;
2830 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
2831 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2832 const char *src;
2833 const char *realSourceEnd;
2834 const char *srcLimit;
2835 UChar *targ;
2836 UChar *end;
2837 int32_t *offs;
2838 int i;
2839 UBool checkOffsets = true;
2840 char junk[9999];
2841 char offset_str[9999];
2842 UChar *p;
2843 UConverterToUCallback oldAction = NULL;
2844 const void* oldContext = NULL;
2845
2846 int32_t realBufferSize;
2847 UChar *realBufferEnd;
2848
2849
2850 for(i=0;i<NEW_MAX_BUFFER;i++)
2851 junkout[i] = 0xFFFE;
2852
2853 for(i=0;i<NEW_MAX_BUFFER;i++)
2854 junokout[i] = -1;
2855
2856 setNuConvTestName(codepage, "TO");
2857
2858 log_verbose("\n========= %s\n", gNuConvTestName);
2859
2860 conv = ucnv_open(codepage, &status);
2861 if(U_FAILURE(status))
2862 {
2863 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
2864 return true;
2865 }
2866
2867 log_verbose("Converter opened..\n");
2868
2869 src = (const char *)source;
2870 targ = junkout;
2871 offs = junokout;
2872
2873 realBufferSize = UPRV_LENGTHOF(junkout);
2874 realBufferEnd = junkout + realBufferSize;
2875 realSourceEnd = src + sourcelen;
2876 /*----setting the callback routine----*/
2877 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2878 if (U_FAILURE(status))
2879 {
2880 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2881 }
2882 /*-------------------------------------*/
2883 /*setting the subChar*/
2884 if(mySubChar != NULL){
2885 ucnv_setSubstChars(conv, mySubChar, len, &status);
2886 if (U_FAILURE(status)) {
2887 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2888 }
2889 }
2890 /*------------*/
2891
2892
2893 if ( gOutBufferSize != realBufferSize )
2894 checkOffsets = false;
2895
2896 if( gInBufferSize != NEW_MAX_BUFFER )
2897 checkOffsets = false;
2898
2899 do
2900 {
2901 end = nct_min( targ + gOutBufferSize, realBufferEnd);
2902 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
2903
2904 if(targ == realBufferEnd)
2905 {
2906 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
2907 return false;
2908 }
2909 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
2910
2911
2912
2913 status = U_ZERO_ERROR;
2914
2915 ucnv_toUnicode (conv,
2916 &targ,
2917 end,
2918 (const char **)&src,
2919 (const char *)srcLimit,
2920 checkOffsets ? offs : NULL,
2921 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
2922 &status);
2923 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
2924
2925 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2926 char errChars[50]; /* should be sufficient */
2927 int8_t errLen = 50;
2928 UErrorCode err = U_ZERO_ERROR;
2929 const char* start= NULL;
2930 ucnv_getInvalidChars(conv,errChars, &errLen, &err);
2931 if(U_FAILURE(err)){
2932 log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err));
2933 }
2934 /* length of in invalid chars should be equal to returned length*/
2935 start = src - errLen;
2936 if(uprv_strncmp(errChars,start,errLen)!=0){
2937 log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2938 }
2939 }
2940 /* allow failure codes for the stop callback */
2941 if(U_FAILURE(status) &&
2942 (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND)))
2943 {
2944 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2945 return false;
2946 }
2947
2948 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
2949 sourcelen, targ-junkout);
2950 if(getTestOption(VERBOSITY_OPTION))
2951 {
2952
2953 junk[0] = 0;
2954 offset_str[0] = 0;
2955
2956 for(p = junkout;p<targ;p++)
2957 {
2958 snprintf(junk + strlen(junk), sizeof(junk)-strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
2959 snprintf(offset_str + strlen(offset_str), sizeof(offset_str)-strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
2960 }
2961
2962 log_verbose(junk);
2963 printUSeq(expect, expectlen);
2964 if ( checkOffsets )
2965 {
2966 log_verbose("\nOffsets:");
2967 log_verbose(offset_str);
2968 }
2969 log_verbose("\n");
2970 }
2971 ucnv_close(conv);
2972
2973 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
2974
2975 if (checkOffsets && (expectOffsets != 0))
2976 {
2977 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
2978 {
2979 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2980 log_err("Got offsets: ");
2981 for(p=junkout;p<targ;p++)
2982 log_err(" %2d,", junokout[p-junkout]);
2983 log_err("\n");
2984 log_err("Expected offsets: ");
2985 for(i=0; i<(targ-junkout); i++)
2986 log_err(" %2d,", expectOffsets[i]);
2987 log_err("\n");
2988 log_err("Got output: ");
2989 for(i=0; i<(targ-junkout); i++)
2990 log_err("0x%04x,", junkout[i]);
2991 log_err("\n");
2992 log_err("From source: ");
2993 for(i=0; i<(src-(const char *)source); i++)
2994 log_err(" 0x%02x,", (unsigned char)source[i]);
2995 log_err("\n");
2996 }
2997 }
2998
2999 if(!memcmp(junkout, expect, expectlen*2))
3000 {
3001 log_verbose("Matches!\n");
3002 return true;
3003 }
3004 else
3005 {
3006 log_err("String does not match. %s\n", gNuConvTestName);
3007 log_verbose("String does not match. %s\n", gNuConvTestName);
3008 log_err("Got: ");
3009 printUSeqErr(junkout, expectlen);
3010 log_err("Expected: ");
3011 printUSeqErr(expect, expectlen);
3012 log_err("\n");
3013 return false;
3014 }
3015 }
3016
testConvertFromUnicodeWithContext(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UConverterFromUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len,const void * context,UErrorCode expectedError)3017 UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
3018 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
3019 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3020 {
3021
3022
3023 UErrorCode status = U_ZERO_ERROR;
3024 UConverter *conv = 0;
3025 char junkout[NEW_MAX_BUFFER]; /* FIX */
3026 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3027 const UChar *src;
3028 char *end;
3029 char *targ;
3030 int32_t *offs;
3031 int i;
3032 int32_t realBufferSize;
3033 char *realBufferEnd;
3034 const UChar *realSourceEnd;
3035 const UChar *sourceLimit;
3036 UBool checkOffsets = true;
3037 UBool doFlush;
3038 char junk[9999];
3039 char offset_str[9999];
3040 char *p;
3041 UConverterFromUCallback oldAction = NULL;
3042 const void* oldContext = NULL;
3043
3044
3045 for(i=0;i<NEW_MAX_BUFFER;i++)
3046 junkout[i] = (char)0xF0;
3047 for(i=0;i<NEW_MAX_BUFFER;i++)
3048 junokout[i] = 0xFF;
3049 setNuConvTestName(codepage, "FROM");
3050
3051 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
3052 gOutBufferSize);
3053
3054 conv = ucnv_open(codepage, &status);
3055 if(U_FAILURE(status))
3056 {
3057 log_data_err("Couldn't open converter %s\n",codepage);
3058 return true; /* Because the err has already been logged. */
3059 }
3060
3061 log_verbose("Converter opened..\n");
3062
3063 /*----setting the callback routine----*/
3064 ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3065 if (U_FAILURE(status))
3066 {
3067 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3068 }
3069 /*------------------------*/
3070 /*setting the subChar*/
3071 if(mySubChar != NULL){
3072 ucnv_setSubstChars(conv, mySubChar, len, &status);
3073 if (U_FAILURE(status)) {
3074 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status));
3075 }
3076 }
3077 /*------------*/
3078
3079 src = source;
3080 targ = junkout;
3081 offs = junokout;
3082
3083 realBufferSize = UPRV_LENGTHOF(junkout);
3084 realBufferEnd = junkout + realBufferSize;
3085 realSourceEnd = source + sourceLen;
3086
3087 if ( gOutBufferSize != realBufferSize )
3088 checkOffsets = false;
3089
3090 if( gInBufferSize != NEW_MAX_BUFFER )
3091 checkOffsets = false;
3092
3093 do
3094 {
3095 end = nct_min(targ + gOutBufferSize, realBufferEnd);
3096 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
3097
3098 doFlush = (UBool)(sourceLimit == realSourceEnd);
3099
3100 if(targ == realBufferEnd)
3101 {
3102 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
3103 return false;
3104 }
3105 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"true":"false");
3106
3107
3108 status = U_ZERO_ERROR;
3109
3110 ucnv_fromUnicode (conv,
3111 (char **)&targ,
3112 (const char *)end,
3113 &src,
3114 sourceLimit,
3115 checkOffsets ? offs : NULL,
3116 doFlush, /* flush if we're at the end of the input data */
3117 &status);
3118 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
3119
3120 /* allow failure codes for the stop callback */
3121 if(U_FAILURE(status) && status != expectedError)
3122 {
3123 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3124 return false;
3125 }
3126
3127 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
3128 sourceLen, targ-junkout);
3129 if(getTestOption(VERBOSITY_OPTION))
3130 {
3131
3132 junk[0] = 0;
3133 offset_str[0] = 0;
3134 for(p = junkout;p<targ;p++)
3135 {
3136 snprintf(junk + strlen(junk), sizeof(junk)-strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
3137 snprintf(offset_str + strlen(offset_str), sizeof(offset_str)-strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
3138 }
3139
3140 log_verbose(junk);
3141 printSeq(expect, expectLen);
3142 if ( checkOffsets )
3143 {
3144 log_verbose("\nOffsets:");
3145 log_verbose(offset_str);
3146 }
3147 log_verbose("\n");
3148 }
3149 ucnv_close(conv);
3150
3151
3152 if(expectLen != targ-junkout)
3153 {
3154 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3155 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3156 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3157 printSeqErr(expect, expectLen);
3158 return false;
3159 }
3160
3161 if (checkOffsets && (expectOffsets != 0) )
3162 {
3163 log_verbose("comparing %d offsets..\n", targ-junkout);
3164 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
3165 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3166 log_err("Got Output : ");
3167 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3168 log_err("Got Offsets: ");
3169 for(p=junkout;p<targ;p++)
3170 log_err("%d,", junokout[p-junkout]);
3171 log_err("\n");
3172 log_err("Expected Offsets: ");
3173 for(i=0; i<(targ-junkout); i++)
3174 log_err("%d,", expectOffsets[i]);
3175 log_err("\n");
3176 return false;
3177 }
3178 }
3179
3180 if(!memcmp(junkout, expect, expectLen))
3181 {
3182 log_verbose("String matches! %s\n", gNuConvTestName);
3183 return true;
3184 }
3185 else
3186 {
3187 log_err("String does not match. %s\n", gNuConvTestName);
3188 log_err("source: ");
3189 printUSeqErr(source, sourceLen);
3190 log_err("Got: ");
3191 printSeqErr((const uint8_t *)junkout, expectLen);
3192 log_err("Expected: ");
3193 printSeqErr(expect, expectLen);
3194 return false;
3195 }
3196 }
testConvertToUnicodeWithContext(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UConverterToUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len,const void * context,UErrorCode expectedError)3197 UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
3198 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
3199 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3200 {
3201 UErrorCode status = U_ZERO_ERROR;
3202 UConverter *conv = 0;
3203 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
3204 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3205 const char *src;
3206 const char *realSourceEnd;
3207 const char *srcLimit;
3208 UChar *targ;
3209 UChar *end;
3210 int32_t *offs;
3211 int i;
3212 UBool checkOffsets = true;
3213 char junk[9999];
3214 char offset_str[9999];
3215 UChar *p;
3216 UConverterToUCallback oldAction = NULL;
3217 const void* oldContext = NULL;
3218
3219 int32_t realBufferSize;
3220 UChar *realBufferEnd;
3221
3222
3223 for(i=0;i<NEW_MAX_BUFFER;i++)
3224 junkout[i] = 0xFFFE;
3225
3226 for(i=0;i<NEW_MAX_BUFFER;i++)
3227 junokout[i] = -1;
3228
3229 setNuConvTestName(codepage, "TO");
3230
3231 log_verbose("\n========= %s\n", gNuConvTestName);
3232
3233 conv = ucnv_open(codepage, &status);
3234 if(U_FAILURE(status))
3235 {
3236 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
3237 return true;
3238 }
3239
3240 log_verbose("Converter opened..\n");
3241
3242 src = (const char *)source;
3243 targ = junkout;
3244 offs = junokout;
3245
3246 realBufferSize = UPRV_LENGTHOF(junkout);
3247 realBufferEnd = junkout + realBufferSize;
3248 realSourceEnd = src + sourcelen;
3249 /*----setting the callback routine----*/
3250 ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3251 if (U_FAILURE(status))
3252 {
3253 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3254 }
3255 /*-------------------------------------*/
3256 /*setting the subChar*/
3257 if(mySubChar != NULL){
3258 ucnv_setSubstChars(conv, mySubChar, len, &status);
3259 if (U_FAILURE(status)) {
3260 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3261 }
3262 }
3263 /*------------*/
3264
3265
3266 if ( gOutBufferSize != realBufferSize )
3267 checkOffsets = false;
3268
3269 if( gInBufferSize != NEW_MAX_BUFFER )
3270 checkOffsets = false;
3271
3272 do
3273 {
3274 end = nct_min( targ + gOutBufferSize, realBufferEnd);
3275 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
3276
3277 if(targ == realBufferEnd)
3278 {
3279 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
3280 return false;
3281 }
3282 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
3283
3284
3285
3286 status = U_ZERO_ERROR;
3287
3288 ucnv_toUnicode (conv,
3289 &targ,
3290 end,
3291 (const char **)&src,
3292 (const char *)srcLimit,
3293 checkOffsets ? offs : NULL,
3294 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
3295 &status);
3296 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
3297
3298 /* allow failure codes for the stop callback */
3299 if(U_FAILURE(status) && status!=expectedError)
3300 {
3301 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3302 return false;
3303 }
3304
3305 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
3306 sourcelen, targ-junkout);
3307 if(getTestOption(VERBOSITY_OPTION))
3308 {
3309
3310 junk[0] = 0;
3311 offset_str[0] = 0;
3312
3313 for(p = junkout;p<targ;p++)
3314 {
3315 snprintf(junk + strlen(junk), sizeof(junk)-strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
3316 snprintf(offset_str + strlen(offset_str), sizeof(offset_str)-strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
3317 }
3318
3319 log_verbose(junk);
3320 printUSeq(expect, expectlen);
3321 if ( checkOffsets )
3322 {
3323 log_verbose("\nOffsets:");
3324 log_verbose(offset_str);
3325 }
3326 log_verbose("\n");
3327 }
3328 ucnv_close(conv);
3329
3330 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
3331
3332 if (checkOffsets && (expectOffsets != 0))
3333 {
3334 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
3335 {
3336 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3337 log_err("Got offsets: ");
3338 for(p=junkout;p<targ;p++)
3339 log_err(" %2d,", junokout[p-junkout]);
3340 log_err("\n");
3341 log_err("Expected offsets: ");
3342 for(i=0; i<(targ-junkout); i++)
3343 log_err(" %2d,", expectOffsets[i]);
3344 log_err("\n");
3345 log_err("Got output: ");
3346 for(i=0; i<(targ-junkout); i++)
3347 log_err("0x%04x,", junkout[i]);
3348 log_err("\n");
3349 log_err("From source: ");
3350 for(i=0; i<(src-(const char *)source); i++)
3351 log_err(" 0x%02x,", (unsigned char)source[i]);
3352 log_err("\n");
3353 }
3354 }
3355
3356 if(!memcmp(junkout, expect, expectlen*2))
3357 {
3358 log_verbose("Matches!\n");
3359 return true;
3360 }
3361 else
3362 {
3363 log_err("String does not match. %s\n", gNuConvTestName);
3364 log_verbose("String does not match. %s\n", gNuConvTestName);
3365 log_err("Got: ");
3366 printUSeqErr(junkout, expectlen);
3367 log_err("Expected: ");
3368 printUSeqErr(expect, expectlen);
3369 log_err("\n");
3370 return false;
3371 }
3372 }
3373
TestCallBackFailure(void)3374 static void TestCallBackFailure(void) {
3375 UErrorCode status = U_USELESS_COLLATOR_ERROR;
3376 ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status);
3377 if (status != U_USELESS_COLLATOR_ERROR) {
3378 log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n");
3379 }
3380 ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status);
3381 if (status != U_USELESS_COLLATOR_ERROR) {
3382 log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n");
3383 }
3384 ucnv_cbFromUWriteSub(NULL, -1, &status);
3385 if (status != U_USELESS_COLLATOR_ERROR) {
3386 log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n");
3387 }
3388 ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status);
3389 if (status != U_USELESS_COLLATOR_ERROR) {
3390 log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n");
3391 }
3392 }
3393