1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /*
9 ********************************************************************************
10 * File NCCBTST.C
11 *
12 * Modification History:
13 * Name Description
14 * Madhu Katragadda 7/21/1999 Testing error callback routines
15 ********************************************************************************
16 */
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <ctype.h>
21 #include "cmemory.h"
22 #include "cstring.h"
23 #include "unicode/uloc.h"
24 #include "unicode/ucnv.h"
25 #include "unicode/ucnv_err.h"
26 #include "cintltst.h"
27 #include "unicode/utypes.h"
28 #include "unicode/ustring.h"
29 #include "nccbtst.h"
30 #include "unicode/ucnv_cb.h"
31 #include "unicode/utf16.h"
32
/* Large buffer size used by the callback test drivers as the "whole input in
 * one call" setting; the drivers pair it with a size of 1. */
#define NEW_MAX_BUFFER 999

/*
 * Smaller of x and y.
 * Each argument is parenthesized so that an argument containing an operator
 * of lower precedence than '<' (for example `a | b`) is compared as a whole.
 * Note: the selected argument is still evaluated twice, so do not pass
 * expressions with side effects.
 */
#define nct_min(x,y) (((x) < (y)) ? (x) : (y))

/* Input/output buffer sizes of the test run in progress (set by the Test*
 * functions, reported by setNuConvTestName()). */
static int32_t gInBufferSize = 0;
static int32_t gOutBufferSize = 0;
/* Human-readable label of the current conversion test, filled in by
 * setNuConvTestName(). */
static char gNuConvTestName[1024];
40
/*
 * Writes the first len bytes of a through log_verbose() as a brace-delimited
 * list of two-digit upper-case hex values. The list starts on a fresh line
 * and ends with a newline. Nothing but the braces is written when len <= 0.
 */
static void printSeq(const uint8_t* a, int len)
{
    int idx;

    log_verbose("\n{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02X, ", a[idx]);
    }
    log_verbose("}\n");
}
49
/*
 * Writes the first len code units of a through log_verbose() as a
 * brace-delimited list of four-digit lower-case hex values, followed by a
 * newline. Nothing but the braces is written when len <= 0.
 */
static void printUSeq(const UChar* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose(" 0x%04x, ", a[idx]);
    }
    log_verbose("}\n");
}
58
/*
 * Same dump as printSeq() but sent straight to stderr: the first len bytes
 * of a as a brace-delimited list of two-digit lower-case hex values,
 * followed by a newline. Nothing but the braces is written when len <= 0.
 */
static void printSeqErr(const uint8_t* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, " 0x%02x, ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
67
/*
 * Writes the first len code units of a to stderr as a brace-delimited list
 * of four-digit lower-case hex values, followed by a newline. Nothing but
 * the braces is written when len <= 0.
 */
static void printUSeqErr(const UChar* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%04x, ", a[idx]);
    }
    fprintf(stderr,"}\n");
}
76
setNuConvTestName(const char * codepage,const char * direction)77 static void setNuConvTestName(const char *codepage, const char *direction)
78 {
79 sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
80 codepage,
81 direction,
82 (int)gInBufferSize,
83 (int)gOutBufferSize);
84 }
85
86
87 static void TestCallBackFailure(void);
88
89 void addTestConvertErrorCallBack(TestNode** root);
90
/*
 * Registers this file's converter error-callback tests with addTest() under
 * the "tsconv/nccbtst/" path prefix. The legal/other and single-byte tests
 * are registered only when legacy conversion is compiled in; the
 * TestSubWithValueCallBack registration is deliberately left commented out.
 */
void addTestConvertErrorCallBack(TestNode** root)
{
    addTest(root, TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack");
    addTest(root, TestStopCallBack, "tsconv/nccbtst/TestStopCallBack");
    addTest(root, TestSubCallBack,  "tsconv/nccbtst/TestSubCallBack");
    /* BEGIN android-removed
       To save space, Android does not build complete CJK conversion tables.
       We skip the test here.
    addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack");
       END android-removed */

#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOtherCallBack");
    addTest(root, TestSingleByteCallBack,    "tsconv/nccbtst/TestSingleByteCallBack");
#endif

    addTest(root, TestCallBackFailure, "tsconv/nccbtst/TestCallBackFailure");
}
109
TestSkipCallBack()110 static void TestSkipCallBack()
111 {
112 TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
113 TestSkip(1,NEW_MAX_BUFFER);
114 TestSkip(1,1);
115 TestSkip(NEW_MAX_BUFFER, 1);
116 }
117
TestStopCallBack()118 static void TestStopCallBack()
119 {
120 TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
121 TestStop(1,NEW_MAX_BUFFER);
122 TestStop(1,1);
123 TestStop(NEW_MAX_BUFFER, 1);
124 }
125
TestSubCallBack()126 static void TestSubCallBack()
127 {
128 TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
129 TestSub(1,NEW_MAX_BUFFER);
130 TestSub(1,1);
131 TestSub(NEW_MAX_BUFFER, 1);
132
133 #if !UCONFIG_NO_LEGACY_CONVERSION
134 TestEBCDIC_STATEFUL_Sub(1, 1);
135 TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER);
136 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1);
137 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
138 #endif
139 }
140
TestSubWithValueCallBack()141 static void TestSubWithValueCallBack()
142 {
143 TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
144 TestSubWithValue(1,NEW_MAX_BUFFER);
145 TestSubWithValue(1,1);
146 TestSubWithValue(NEW_MAX_BUFFER, 1);
147 }
148
149 #if !UCONFIG_NO_LEGACY_CONVERSION
TestLegalAndOtherCallBack()150 static void TestLegalAndOtherCallBack()
151 {
152 TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
153 TestLegalAndOthers(1,NEW_MAX_BUFFER);
154 TestLegalAndOthers(1,1);
155 TestLegalAndOthers(NEW_MAX_BUFFER, 1);
156 }
157
TestSingleByteCallBack()158 static void TestSingleByteCallBack()
159 {
160 TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
161 TestSingleByte(1,NEW_MAX_BUFFER);
162 TestSingleByte(1,1);
163 TestSingleByte(NEW_MAX_BUFFER, 1);
164 }
165 #endif
166
TestSkip(int32_t inputsize,int32_t outputsize)167 static void TestSkip(int32_t inputsize, int32_t outputsize)
168 {
169 static const uint8_t expskipIBM_949[]= {
170 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
171
172 static const uint8_t expskipIBM_943[] = {
173 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 };
174
175 static const uint8_t expskipIBM_930[] = {
176 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f };
177
178 gInBufferSize = inputsize;
179 gOutBufferSize = outputsize;
180
181 /*From Unicode*/
182 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n");
183
184 #if !UCONFIG_NO_LEGACY_CONVERSION
185 {
186 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
187 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
188
189 static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 };
190 static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 };
191
192 if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
193 expskipIBM_949, UPRV_LENGTHOF(expskipIBM_949), "ibm-949",
194 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 ))
195 log_err("u-> ibm-949 with skip did not match.\n");
196 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
197 expskipIBM_943, UPRV_LENGTHOF(expskipIBM_943), "ibm-943",
198 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 ))
199 log_err("u-> ibm-943 with skip did not match.\n");
200 }
201
202 {
203 static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 };
204 static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f };
205 static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 };
206
207 /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */
208 if(!testConvertFromUnicode(fromU, UPRV_LENGTHOF(fromU),
209 fromUBytes, UPRV_LENGTHOF(fromUBytes),
210 "ibm-930",
211 UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets,
212 NULL, 0)
213 ) {
214 log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n");
215 }
216 }
217 #endif
218
219 {
220 static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
221 static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 };
222 static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 };
223
224 static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
225 static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 };
226 static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 };
227
228 /* US-ASCII */
229 if(!testConvertFromUnicode(usasciiFromU, UPRV_LENGTHOF(usasciiFromU),
230 usasciiFromUBytes, UPRV_LENGTHOF(usasciiFromUBytes),
231 "US-ASCII",
232 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
233 NULL, 0)
234 ) {
235 log_err("u->US-ASCII with skip did not match.\n");
236 }
237
238 #if !UCONFIG_NO_LEGACY_CONVERSION
239 /* SBCS NLTC codepage 367 for US-ASCII */
240 if(!testConvertFromUnicode(usasciiFromU, UPRV_LENGTHOF(usasciiFromU),
241 usasciiFromUBytes, UPRV_LENGTHOF(usasciiFromUBytes),
242 "ibm-367",
243 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
244 NULL, 0)
245 ) {
246 log_err("u->ibm-367 with skip did not match.\n");
247 }
248 #endif
249
250 /* ISO-Latin-1 */
251 if(!testConvertFromUnicode(latin1FromU, UPRV_LENGTHOF(latin1FromU),
252 latin1FromUBytes, UPRV_LENGTHOF(latin1FromUBytes),
253 "LATIN_1",
254 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
255 NULL, 0)
256 ) {
257 log_err("u->LATIN_1 with skip did not match.\n");
258 }
259
260 #if !UCONFIG_NO_LEGACY_CONVERSION
261 /* windows-1252 */
262 if(!testConvertFromUnicode(latin1FromU, UPRV_LENGTHOF(latin1FromU),
263 latin1FromUBytes, UPRV_LENGTHOF(latin1FromUBytes),
264 "windows-1252",
265 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
266 NULL, 0)
267 ) {
268 log_err("u->windows-1252 with skip did not match.\n");
269 }
270 }
271
272 {
273 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
274 static const uint8_t toIBM943[]= { 0x61, 0x61 };
275 static const int32_t offset[]= {0, 4};
276
277 /* EUC_JP*/
278 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
279 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
280 0x61, 0x8e, 0xe0,
281 };
282 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7};
283
284 /*EUC_TW*/
285 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
286 static const uint8_t to_euc_tw[]={
287 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
288 0x61, 0xe6, 0xca, 0x8a,
289 };
290 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,};
291
292 /*ISO-2022-JP*/
293 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, };
294 static const uint8_t to_iso_2022_jp[]={
295 0x41,
296 0x42,
297
298 };
299 static const int32_t from_iso_2022_jpOffs [] ={0,2};
300
301 /*ISO-2022-JP*/
302 UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
303 static const uint8_t to_iso_2022_jp2[]={
304 0x41,
305 0x43,
306
307 };
308 static const int32_t from_iso_2022_jpOffs2 [] ={0,2};
309
310 /*ISO-2022-cn*/
311 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
312 static const uint8_t to_iso_2022_cn[]={
313 0x41, 0x42
314 };
315 static const int32_t from_iso_2022_cnOffs [] ={
316 0, 2
317 };
318
319 /*ISO-2022-CN*/
320 static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
321 static const uint8_t to_iso_2022_cn1[]={
322 0x41, 0x43
323
324 };
325 static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 };
326
327 /*ISO-2022-kr*/
328 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
329 static const uint8_t to_iso_2022_kr[]={
330 0x1b, 0x24, 0x29, 0x43,
331 0x41,
332 0x0e, 0x25, 0x50,
333 0x25, 0x50,
334 0x0f, 0x42,
335 };
336 static const int32_t from_iso_2022_krOffs [] ={
337 -1,-1,-1,-1,
338 0,
339 1,1,1,
340 3,3,
341 4,4
342 };
343
344 /*ISO-2022-kr*/
345 static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
346 static const uint8_t to_iso_2022_kr1[]={
347 0x1b, 0x24, 0x29, 0x43,
348 0x41,
349 0x0e, 0x25, 0x50,
350 0x25, 0x50,
351
352 };
353 static const int32_t from_iso_2022_krOffs1 [] ={
354 -1,-1,-1,-1,
355 0,
356 1,1,1,
357 3,3,
358
359 };
360 /* HZ encoding */
361 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
362
363 static const uint8_t to_hz[]={
364 0x7e, 0x7d, 0x41,
365 0x7e, 0x7b, 0x26, 0x30,
366 0x26, 0x30,
367 0x7e, 0x7d, 0x42,
368
369 };
370 static const int32_t from_hzOffs [] ={
371 0,0,0,
372 1,1,1,1,
373 3,3,
374 4,4,4,4
375 };
376
377 static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
378
379 static const uint8_t to_hz1[]={
380 0x7e, 0x7d, 0x41,
381 0x7e, 0x7b, 0x26, 0x30,
382 0x26, 0x30,
383
384
385 };
386 static const int32_t from_hzOffs1 [] ={
387 0,0,0,
388 1,1,1,1,
389 3,3,
390
391 };
392
393 #endif
394
395 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
396
397 static const uint8_t to_SCSU[]={
398 0x41,
399 0x42
400
401
402 };
403 static const int32_t from_SCSUOffs [] ={
404 0,
405 2,
406
407 };
408
409 #if !UCONFIG_NO_LEGACY_CONVERSION
410 /* ISCII */
411 static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
412 static const uint8_t to_iscii[]={
413 0x41,
414 0x42,
415 };
416 static const int32_t from_isciiOffs [] ={
417 0,2,
418
419 };
420 /*ISCII*/
421 static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
422 static const uint8_t to_iscii1[]={
423 0x44,
424 0x43,
425
426 };
427 static const int32_t from_isciiOffs1 [] ={0,2};
428
429 if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
430 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
431 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 ))
432 log_err("u-> ibm-943 with skip did not match.\n");
433
434 if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
435 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
436 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 ))
437 log_err("u-> euc-jp with skip did not match.\n");
438
439 if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
440 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
441 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 ))
442 log_err("u-> euc-tw with skip did not match.\n");
443
444 /*iso_2022_jp*/
445 if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
446 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
447 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 ))
448 log_err("u-> iso-2022-jp with skip did not match.\n");
449
450 /* with context */
451 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, UPRV_LENGTHOF(iso_2022_jp_inputText2),
452 to_iso_2022_jp2, UPRV_LENGTHOF(to_iso_2022_jp2), "iso-2022-jp",
453 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
454 log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
455
456 /*iso_2022_cn*/
457 if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
458 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
459 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 ))
460 log_err("u-> iso-2022-cn with skip did not match.\n");
461 /*with context*/
462 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, UPRV_LENGTHOF(iso_2022_cn_inputText1),
463 to_iso_2022_cn1, UPRV_LENGTHOF(to_iso_2022_cn1), "iso-2022-cn",
464 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
465 log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
466
467 /*iso_2022_kr*/
468 if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
469 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
470 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 ))
471 log_err("u-> iso-2022-kr with skip did not match.\n");
472 /*with context*/
473 if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, UPRV_LENGTHOF(iso_2022_kr_inputText1),
474 to_iso_2022_kr1, UPRV_LENGTHOF(to_iso_2022_kr1), "iso-2022-kr",
475 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
476 log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
477
478 /*hz*/
479 if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
480 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
481 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 ))
482 log_err("u-> HZ with skip did not match.\n");
483 /*with context*/
484 if(!testConvertFromUnicodeWithContext(hz_inputText1, UPRV_LENGTHOF(hz_inputText1),
485 to_hz1, UPRV_LENGTHOF(to_hz1), "hz",
486 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
487 log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
488 #endif
489
490 /*SCSU*/
491 if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
492 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
493 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 ))
494 log_err("u-> SCSU with skip did not match.\n");
495
496 #if !UCONFIG_NO_LEGACY_CONVERSION
497 /*ISCII*/
498 if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
499 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
500 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 ))
501 log_err("u-> iscii with skip did not match.\n");
502 /*with context*/
503 if(!testConvertFromUnicodeWithContext(iscii_inputText1, UPRV_LENGTHOF(iscii_inputText1),
504 to_iscii1, UPRV_LENGTHOF(to_iscii1), "ISCII,version=0",
505 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
506 log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
507 #endif
508 }
509
510 log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
511 {
512 static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */
513 0xFB, 0xEE, 0x28, /* from source offset 0 */
514 0x24, 0x1E, 0x52,
515 0xB2,
516 0x20,
517 0xB3,
518 0xB1,
519 0x0D,
520 0x0A,
521
522 0x20, /* from 8 */
523 0x00,
524 0xD0, 0x6C,
525 0xB6,
526 0xD8, 0xA5,
527 0x20,
528 0x68,
529 0x59,
530
531 0xF9, 0x28, /* from 16 */
532 0x6D,
533 0x20,
534 0x73,
535 0xE0, 0x2D,
536 0xDE, 0x43,
537 0xD0, 0x33,
538 0x20,
539
540 0xFA, 0x83, /* from 24 */
541 0x25, 0x01,
542 0xFB, 0x16, 0x87,
543 0x4B, 0x16,
544 0x20,
545 0xE6, 0xBD,
546 0xEB, 0x5B,
547 0x4B, 0xCC,
548
549 0xF9, 0xA2, /* from 32 */
550 0xFC, 0x10, 0x3E,
551 0xFE, 0x16, 0x3A, 0x8C,
552 0x20,
553 0xFC, 0x03, 0xAC,
554
555 0x01, /* from 41 */
556 0xDE, 0x83,
557 0x20,
558 0x09
559 };
560 static const UChar expected[]={
561 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */
562 0x0063, 0x0061, 0x000D, 0x000A,
563
564 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */
565 0x0930, 0x0020, 0x0918, 0x0909,
566
567 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */
568 0x4000, 0x4E00, 0x7777, 0x0020,
569
570 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */
571 0x0020, 0xD7A3, 0xDC00, 0xD800,
572
573 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */
574 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
575
576 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */
577 0x0009
578 };
579 static const int32_t offsets[]={
580 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7,
581 8, 9, 10, 10, 11, 12, 12, 13, 14, 15,
582 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23,
583 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31,
584 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39,
585 41, 42, 42, 43, 44
586 };
587
588 /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */
589 if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
590 sampleText, UPRV_LENGTHOF(sampleText),
591 "BOCU-1",
592 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
593 ) {
594 log_err("u->BOCU-1 with skip did not match.\n");
595 }
596 }
597
598 log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
599 {
600 const uint8_t sampleText[]={
601 0x61, /* 'a' */
602 0xc4, 0xb5, /* U+0135 */
603 0xed, 0x80, 0xa0, /* Hangul U+d020 */
604 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */
605 0xee, 0x80, 0x80, /* PUA U+e000 */
606 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc01 */
607 0x62, /* 'b' */
608 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d801 */
609 0xd0, 0x80 /* U+0400 */
610 };
611 UChar expected[]={
612 0x0061,
613 0x0135,
614 0xd020,
615 0xd801, 0xdc01,
616 0xe000,
617 0xdc01,
618 0x0062,
619 0xd801,
620 0x0400
621 };
622 int32_t offsets[]={
623 0,
624 1, 1,
625 2, 2, 2,
626 3, 3, 3, 4, 4, 4,
627 5, 5, 5,
628 6, 6, 6,
629 7,
630 8, 8, 8,
631 9, 9
632 };
633
634 /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */
635
636 /* without offsets */
637 if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
638 sampleText, UPRV_LENGTHOF(sampleText),
639 "CESU-8",
640 UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0)
641 ) {
642 log_err("u->CESU-8 with skip did not match.\n");
643 }
644
645 /* with offsets */
646 if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
647 sampleText, UPRV_LENGTHOF(sampleText),
648 "CESU-8",
649 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
650 ) {
651 log_err("u->CESU-8 with skip did not match.\n");
652 }
653 }
654
655 /*to Unicode*/
656 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n");
657
658 #if !UCONFIG_NO_LEGACY_CONVERSION
659 {
660
661 static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 };
662 static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
663 static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
664
665 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5};
666 static const int32_t fromIBM943Offs [] = { 0, 2, 4};
667 static const int32_t fromIBM930Offs [] = { 1, 3, 5};
668
669 if(!testConvertToUnicode(expskipIBM_949, UPRV_LENGTHOF(expskipIBM_949),
670 IBM_949skiptoUnicode, UPRV_LENGTHOF(IBM_949skiptoUnicode),"ibm-949",
671 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 ))
672 log_err("ibm-949->u with skip did not match.\n");
673 if(!testConvertToUnicode(expskipIBM_943, UPRV_LENGTHOF(expskipIBM_943),
674 IBM_943skiptoUnicode, UPRV_LENGTHOF(IBM_943skiptoUnicode),"ibm-943",
675 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 ))
676 log_err("ibm-943->u with skip did not match.\n");
677
678
679 if(!testConvertToUnicode(expskipIBM_930, UPRV_LENGTHOF(expskipIBM_930),
680 IBM_930skiptoUnicode, UPRV_LENGTHOF(IBM_930skiptoUnicode),"ibm-930",
681 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 ))
682 log_err("ibm-930->u with skip did not match.\n");
683
684
685 if(!testConvertToUnicodeWithContext(expskipIBM_930, UPRV_LENGTHOF(expskipIBM_930),
686 IBM_930skiptoUnicode, UPRV_LENGTHOF(IBM_930skiptoUnicode),"ibm-930",
687 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
688 log_err("ibm-930->u with skip did not match.\n");
689 }
690 #endif
691
692 {
693 static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 };
694 static const UChar usasciiToU[] = { 0x61, 0x31 };
695 static const int32_t usasciiToUOffsets[] = { 0, 2 };
696
697 static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 };
698 static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 };
699 static const int32_t latin1ToUOffsets[] = { 0, 1, 2 };
700
701 /* US-ASCII */
702 if(!testConvertToUnicode(usasciiToUBytes, UPRV_LENGTHOF(usasciiToUBytes),
703 usasciiToU, UPRV_LENGTHOF(usasciiToU),
704 "US-ASCII",
705 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
706 NULL, 0)
707 ) {
708 log_err("US-ASCII->u with skip did not match.\n");
709 }
710
711 #if !UCONFIG_NO_LEGACY_CONVERSION
712 /* SBCS NLTC codepage 367 for US-ASCII */
713 if(!testConvertToUnicode(usasciiToUBytes, UPRV_LENGTHOF(usasciiToUBytes),
714 usasciiToU, UPRV_LENGTHOF(usasciiToU),
715 "ibm-367",
716 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
717 NULL, 0)
718 ) {
719 log_err("ibm-367->u with skip did not match.\n");
720 }
721 #endif
722
723 /* ISO-Latin-1 */
724 if(!testConvertToUnicode(latin1ToUBytes, UPRV_LENGTHOF(latin1ToUBytes),
725 latin1ToU, UPRV_LENGTHOF(latin1ToU),
726 "LATIN_1",
727 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
728 NULL, 0)
729 ) {
730 log_err("LATIN_1->u with skip did not match.\n");
731 }
732
733 #if !UCONFIG_NO_LEGACY_CONVERSION
734 /* windows-1252 */
735 if(!testConvertToUnicode(latin1ToUBytes, UPRV_LENGTHOF(latin1ToUBytes),
736 latin1ToU, UPRV_LENGTHOF(latin1ToU),
737 "windows-1252",
738 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
739 NULL, 0)
740 ) {
741 log_err("windows-1252->u with skip did not match.\n");
742 }
743 #endif
744 }
745
746 #if !UCONFIG_NO_LEGACY_CONVERSION
747 {
748 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
749 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
750 };
751 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0x03b4
752 };
753 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5};
754
755
756 /* euc-jp*/
757 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
758 0x8f, 0xda, 0xa1, /*unassigned*/
759 0x8e, 0xe0,
760 };
761 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2};
762 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9};
763
764 /*EUC_TW*/
765 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
766 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
767 0xe6, 0xca, 0x8a,
768 };
769 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, };
770 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13};
771 /*iso-2022-jp*/
772 static const uint8_t sampleTxt_iso_2022_jp[]={
773 0x41,
774 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/
775 0x1b, 0x28, 0x42, 0x42,
776
777 };
778 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x42 };
779 static const int32_t from_iso_2022_jpOffs [] ={ 0,9 };
780
781 /*iso-2022-cn*/
782 static const uint8_t sampleTxt_iso_2022_cn[]={
783 0x0f, 0x41, 0x44,
784 0x1B, 0x24, 0x29, 0x47,
785 0x0E, 0x40, 0x6f, /*unassigned*/
786 0x0f, 0x42,
787
788 };
789
790 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x42 };
791 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 11 };
792
793 /*iso-2022-kr*/
794 static const uint8_t sampleTxt_iso_2022_kr[]={
795 0x1b, 0x24, 0x29, 0x43,
796 0x41,
797 0x0E, 0x7f, 0x1E,
798 0x0e, 0x25, 0x50,
799 0x0f, 0x51,
800 0x42, 0x43,
801
802 };
803 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x03A0,0x51, 0x42,0x43};
804 static const int32_t from_iso_2022_krOffs [] ={ 4, 9, 12, 13 , 14 };
805
806 /*hz*/
807 static const uint8_t sampleTxt_hz[]={
808 0x41,
809 0x7e, 0x7b, 0x26, 0x30,
810 0x7f, 0x1E, /*unassigned*/
811 0x26, 0x30,
812 0x7e, 0x7d, 0x42,
813 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
814 0x7e, 0x7d, 0x42,
815 };
816 static const UChar hztoUnicode[]={
817 0x41,
818 0x03a0,
819 0x03A0,
820 0x42,
821 0x42,};
822
823 static const int32_t from_hzOffs [] ={0,3,7,11,18, };
824
825 /*ISCII*/
826 static const uint8_t sampleTxt_iscii[]={
827 0x41,
828 0xa1,
829 0xEB, /*unassigned*/
830 0x26,
831 0x30,
832 0xa2,
833 0xEC, /*unassigned*/
834 0x42,
835 };
836 static const UChar isciitoUnicode[]={
837 0x41,
838 0x0901,
839 0x26,
840 0x30,
841 0x0902,
842 0x42,
843 };
844
845 static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 };
846
847 /*LMBCS*/
848 static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50,
849 0x12, 0x92, 0xa0, /*unassigned*/
850 0x12, 0x92, 0xA1,
851 };
852 static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4};
853 static const int32_t fromLMBCS[] = {0, 6};
854
855 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
856 EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
857 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
858 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
859
860 if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
861 EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
862 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
863 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
864
865 if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
866 euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
867 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0))
868 log_err("euc-jp->u with skip did not match.\n");
869
870
871
872 if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
873 euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
874 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0))
875 log_err("euc-tw->u with skip did not match.\n");
876
877
878 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
879 iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
880 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0))
881 log_err("iso-2022-jp->u with skip did not match.\n");
882
883 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, UPRV_LENGTHOF(sampleTxt_iso_2022_cn),
884 iso_2022_cntoUnicode, UPRV_LENGTHOF(iso_2022_cntoUnicode),"iso-2022-cn",
885 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0))
886 log_err("iso-2022-cn->u with skip did not match.\n");
887
888 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, UPRV_LENGTHOF(sampleTxt_iso_2022_kr),
889 iso_2022_krtoUnicode, UPRV_LENGTHOF(iso_2022_krtoUnicode),"iso-2022-kr",
890 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0))
891 log_err("iso-2022-kr->u with skip did not match.\n");
892
893 if(!testConvertToUnicode(sampleTxt_hz, UPRV_LENGTHOF(sampleTxt_hz),
894 hztoUnicode, UPRV_LENGTHOF(hztoUnicode),"HZ",
895 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0))
896 log_err("HZ->u with skip did not match.\n");
897
898 if(!testConvertToUnicode(sampleTxt_iscii, UPRV_LENGTHOF(sampleTxt_iscii),
899 isciitoUnicode, UPRV_LENGTHOF(isciitoUnicode),"ISCII,version=0",
900 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0))
901 log_err("iscii->u with skip did not match.\n");
902
903 if(!testConvertToUnicode(sampleTxtLMBCS, UPRV_LENGTHOF(sampleTxtLMBCS),
904 LMBCSToUnicode, UPRV_LENGTHOF(LMBCSToUnicode),"LMBCS-1",
905 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0))
906 log_err("LMBCS->u with skip did not match.\n");
907
908 }
909 #endif
910
911 log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n");
912 {
913 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
914 0xe0, 0x80, 0x61,};
915 UChar expected1[] = { 0x0031, 0x4e8c, 0x0061};
916 int32_t offsets1[] = { 0x0000, 0x0001, 0x0006};
917
918 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
919 expected1, UPRV_LENGTHOF(expected1),"utf8",
920 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
921 log_err("utf8->u with skip did not match.\n");;
922 }
923
924 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n");
925 {
926 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
927 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfffe};
928 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
929
930 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
931 expected1, UPRV_LENGTHOF(expected1),"SCSU",
932 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
933 log_err("scsu->u with skip did not match.\n");
934 }
935
936 log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
937 {
938 const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */
939 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */
940 0x24, 0x1E, 0x52, /* 3 */
941 0xB2, /* 6 */
942 0x20, /* 7 */
943 0x40, 0x07, /* 8 - wrong trail byte */
944 0xB3, /* 10 */
945 0xB1, /* 11 */
946 0xD0, 0x20, /* 12 - wrong trail byte */
947 0x0D, /* 14 */
948 0x0A, /* 15 */
949 0x20, /* 16 */
950 0x00, /* 17 */
951 0xD0, 0x6C, /* 18 */
952 0xB6, /* 20 */
953 0xD8, 0xA5, /* 21 */
954 0x20, /* 23 */
955 0x68, /* 24 */
956 0x59, /* 25 */
957 0xF9, 0x28, /* 26 */
958 0x6D, /* 28 */
959 0x20, /* 29 */
960 0x73, /* 30 */
961 0xE0, 0x2D, /* 31 */
962 0xDE, 0x43, /* 33 */
963 0xD0, 0x33, /* 35 */
964 0x20, /* 37 */
965 0xFA, 0x83, /* 38 */
966 0x25, 0x01, /* 40 */
967 0xFB, 0x16, 0x87, /* 42 */
968 0x4B, 0x16, /* 45 */
969 0x20, /* 47 */
970 0xE6, 0xBD, /* 48 */
971 0xEB, 0x5B, /* 50 */
972 0x4B, 0xCC, /* 52 */
973 0xF9, 0xA2, /* 54 */
974 0xFC, 0x10, 0x3E, /* 56 */
975 0xFE, 0x16, 0x3A, 0x8C, /* 59 */
976 0x20, /* 63 */
977 0xFC, 0x03, 0xAC, /* 64 */
978 0xFF, /* 67 - FF just resets the state without encoding anything */
979 0x01, /* 68 */
980 0xDE, 0x83, /* 69 */
981 0x20, /* 71 */
982 0x09 /* 72 */
983 };
984 UChar expected[]={
985 0xFEFF, 0x0061, 0x0062, 0x0020,
986 0x0063, 0x0061, 0x000D, 0x000A,
987 0x0020, 0x0000, 0x00DF, 0x00E6,
988 0x0930, 0x0020, 0x0918, 0x0909,
989 0x3086, 0x304D, 0x0020, 0x3053,
990 0x4000, 0x4E00, 0x7777, 0x0020,
991 0x9FA5, 0x4E00, 0xAC00, 0xBCDE,
992 0x0020, 0xD7A3, 0xDC00, 0xD800,
993 0xD800, 0xDC00, 0xD845, 0xDDDD,
994 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
995 0xDFFF, 0x0001, 0x0E40, 0x0020,
996 0x0009
997 };
998 int32_t offsets[]={
999 0, 3, 6, 7, /* skip 8, */
1000 10, 11, /* skip 12, */
1001 14, 15, 16, 17, 18,
1002 20, 21, 23, 24, 25, 26, 28, 29,
1003 30, 31, 33, 35, 37, 38,
1004 40, 42, 45, 47, 48,
1005 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59,
1006 63, 64, /* trail */ 64, /* reset only 67, */
1007 68, 69,
1008 71, 72
1009 };
1010
1011 if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1012 expected, UPRV_LENGTHOF(expected), "BOCU-1",
1013 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1014 ) {
1015 log_err("BOCU-1->u with skip did not match.\n");
1016 }
1017 }
1018
1019 log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
1020 {
1021 const uint8_t sampleText[]={
1022 0x61, /* 0 'a' */
1023 0xc0, 0x80, /* 1 non-shortest form */
1024 0xc4, 0xb5, /* 3 U+0135 */
1025 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */
1026 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401 */
1027 0xee, 0x80, 0x80, /* 14 PUA U+e000 */
1028 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U+dc01 */
1029 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+10000 */
1030 0x62, /* 24 'b' */
1031 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+d801 */
1032 0xed, 0xa0, /* 28 incomplete sequence */
1033 0xd0, 0x80 /* 30 U+0400 */
1034 };
1035 UChar expected[]={
1036 0x0061,
1037 /* skip */
1038 0x0135,
1039 0xd020,
1040 0xd801, 0xdc01,
1041 0xe000,
1042 0xdc01,
1043 /* skip */
1044 0x0062,
1045 0xd801,
1046 0x0400
1047 };
1048 int32_t offsets[]={
1049 0,
1050 /* skip 1, */
1051 3,
1052 5,
1053 8, 11,
1054 14,
1055 17,
1056 /* skip 20, 20, */
1057 24,
1058 25,
1059 /* skip 28 */
1060 30
1061 };
1062
1063 /* without offsets */
1064 if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1065 expected, UPRV_LENGTHOF(expected), "CESU-8",
1066 UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0)
1067 ) {
1068 log_err("CESU-8->u with skip did not match.\n");
1069 }
1070
1071 /* with offsets */
1072 if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1073 expected, UPRV_LENGTHOF(expected), "CESU-8",
1074 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1075 ) {
1076 log_err("CESU-8->u with skip did not match.\n");
1077 }
1078 }
1079 }
1080
TestStop(int32_t inputsize,int32_t outputsize)1081 static void TestStop(int32_t inputsize, int32_t outputsize)
1082 {
1083 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1084 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1085
1086 static const uint8_t expstopIBM_949[]= {
1087 0x00, 0xb0, 0xa1, 0xb0, 0xa2};
1088
1089 static const uint8_t expstopIBM_943[] = {
1090 0x9f, 0xaf, 0x9f, 0xb1};
1091
1092 static const uint8_t expstopIBM_930[] = {
1093 0x0e, 0x5d, 0x5f, 0x5d, 0x63};
1094
1095 static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01};
1096 static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64};
1097 static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64};
1098
1099
1100 static const int32_t toIBM949Offsstop [] = { 0, 1, 1, 2, 2};
1101 static const int32_t toIBM943Offsstop [] = { 0, 0, 1, 1};
1102 static const int32_t toIBM930Offsstop [] = { 0, 0, 0, 1, 1};
1103
1104 static const int32_t fromIBM949Offs [] = { 0, 1, 3};
1105 static const int32_t fromIBM943Offs [] = { 0, 2};
1106 static const int32_t fromIBM930Offs [] = { 1, 3};
1107
1108 gInBufferSize = inputsize;
1109 gOutBufferSize = outputsize;
1110
1111 /*From Unicode*/
1112
1113 #if !UCONFIG_NO_LEGACY_CONVERSION
1114 if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1115 expstopIBM_949, UPRV_LENGTHOF(expstopIBM_949), "ibm-949",
1116 UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 ))
1117 log_err("u-> ibm-949 with stop did not match.\n");
1118 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1119 expstopIBM_943, UPRV_LENGTHOF(expstopIBM_943), "ibm-943",
1120 UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0))
1121 log_err("u-> ibm-943 with stop did not match.\n");
1122 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1123 expstopIBM_930, UPRV_LENGTHOF(expstopIBM_930), "ibm-930",
1124 UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 ))
1125 log_err("u-> ibm-930 with stop did not match.\n");
1126
1127 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n");
1128 {
1129 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1130 static const uint8_t toIBM943[]= { 0x61,};
1131 static const int32_t offset[]= {0,} ;
1132
1133 /*EUC_JP*/
1134 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1135 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,};
1136 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,};
1137
1138 /*EUC_TW*/
1139 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1140 static const uint8_t to_euc_tw[]={
1141 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,};
1142 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,};
1143
1144 /*ISO-2022-JP*/
1145 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, };
1146 static const uint8_t to_iso_2022_jp[]={
1147 0x41,
1148
1149 };
1150 static const int32_t from_iso_2022_jpOffs [] ={0,};
1151
1152 /*ISO-2022-cn*/
1153 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1154 static const uint8_t to_iso_2022_cn[]={
1155 0x41,
1156
1157 };
1158 static const int32_t from_iso_2022_cnOffs [] ={
1159 0,0,
1160 2,2,
1161 };
1162
1163 /*ISO-2022-kr*/
1164 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
1165 static const uint8_t to_iso_2022_kr[]={
1166 0x1b, 0x24, 0x29, 0x43,
1167 0x41,
1168 0x0e, 0x25, 0x50,
1169 };
1170 static const int32_t from_iso_2022_krOffs [] ={
1171 -1,-1,-1,-1,
1172 0,
1173 1,1,1,
1174 };
1175
1176 /* HZ encoding */
1177 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1178
1179 static const uint8_t to_hz[]={
1180 0x7e, 0x7d, 0x41,
1181 0x7e, 0x7b, 0x26, 0x30,
1182
1183 };
1184 static const int32_t from_hzOffs [] ={
1185 0, 0,0,
1186 1,1,1,1,
1187 };
1188
1189 /*ISCII*/
1190 static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, };
1191 static const uint8_t to_iscii[]={
1192 0x41,
1193 };
1194 static const int32_t from_isciiOffs [] ={
1195 0,
1196 };
1197
1198 if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
1199 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
1200 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 ))
1201 log_err("u-> ibm-943 with stop did not match.\n");
1202
1203 if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
1204 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
1205 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 ))
1206 log_err("u-> euc-jp with stop did not match.\n");
1207
1208 if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
1209 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
1210 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1211 log_err("u-> euc-tw with stop did not match.\n");
1212
1213 if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
1214 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
1215 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1216 log_err("u-> iso-2022-jp with stop did not match.\n");
1217
1218 if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
1219 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
1220 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1221 log_err("u-> iso-2022-jp with stop did not match.\n");
1222
1223 if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
1224 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
1225 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 ))
1226 log_err("u-> iso-2022-cn with stop did not match.\n");
1227
1228 if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
1229 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
1230 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 ))
1231 log_err("u-> iso-2022-kr with stop did not match.\n");
1232
1233 if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
1234 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
1235 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 ))
1236 log_err("u-> HZ with stop did not match.\n");\
1237
1238 if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
1239 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
1240 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 ))
1241 log_err("u-> iscii with stop did not match.\n");
1242
1243
1244 }
1245 #endif
1246
1247 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n");
1248 {
1249 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1250
1251 static const uint8_t to_SCSU[]={
1252 0x41,
1253
1254 };
1255 int32_t from_SCSUOffs [] ={
1256 0,
1257
1258 };
1259 if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1260 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
1261 UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 ))
1262 log_err("u-> SCSU with skip did not match.\n");
1263
1264 }
1265
1266 /*to Unicode*/
1267
1268 #if !UCONFIG_NO_LEGACY_CONVERSION
1269 if(!testConvertToUnicode(expstopIBM_949, UPRV_LENGTHOF(expstopIBM_949),
1270 IBM_949stoptoUnicode, UPRV_LENGTHOF(IBM_949stoptoUnicode),"ibm-949",
1271 UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 ))
1272 log_err("ibm-949->u with stop did not match.\n");
1273 if(!testConvertToUnicode(expstopIBM_943, UPRV_LENGTHOF(expstopIBM_943),
1274 IBM_943stoptoUnicode, UPRV_LENGTHOF(IBM_943stoptoUnicode),"ibm-943",
1275 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 ))
1276 log_err("ibm-943->u with stop did not match.\n");
1277 if(!testConvertToUnicode(expstopIBM_930, UPRV_LENGTHOF(expstopIBM_930),
1278 IBM_930stoptoUnicode, UPRV_LENGTHOF(IBM_930stoptoUnicode),"ibm-930",
1279 UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 ))
1280 log_err("ibm-930->u with stop did not match.\n");
1281
1282 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n");
1283 {
1284
1285 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1286 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1287 };
1288 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63 };
1289 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1};
1290
1291
1292 /*EUC-JP*/
1293 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1294 0x8f, 0xda, 0xa1, /*unassigned*/
1295 0x8e, 0xe0,
1296 };
1297 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec};
1298 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3};
1299
1300 /*EUC_TW*/
1301 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1302 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1303 0xe6, 0xca, 0x8a,
1304 };
1305 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2};
1306 int32_t from_euc_twOffs [] ={ 0, 1, 3};
1307
1308
1309
1310 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
1311 EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
1312 UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1313 log_err("EBCIDIC_STATEFUL->u with stop did not match.\n");
1314
1315 if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1316 euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1317 UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0))
1318 log_err("euc-jp->u with stop did not match.\n");
1319
1320 if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
1321 euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
1322 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1323 log_err("euc-tw->u with stop did not match.\n");
1324 }
1325 #endif
1326
1327 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n");
1328 {
1329 static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1330 0xe0, 0x80, 0x61,};
1331 static const UChar expected1[] = { 0x0031, 0x4e8c,};
1332 static const int32_t offsets1[] = { 0x0000, 0x0001};
1333
1334 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1335 expected1, UPRV_LENGTHOF(expected1),"utf8",
1336 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1337 log_err("utf8->u with stop did not match.\n");;
1338 }
1339 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n");
1340 {
1341 static const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04};
1342 static const UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061};
1343 static const int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003};
1344
1345 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1346 expected1, UPRV_LENGTHOF(expected1),"SCSU",
1347 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1348 log_err("scsu->u with stop did not match.\n");;
1349 }
1350
1351 }
1352
TestSub(int32_t inputsize,int32_t outputsize)1353 static void TestSub(int32_t inputsize, int32_t outputsize)
1354 {
1355 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1356 static const UChar sampleText2[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1357
1358 static const uint8_t expsubIBM_949[] =
1359 { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 };
1360
1361 static const uint8_t expsubIBM_943[] = {
1362 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 };
1363
1364 static const uint8_t expsubIBM_930[] = {
1365 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f };
1366
1367 static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 };
1368 static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1369 static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1370
1371 static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1372 static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 };
1373 static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 };
1374
1375 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 };
1376 static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 };
1377 static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 };
1378
1379 gInBufferSize = inputsize;
1380 gOutBufferSize = outputsize;
1381
1382 /*from unicode*/
1383
1384 #if !UCONFIG_NO_LEGACY_CONVERSION
1385 if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1386 expsubIBM_949, UPRV_LENGTHOF(expsubIBM_949), "ibm-949",
1387 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 ))
1388 log_err("u-> ibm-949 with subst did not match.\n");
1389 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1390 expsubIBM_943, UPRV_LENGTHOF(expsubIBM_943), "ibm-943",
1391 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0))
1392 log_err("u-> ibm-943 with subst did not match.\n");
1393 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1394 expsubIBM_930, UPRV_LENGTHOF(expsubIBM_930), "ibm-930",
1395 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 ))
1396 log_err("u-> ibm-930 with subst did not match.\n");
1397
1398 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1399 {
1400 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1401 static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 };
1402 static const int32_t offset[]= {0, 1, 1, 3, 3, 4};
1403
1404
1405 /* EUC_JP*/
1406 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1407 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1408 0xf4, 0xfe, 0xf4, 0xfe,
1409 0x61, 0x8e, 0xe0,
1410 };
1411 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7};
1412
1413 /*EUC_TW*/
1414 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1415 static const uint8_t to_euc_tw[]={
1416 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1417 0xfd, 0xfe, 0xfd, 0xfe,
1418 0x61, 0xe6, 0xca, 0x8a,
1419 };
1420
1421 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,};
1422
1423 if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
1424 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
1425 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 ))
1426 log_err("u-> ibm-943 with substitute did not match.\n");
1427
1428 if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
1429 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
1430 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 ))
1431 log_err("u-> euc-jp with substitute did not match.\n");
1432
1433 if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
1434 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
1435 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1436 log_err("u-> euc-tw with substitute did not match.\n");
1437 }
1438 #endif
1439
1440 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1441 {
1442 UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1443
1444 const uint8_t to_SCSU[]={
1445 0x41,
1446 0x0e, 0xff,0xfd,
1447 0x42
1448
1449
1450 };
1451 int32_t from_SCSUOffs [] ={
1452 0,
1453 1,1,1,
1454 2,
1455
1456 };
1457 const uint8_t to_SCSU_1[]={
1458 0x41,
1459
1460 };
1461 int32_t from_SCSUOffs_1 [] ={
1462 0,
1463
1464 };
1465 if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1466 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
1467 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 ))
1468 log_err("u-> SCSU with substitute did not match.\n");
1469
1470 if(!testConvertFromUnicodeWithContext(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1471 to_SCSU_1, UPRV_LENGTHOF(to_SCSU_1), "SCSU",
1472 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
1473 log_err("u-> SCSU with substitute did not match.\n");
1474 }
1475
1476 log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1477 {
1478 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,};
1479 static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac,
1480 0xf0, 0x90, 0x90, 0x81,
1481 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
1482 0xef, 0xbf, 0xbf, 0x61,
1483
1484 };
1485 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 };
1486 if(!testConvertFromUnicode(testinput, UPRV_LENGTHOF(testinput),
1487 expectedUTF8, UPRV_LENGTHOF(expectedUTF8), "utf8",
1488 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) {
1489 log_err("u-> utf8 with substitute did not match.\n");
1490 }
1491 }
1492
1493 log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1494 {
1495 static const UChar in[]={ 0x0041, 0xfeff };
1496
1497 static const uint8_t out[]={
1498 #if U_IS_BIG_ENDIAN
1499 0xfe, 0xff,
1500 0x00, 0x41,
1501 0xfe, 0xff
1502 #else
1503 0xff, 0xfe,
1504 0x41, 0x00,
1505 0xff, 0xfe
1506 #endif
1507 };
1508 static const int32_t offsets[]={
1509 -1, -1, 0, 0, 1, 1
1510 };
1511
1512 if(!testConvertFromUnicode(in, UPRV_LENGTHOF(in),
1513 out, UPRV_LENGTHOF(out), "UTF-16",
1514 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1515 ) {
1516 log_err("u->UTF-16 with substitute did not match.\n");
1517 }
1518 }
1519
1520 log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1521 {
1522 static const UChar in[]={ 0x0041, 0xfeff };
1523
1524 static const uint8_t out[]={
1525 #if U_IS_BIG_ENDIAN
1526 0x00, 0x00, 0xfe, 0xff,
1527 0x00, 0x00, 0x00, 0x41,
1528 0x00, 0x00, 0xfe, 0xff
1529 #else
1530 0xff, 0xfe, 0x00, 0x00,
1531 0x41, 0x00, 0x00, 0x00,
1532 0xff, 0xfe, 0x00, 0x00
1533 #endif
1534 };
1535 static const int32_t offsets[]={
1536 -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1
1537 };
1538
1539 if(!testConvertFromUnicode(in, UPRV_LENGTHOF(in),
1540 out, UPRV_LENGTHOF(out), "UTF-32",
1541 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1542 ) {
1543 log_err("u->UTF-32 with substitute did not match.\n");
1544 }
1545 }
1546
1547 /*to unicode*/
1548
1549 #if !UCONFIG_NO_LEGACY_CONVERSION
1550 if(!testConvertToUnicode(expsubIBM_949, UPRV_LENGTHOF(expsubIBM_949),
1551 IBM_949subtoUnicode, UPRV_LENGTHOF(IBM_949subtoUnicode),"ibm-949",
1552 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 ))
1553 log_err("ibm-949->u with substitute did not match.\n");
1554 if(!testConvertToUnicode(expsubIBM_943, UPRV_LENGTHOF(expsubIBM_943),
1555 IBM_943subtoUnicode, UPRV_LENGTHOF(IBM_943subtoUnicode),"ibm-943",
1556 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 ))
1557 log_err("ibm-943->u with substitute did not match.\n");
1558 if(!testConvertToUnicode(expsubIBM_930, UPRV_LENGTHOF(expsubIBM_930),
1559 IBM_930subtoUnicode, UPRV_LENGTHOF(IBM_930subtoUnicode),"ibm-930",
1560 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 ))
1561 log_err("ibm-930->u with substitute did not match.\n");
1562
1563 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1564 {
1565
1566 const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1567 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1568 };
1569 UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0xfffd, 0x03b4
1570 };
1571 int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5};
1572
1573
1574 /* EUC_JP*/
1575 const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1576 0x8f, 0xda, 0xa1, /*unassigned*/
1577 0x8e, 0xe0, 0x8a
1578 };
1579 UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a };
1580 int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6, 9, 11 };
1581
1582 /*EUC_TW*/
1583 const uint8_t sampleTxt_euc_tw[]={
1584 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1585 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1586 0xe6, 0xca, 0x8a,
1587 };
1588 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, };
1589 int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13};
1590
1591
1592 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
1593 EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
1594 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1595 log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n");
1596
1597
1598 if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1599 euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1600 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ))
1601 log_err("euc-jp->u with substitute did not match.\n");
1602
1603
1604 if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
1605 euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
1606 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1607 log_err("euc-tw->u with substitute did not match.\n");
1608
1609
1610 if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1611 euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1612 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND))
1613 log_err("euc-jp->u with substitute did not match.\n");
1614 }
1615 #endif
1616
1617 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1618 {
1619 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1620 0xe0, 0x80, 0x61,};
1621 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0xfffd, 0x0061};
1622 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0005, 0x0006};
1623
1624 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1625 expected1, UPRV_LENGTHOF(expected1),"utf8",
1626 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1627 log_err("utf8->u with substitute did not match.\n");;
1628 }
1629 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1630 {
1631 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
1632 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfffd};
1633 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
1634
1635 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1636 expected1, UPRV_LENGTHOF(expected1),"SCSU",
1637 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1638 log_err("scsu->u with stop did not match.\n");;
1639 }
1640
1641 #if !UCONFIG_NO_LEGACY_CONVERSION
1642 log_verbose("Testing ibm-930 subchar/subchar1\n");
1643 {
1644 static const UChar u1[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf };
1645 static const uint8_t s1[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f };
1646 static const int32_t offsets1[]={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1647
1648 static const UChar u2[]={ 0x6d63, 0x6d64, 0xfffd, 0x6d66, 0x1a };
1649 static const uint8_t s2[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 };
1650 static const int32_t offsets2[]={ 1, 3, 5, 7, 10 };
1651
1652 if(!testConvertFromUnicode(u1, UPRV_LENGTHOF(u1), s1, UPRV_LENGTHOF(s1), "ibm-930",
1653 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1654 ) {
1655 log_err("u->ibm-930 subchar/subchar1 did not match.\n");
1656 }
1657
1658 if(!testConvertToUnicode(s2, UPRV_LENGTHOF(s2), u2, UPRV_LENGTHOF(u2), "ibm-930",
1659 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1660 ) {
1661 log_err("ibm-930->u subchar/subchar1 did not match.\n");
1662 }
1663 }
1664
1665 log_verbose("Testing GB 18030 with substitute callbacks\n");
1666 {
1667 static const UChar u2[]={
1668 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff };
1669 static const uint8_t gb2[]={
1670 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 };
1671 static const int32_t offsets2[]={
1672 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 };
1673
1674 if(!testConvertToUnicode(gb2, UPRV_LENGTHOF(gb2), u2, UPRV_LENGTHOF(u2), "gb18030",
1675 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1676 ) {
1677 log_err("gb18030->u with substitute did not match.\n");
1678 }
1679 }
1680 #endif
1681
1682 log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n");
1683 {
1684 static const uint8_t utf7[]={
1685 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. */
1686 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e
1687 };
1688 static const UChar unicode[]={
1689 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xfffd, 0x2e
1690 };
1691 static const int32_t offsets[]={
1692 0, 1, 2, 4, 6, 7, 9, 11, 12, 14, 17, 19, 21, 22, 23, 24
1693 };
1694
1695 if(!testConvertToUnicode(utf7, UPRV_LENGTHOF(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7",
1696 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1697 ) {
1698 log_err("UTF-7->u with substitute did not match.\n");
1699 }
1700 }
1701
1702 log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n");
1703 {
1704 static const uint8_t
1705 in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff },
1706 in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff },
1707 in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff };
1708
1709 static const UChar
1710 out1[]={ 0x4e00, 0xfeff },
1711 out2[]={ 0x004e, 0xfffe },
1712 out3[]={ 0xfefd, 0x4e00, 0xfeff };
1713
1714 static const int32_t
1715 offsets1[]={ 2, 4 },
1716 offsets2[]={ 2, 4 },
1717 offsets3[]={ 0, 2, 4 };
1718
1719 if(!testConvertToUnicode(in1, UPRV_LENGTHOF(in1), out1, UPRV_LENGTHOF(out1), "UTF-16",
1720 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1721 ) {
1722 log_err("UTF-16 (BE BOM)->u with substitute did not match.\n");
1723 }
1724
1725 if(!testConvertToUnicode(in2, UPRV_LENGTHOF(in2), out2, UPRV_LENGTHOF(out2), "UTF-16",
1726 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1727 ) {
1728 log_err("UTF-16 (LE BOM)->u with substitute did not match.\n");
1729 }
1730
1731 if(!testConvertToUnicode(in3, UPRV_LENGTHOF(in3), out3, UPRV_LENGTHOF(out3), "UTF-16",
1732 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1733 ) {
1734 log_err("UTF-16 (no BOM)->u with substitute did not match.\n");
1735 }
1736 }
1737
1738 log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n");
1739 {
1740 static const uint8_t
1741 in1[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff },
1742 in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 },
1743 in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 },
1744 in4[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 };
1745
1746 static const UChar
1747 out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff },
1748 out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe },
1749 out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd },
1750 out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 };
1751
1752 static const int32_t
1753 offsets1[]={ 4, 4, 8 },
1754 offsets2[]={ 4, 4, 8 },
1755 offsets3[]={ 0, 4, 4, 8, 12 },
1756 offsets4[]={ 0, 0, 4, 8 };
1757
1758 if(!testConvertToUnicode(in1, UPRV_LENGTHOF(in1), out1, UPRV_LENGTHOF(out1), "UTF-32",
1759 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1760 ) {
1761 log_err("UTF-32 (BE BOM)->u with substitute did not match.\n");
1762 }
1763
1764 if(!testConvertToUnicode(in2, UPRV_LENGTHOF(in2), out2, UPRV_LENGTHOF(out2), "UTF-32",
1765 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1766 ) {
1767 log_err("UTF-32 (LE BOM)->u with substitute did not match.\n");
1768 }
1769
1770 if(!testConvertToUnicode(in3, UPRV_LENGTHOF(in3), out3, UPRV_LENGTHOF(out3), "UTF-32",
1771 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1772 ) {
1773 log_err("UTF-32 (no BOM)->u with substitute did not match.\n");
1774 }
1775
1776 if(!testConvertToUnicode(in4, UPRV_LENGTHOF(in4), out4, UPRV_LENGTHOF(out4), "UTF-32",
1777 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0)
1778 ) {
1779 log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n");
1780 }
1781 }
1782 }
1783
TestSubWithValue(int32_t inputsize,int32_t outputsize)1784 static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
1785 {
1786 UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1787 UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1788
1789 const uint8_t expsubwvalIBM_949[]= {
1790 0x00, 0xb0, 0xa1, 0xb0, 0xa2,
1791 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 };
1792
1793 const uint8_t expsubwvalIBM_943[]= {
1794 0x9f, 0xaf, 0x9f, 0xb1,
1795 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 };
1796
1797 const uint8_t expsubwvalIBM_930[] = {
1798 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f };
1799
1800 int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 };
1801 int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 };
1802 int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */
1803
1804 gInBufferSize = inputsize;
1805 gOutBufferSize = outputsize;
1806
1807 /*from Unicode*/
1808
1809 #if !UCONFIG_NO_LEGACY_CONVERSION
1810 if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1811 expsubwvalIBM_949, UPRV_LENGTHOF(expsubwvalIBM_949), "ibm-949",
1812 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 ))
1813 log_err("u-> ibm-949 with subst with value did not match.\n");
1814
1815 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1816 expsubwvalIBM_943, UPRV_LENGTHOF(expsubwvalIBM_943), "ibm-943",
1817 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 ))
1818 log_err("u-> ibm-943 with sub with value did not match.\n");
1819
1820 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1821 expsubwvalIBM_930, UPRV_LENGTHOF(expsubwvalIBM_930), "ibm-930",
1822 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 ))
1823 log_err("u-> ibm-930 with subst with value did not match.\n");
1824
1825
1826 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n");
1827 {
1828 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1829 static const uint8_t toIBM943[]= { 0x61,
1830 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1831 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1832 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1833 0x61 };
1834 static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
1835
1836
1837 /* EUC_JP*/
1838 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, };
1839 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1840 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1841 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1842 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1843 0x61, 0x8e, 0xe0,
1844 };
1845 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,
1846 3, 3, 3, 3, 3, 3,
1847 3, 3, 3, 3, 3, 3,
1848 5, 5, 5, 5, 5, 5,
1849 6, 7, 7,
1850 };
1851
1852 /*EUC_TW*/
1853 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1854 static const uint8_t to_euc_tw[]={
1855 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1856 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1857 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1858 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1859 0x61, 0xe6, 0xca, 0x8a,
1860 };
1861 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,
1862 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5,
1863 6, 7, 7, 8,
1864 };
1865 /*ISO-2022-JP*/
1866 static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ;
1867 static const uint8_t to_iso_2022_jp1[]={
1868 0x1b, 0x24, 0x42, 0x21, 0x21,
1869 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1870 0x1b, 0x24, 0x42, 0x21, 0x22,
1871 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1872 0x42,
1873 };
1874
1875 static const int32_t from_iso_2022_jpOffs1 [] ={
1876 0,0,0,0,0,
1877 1,1,1,1,1,1,1,1,1,
1878 2,2,2,2,2,
1879 3,3,3,3,3,3,3,3,3,
1880 4,
1881 };
1882 /* surrogate pair*/
1883 static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ;
1884 static const uint8_t to_iso_2022_jp2[]={
1885 0x1b, 0x24, 0x42, 0x21, 0x21,
1886 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1887 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1888 0x1b, 0x24, 0x42, 0x21, 0x22,
1889 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1890 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1891 0x42,
1892 };
1893 static const int32_t from_iso_2022_jpOffs2 [] ={
1894 0,0,0,0,0,
1895 1,1,1,1,1,1,1,1,1,
1896 1,1,1,1,1,1,
1897 3,3,3,3,3,
1898 4,4,4,4,4,4,4,4,4,
1899 4,4,4,4,4,4,
1900 6,
1901 };
1902
1903 /*ISO-2022-cn*/
1904 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1905 static const uint8_t to_iso_2022_cn[]={
1906 0x41,
1907 0x25, 0x55, 0x33, 0x37, 0x31, 0x32,
1908 0x42,
1909 };
1910 static const int32_t from_iso_2022_cnOffs [] ={
1911 0,
1912 1,1,1,1,1,1,
1913 2,
1914 };
1915
1916 static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042};
1917
1918 static const uint8_t to_iso_2022_cn4[]={
1919 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
1920 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1921 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1922 0x0e, 0x21, 0x22,
1923 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1924 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1925 0x42,
1926 };
1927 static const int32_t from_iso_2022_cnOffs4 [] ={
1928 0,0,0,0,0,0,0,
1929 1,1,1,1,1,1,1,
1930 1,1,1,1,1,1,
1931 3,3,3,
1932 4,4,4,4,4,4,4,
1933 4,4,4,4,4,4,
1934 6
1935
1936 };
1937
1938 /*ISO-2022-kr*/
1939 static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
1940 static const uint8_t to_iso_2022_kr2[]={
1941 0x1b, 0x24, 0x29, 0x43,
1942 0x41,
1943 0x0e, 0x25, 0x50,
1944 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1945 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1946 0x0e, 0x25, 0x50,
1947 0x0f, 0x42,
1948 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1949 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1950 0x43
1951 };
1952 static const int32_t from_iso_2022_krOffs2 [] ={
1953 -1,-1,-1,-1,
1954 0,
1955 1,1,1,
1956 2,2,2,2,2,2,2,
1957 2,2,2,2,2,2,
1958 4,4,4,
1959 5,5,
1960 6,6,6,6,6,6,
1961 6,6,6,6,6,6,
1962 8,
1963 };
1964
1965 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 };
1966 static const uint8_t to_iso_2022_kr[]={
1967 0x1b, 0x24, 0x29, 0x43,
1968 0x41,
1969 0x0e, 0x25, 0x50,
1970 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1971 0x0e, 0x25, 0x50,
1972 0x0f, 0x42,
1973 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1974 0x43
1975 };
1976
1977
1978 static const int32_t from_iso_2022_krOffs [] ={
1979 -1,-1,-1,-1,
1980 0,
1981 1,1,1,
1982 2,2,2,2,2,2,2,
1983 3,3,3,
1984 4,4,
1985 5,5,5,5,5,5,
1986 6,
1987 };
1988 /* HZ encoding */
1989 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1990
1991 static const uint8_t to_hz[]={
1992 0x7e, 0x7d, 0x41,
1993 0x7e, 0x7b, 0x26, 0x30,
1994 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*unassigned*/
1995 0x7e, 0x7b, 0x26, 0x30,
1996 0x7e, 0x7d, 0x42,
1997
1998 };
1999 static const int32_t from_hzOffs [] ={
2000 0,0,0,
2001 1,1,1,1,
2002 2,2,2,2,2,2,2,2,
2003 3,3,3,3,
2004 4,4,4
2005 };
2006
2007 static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
2008 static const uint8_t to_hz2[]={
2009 0x7e, 0x7d, 0x41,
2010 0x7e, 0x7b, 0x26, 0x30,
2011 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
2012 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2013 0x7e, 0x7b, 0x26, 0x30,
2014 0x7e, 0x7d, 0x42,
2015 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
2016 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2017 0x43
2018 };
2019 static const int32_t from_hzOffs2 [] ={
2020 0,0,0,
2021 1,1,1,1,
2022 2,2,2,2,2,2,2,2,
2023 2,2,2,2,2,2,
2024 4,4,4,4,
2025 5,5,5,
2026 6,6,6,6,6,6,
2027 6,6,6,6,6,6,
2028 8,
2029 };
2030
2031 /*ISCII*/
2032 static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 };
2033 static const uint8_t to_iscii[]={
2034 0x41,
2035 0xef, 0x42, 0xa1,
2036 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
2037 0xa2,
2038 0x42,
2039 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
2040 0x43
2041 };
2042
2043
2044 static const int32_t from_isciiOffs [] ={
2045 0,
2046 1,1,1,
2047 2,2,2,2,2,2,
2048 3,
2049 4,
2050 5,5,5,5,5,5,
2051 6,
2052 };
2053
2054 if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
2055 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
2056 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 ))
2057 log_err("u-> ibm-943 with subst with value did not match.\n");
2058
2059 if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
2060 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
2061 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 ))
2062 log_err("u-> euc-jp with subst with value did not match.\n");
2063
2064 if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
2065 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
2066 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 ))
2067 log_err("u-> euc-tw with subst with value did not match.\n");
2068
2069 if(!testConvertFromUnicode(iso_2022_jp_inputText1, UPRV_LENGTHOF(iso_2022_jp_inputText1),
2070 to_iso_2022_jp1, UPRV_LENGTHOF(to_iso_2022_jp1), "iso-2022-jp",
2071 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2072 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2073
2074 if(!testConvertFromUnicode(iso_2022_jp_inputText1, UPRV_LENGTHOF(iso_2022_jp_inputText1),
2075 to_iso_2022_jp1, UPRV_LENGTHOF(to_iso_2022_jp1), "iso-2022-jp",
2076 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2077 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2078
2079 if(!testConvertFromUnicode(iso_2022_jp_inputText2, UPRV_LENGTHOF(iso_2022_jp_inputText2),
2080 to_iso_2022_jp2, UPRV_LENGTHOF(to_iso_2022_jp2), "iso-2022-jp",
2081 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 ))
2082 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2083 /*ESCAPE OPTIONS*/
2084 {
2085 /* surrogate pair*/
2086 static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ;
2087 static const uint8_t to_iso_2022_jp3_v2[]={
2088 0x1b, 0x24, 0x42, 0x21, 0x21,
2089 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2090
2091 0x1b, 0x24, 0x42, 0x21, 0x22,
2092 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2093
2094 0x42,
2095 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b,
2096 };
2097
2098 static const int32_t from_iso_2022_jpOffs3_v2 [] ={
2099 0,0,0,0,0,
2100 1,1,1,1,1,1,1,1,1,1,1,1,
2101
2102 3,3,3,3,3,
2103 4,4,4,4,4,4,4,4,4,4,4,4,
2104
2105 6,
2106 7,7,7,7,7,7,7,7,7
2107 };
2108
2109 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, UPRV_LENGTHOF(iso_2022_jp_inputText3),
2110 to_iso_2022_jp3_v2, UPRV_LENGTHOF(to_iso_2022_jp3_v2), "iso-2022-jp",
2111 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2112 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n");
2113 }
2114 {
2115 static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2116 static const uint8_t to_iso_2022_cn5_v2[]={
2117 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2118 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2119 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2120 0x0e, 0x21, 0x22,
2121 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2122 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2123 0x42,
2124 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32,
2125 };
2126 static const int32_t from_iso_2022_cnOffs5_v2 [] ={
2127 0,0,0,0,0,0,0,
2128 1,1,1,1,1,1,1,
2129 1,1,1,1,1,1,
2130 3,3,3,
2131 4,4,4,4,4,4,4,
2132 4,4,4,4,4,4,
2133 6,
2134 7,7,7,7,7,7
2135 };
2136 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, UPRV_LENGTHOF(iso_2022_cn_inputText5),
2137 to_iso_2022_cn5_v2, UPRV_LENGTHOF(to_iso_2022_cn5_v2), "iso-2022-cn",
2138 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR ))
2139 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n");
2140
2141 }
2142 {
2143 static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2144 static const uint8_t to_iso_2022_cn6_v2[]={
2145 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2146 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2147 0x0e, 0x21, 0x22,
2148 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2149 0x42,
2150 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d
2151 };
2152 static const int32_t from_iso_2022_cnOffs6_v2 [] ={
2153 0, 0, 0, 0, 0, 0, 0,
2154 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2155 3, 3, 3,
2156 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2157 6,
2158 7, 7, 7, 7, 7, 7, 7, 7,
2159 };
2160 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, UPRV_LENGTHOF(iso_2022_cn_inputText6),
2161 to_iso_2022_cn6_v2, UPRV_LENGTHOF(to_iso_2022_cn6_v2), "iso-2022-cn",
2162 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR ))
2163 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n");
2164
2165 }
2166 {
2167 static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2168 static const uint8_t to_iso_2022_cn7_v2[]={
2169 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2170 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2171 0x0e, 0x21, 0x22,
2172 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2173 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32,
2174 };
2175 static const int32_t from_iso_2022_cnOffs7_v2 [] ={
2176 0, 0, 0, 0, 0, 0, 0,
2177 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2178 3, 3, 3,
2179 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2180 6,
2181 7, 7, 7, 7, 7, 7,
2182 };
2183 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, UPRV_LENGTHOF(iso_2022_cn_inputText7),
2184 to_iso_2022_cn7_v2, UPRV_LENGTHOF(to_iso_2022_cn7_v2), "iso-2022-cn",
2185 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR ))
2186 log_err("u-> iso-2022-cn with sub & K did not match.\n");
2187
2188 }
2189 {
2190 static const UChar iso_2022_cn_inputText8[]={
2191 0x3000,
2192 0xD84D, 0xDC56,
2193 0x3001,
2194 0xD84D, 0xDC56,
2195 0xDBFF, 0xDFFF,
2196 0x0042,
2197 0x0902};
2198 static const uint8_t to_iso_2022_cn8_v2[]={
2199 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2200 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20,
2201 0x0e, 0x21, 0x22,
2202 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20,
2203 0x5c, 0x31, 0x30, 0x46, 0x46, 0x46, 0x46, 0x20,
2204 0x42,
2205 0x5c, 0x39, 0x30, 0x32, 0x20
2206 };
2207 static const int32_t from_iso_2022_cnOffs8_v2 [] ={
2208 0, 0, 0, 0, 0, 0, 0,
2209 1, 1, 1, 1, 1, 1, 1, 1,
2210 3, 3, 3,
2211 4, 4, 4, 4, 4, 4, 4, 4,
2212 6, 6, 6, 6, 6, 6, 6, 6,
2213 8,
2214 9, 9, 9, 9, 9
2215 };
2216 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, UPRV_LENGTHOF(iso_2022_cn_inputText8),
2217 to_iso_2022_cn8_v2, UPRV_LENGTHOF(to_iso_2022_cn8_v2), "iso-2022-cn",
2218 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR ))
2219 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n");
2220
2221 }
2222 {
2223 static const uint8_t to_iso_2022_cn4_v3[]={
2224 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2225 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2226 0x0e, 0x21, 0x22,
2227 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2228 0x42
2229 };
2230
2231
2232 static const int32_t from_iso_2022_cnOffs4_v3 [] ={
2233 0,0,0,0,0,0,0,
2234 1,1,1,1,1,1,1,1,1,1,1,
2235
2236 3,3,3,
2237 4,4,4,4,4,4,4,4,4,4,4,
2238
2239 6
2240
2241 };
2242 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, UPRV_LENGTHOF(iso_2022_cn_inputText4),
2243 to_iso_2022_cn4_v3, UPRV_LENGTHOF(to_iso_2022_cn4_v3), "iso-2022-cn",
2244 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2245 {
2246 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n");
2247 }
2248 }
2249 if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
2250 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
2251 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 ))
2252 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2253
2254 if(!testConvertFromUnicode(iso_2022_cn_inputText4, UPRV_LENGTHOF(iso_2022_cn_inputText4),
2255 to_iso_2022_cn4, UPRV_LENGTHOF(to_iso_2022_cn4), "iso-2022-cn",
2256 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 ))
2257 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2258 if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
2259 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
2260 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 ))
2261 log_err("u-> iso_2022_kr with subst with value did not match.\n");
2262 if(!testConvertFromUnicode(iso_2022_kr_inputText2, UPRV_LENGTHOF(iso_2022_kr_inputText2),
2263 to_iso_2022_kr2, UPRV_LENGTHOF(to_iso_2022_kr2), "iso-2022-kr",
2264 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 ))
2265 log_err("u-> iso_2022_kr2 with subst with value did not match.\n");
2266 if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
2267 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
2268 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 ))
2269 log_err("u-> hz with subst with value did not match.\n");
2270 if(!testConvertFromUnicode(hz_inputText2, UPRV_LENGTHOF(hz_inputText2),
2271 to_hz2, UPRV_LENGTHOF(to_hz2), "HZ",
2272 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 ))
2273 log_err("u-> hz with subst with value did not match.\n");
2274
2275 if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
2276 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
2277 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 ))
2278 log_err("u-> iscii with subst with value did not match.\n");
2279 }
2280 #endif
2281
2282 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
2283 /*to Unicode*/
2284 {
2285 #if !UCONFIG_NO_LEGACY_CONVERSION
2286 static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
2287 0x81, 0xad, /*unassigned*/
2288 0x89, 0xd3 };
2289 static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
2290 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
2291 0x7B87};
2292 static const int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
2293
2294 /* EUC_JP*/
2295 static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
2296 0x8f, 0xda, 0xa1, /*unassigned*/
2297 0x8e, 0xe0,
2298 };
2299 static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec,
2300 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31,
2301 0x00a2 };
2302 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3,
2303 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2304 9,
2305 };
2306
2307 /*EUC_TW*/
2308 static const uint8_t sampleTxt_euc_tw[]={
2309 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
2310 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
2311 0xe6, 0xca, 0x8a,
2312 };
2313 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2,
2314 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43,
2315 0x8706, 0x8a, };
2316 static const int32_t from_euc_twOffs [] ={ 0, 1, 3,
2317 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2318 11, 13};
2319
2320 /*iso-2022-jp*/
2321 static const uint8_t sampleTxt_iso_2022_jp[]={
2322 0x1b, 0x28, 0x42, 0x41,
2323 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/
2324 0x1b, 0x28, 0x42, 0x42,
2325
2326 };
2327 /* A % X 3 A % X 1 A B */
2328 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x25,0x58,0x33,0x41,0x25,0x58,0x31,0x41, 0x42 };
2329 static const int32_t from_iso_2022_jpOffs [] ={ 3, 7, 7, 7, 7, 7, 7, 7, 7, 12 };
2330
2331 /*iso-2022-cn*/
2332 static const uint8_t sampleTxt_iso_2022_cn[]={
2333 0x0f, 0x41, 0x44,
2334 0x1B, 0x24, 0x29, 0x47,
2335 0x0E, 0x40, 0x6c, /*unassigned*/
2336 0x0f, 0x42,
2337
2338 };
2339 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 };
2340 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 8, 8, 8, 8, 8, 8, 8, 8, 11 };
2341
2342 /*iso-2022-kr*/
2343 static const uint8_t sampleTxt_iso_2022_kr[]={
2344 0x1b, 0x24, 0x29, 0x43,
2345 0x41,
2346 0x0E, 0x7f, 0x1E,
2347 0x0e, 0x25, 0x50,
2348 0x0f, 0x51,
2349 0x42, 0x43,
2350
2351 };
2352 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43};
2353 static const int32_t from_iso_2022_krOffs [] ={ 4, 6, 6, 6, 6, 6, 6, 6, 6, 9, 12, 13 , 14 };
2354
2355 /*hz*/
2356 static const uint8_t sampleTxt_hz[]={
2357 0x41,
2358 0x7e, 0x7b, 0x26, 0x30,
2359 0x7f, 0x1E, /*unassigned*/
2360 0x26, 0x30,
2361 0x7e, 0x7d, 0x42,
2362 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
2363 0x7e, 0x7d, 0x42,
2364 };
2365 static const UChar hztoUnicode[]={
2366 0x41,
2367 0x03a0,
2368 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2369 0x03A0,
2370 0x42,
2371 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2372 0x42,};
2373
2374 static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18, };
2375
2376
2377 /*iscii*/
2378 static const uint8_t sampleTxt_iscii[]={
2379 0x41,
2380 0x30,
2381 0xEB, /*unassigned*/
2382 0xa3,
2383 0x42,
2384 0xEC, /*unassigned*/
2385 0x42,
2386 };
2387 static const UChar isciitoUnicode[]={
2388 0x41,
2389 0x30,
2390 0x25, 0x58, 0x45, 0x42,
2391 0x0903,
2392 0x42,
2393 0x25, 0x58, 0x45, 0x43,
2394 0x42,};
2395
2396 static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6 };
2397 #endif
2398
2399 /*UTF8*/
2400 static const uint8_t sampleTxtUTF8[]={
2401 0x20, 0x64, 0x50,
2402 0xC2, 0x7E, /* truncated char */
2403 0x20,
2404 0xE0, 0xB5, 0x7E, /* truncated char */
2405 0x40,
2406 };
2407 static const UChar UTF8ToUnicode[]={
2408 0x0020, 0x0064, 0x0050,
2409 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */
2410 0x0020,
2411 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E,
2412 0x0040
2413 };
2414 static const int32_t fromUTF8[] = {
2415 0, 1, 2,
2416 3, 3, 3, 3, 4,
2417 5,
2418 6, 6, 6, 6, 6, 6, 6, 6, 8,
2419 9
2420 };
2421 static const UChar UTF8ToUnicodeXML_DEC[]={
2422 0x0020, 0x0064, 0x0050,
2423 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* Â~ */
2424 0x0020,
2425 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E,
2426 0x0040
2427 };
2428 static const int32_t fromUTF8XML_DEC[] = {
2429 0, 1, 2,
2430 3, 3, 3, 3, 3, 3, 4,
2431 5,
2432 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8,
2433 9
2434 };
2435
2436
2437 #if !UCONFIG_NO_LEGACY_CONVERSION
2438 if(!testConvertToUnicode(sampleTxtToU, UPRV_LENGTHOF(sampleTxtToU),
2439 IBM_943toUnicode, UPRV_LENGTHOF(IBM_943toUnicode),"ibm-943",
2440 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 ))
2441 log_err("ibm-943->u with substitute with value did not match.\n");
2442
2443 if(!testConvertToUnicode(sampleTxt_EUC_JP, UPRV_LENGTHOF(sampleTxt_EUC_JP),
2444 EUC_JPtoUnicode, UPRV_LENGTHOF(EUC_JPtoUnicode),"IBM-eucJP",
2445 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0))
2446 log_err("euc-jp->u with substitute with value did not match.\n");
2447
2448 if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
2449 euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
2450 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0))
2451 log_err("euc-tw->u with substitute with value did not match.\n");
2452
2453 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2454 iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
2455 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0))
2456 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2457
2458 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2459 iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
2460 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR))
2461 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2462
2463 {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */
2464 {
2465 static const UChar iso_2022_jptoUnicodeDec[]={
2466 0x0041,
2467 /* & # 5 8 ; */
2468 0x0026, 0x0023, 0x0035, 0x0038, 0x003b,
2469 0x0026, 0x0023, 0x0032, 0x0036, 0x003b,
2470 0x0042 };
2471 static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12, };
2472 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2473 iso_2022_jptoUnicodeDec, UPRV_LENGTHOF(iso_2022_jptoUnicodeDec),"iso-2022-jp",
2474 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2475 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n");
2476 }
2477 {
2478 static const UChar iso_2022_jptoUnicodeHex[]={
2479 0x0041,
2480 /* & # x 3 A ; */
2481 0x0026, 0x0023, 0x0078, 0x0033, 0x0041, 0x003b,
2482 0x0026, 0x0023, 0x0078, 0x0031, 0x0041, 0x003b,
2483 0x0042 };
2484 static const int32_t from_iso_2022_jpOffsHex [] ={ 3,7,7,7,7,7,7,7,7,7,7,7,7,12 };
2485 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2486 iso_2022_jptoUnicodeHex, UPRV_LENGTHOF(iso_2022_jptoUnicodeHex),"iso-2022-jp",
2487 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR ))
2488 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n");
2489 }
2490 {
2491 static const UChar iso_2022_jptoUnicodeC[]={
2492 0x0041,
2493 0x005C, 0x0078, 0x0033, 0x0041, /* \x3A */
2494 0x005C, 0x0078, 0x0031, 0x0041, /* \x1A */
2495 0x0042 };
2496 int32_t from_iso_2022_jpOffsC [] ={ 3,7,7,7,7,7,7,7,7,12 };
2497 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2498 iso_2022_jptoUnicodeC, UPRV_LENGTHOF(iso_2022_jptoUnicodeC),"iso-2022-jp",
2499 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2500 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n");
2501 }
2502 }
2503 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, UPRV_LENGTHOF(sampleTxt_iso_2022_cn),
2504 iso_2022_cntoUnicode, UPRV_LENGTHOF(iso_2022_cntoUnicode),"iso-2022-cn",
2505 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0))
2506 log_err("iso-2022-cn->u with substitute with value did not match.\n");
2507
2508 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, UPRV_LENGTHOF(sampleTxt_iso_2022_kr),
2509 iso_2022_krtoUnicode, UPRV_LENGTHOF(iso_2022_krtoUnicode),"iso-2022-kr",
2510 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0))
2511 log_err("iso-2022-kr->u with substitute with value did not match.\n");
2512
2513 if(!testConvertToUnicode(sampleTxt_hz, UPRV_LENGTHOF(sampleTxt_hz),
2514 hztoUnicode, UPRV_LENGTHOF(hztoUnicode),"HZ",
2515 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0))
2516 log_err("hz->u with substitute with value did not match.\n");
2517
2518 if(!testConvertToUnicode(sampleTxt_iscii, UPRV_LENGTHOF(sampleTxt_iscii),
2519 isciitoUnicode, UPRV_LENGTHOF(isciitoUnicode),"ISCII,version=0",
2520 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0))
2521 log_err("ISCII ->u with substitute with value did not match.\n");
2522 #endif
2523
2524 if(!testConvertToUnicode(sampleTxtUTF8, UPRV_LENGTHOF(sampleTxtUTF8),
2525 UTF8ToUnicode, UPRV_LENGTHOF(UTF8ToUnicode),"UTF-8",
2526 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0))
2527 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2528 if(!testConvertToUnicodeWithContext(sampleTxtUTF8, UPRV_LENGTHOF(sampleTxtUTF8),
2529 UTF8ToUnicodeXML_DEC, UPRV_LENGTHOF(UTF8ToUnicodeXML_DEC),"UTF-8",
2530 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR))
2531 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2532 }
2533 }
2534
2535 #if !UCONFIG_NO_LEGACY_CONVERSION
TestLegalAndOthers(int32_t inputsize,int32_t outputsize)2536 static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize)
2537 {
2538 static const UChar legalText[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 };
2539 static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
2540 static const int32_t to949legal[] = {0, 1, 1, 2, 2, 3, 3};
2541
2542
2543 static const uint8_t text943[] = {
2544 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
2545 static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22, 0x5b57 };
2546 static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b57 };
2547 static const UChar toUnicode943stop[]= { 0x304b};
2548
2549 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7 };
2550 static const int32_t fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 };
2551 static const int32_t fromIBM943Offsstop[] = { 0};
2552
2553 gInBufferSize = inputsize;
2554 gOutBufferSize = outputsize;
2555 /*checking with a legal value*/
2556 if(!testConvertFromUnicode(legalText, UPRV_LENGTHOF(legalText),
2557 templegal949, UPRV_LENGTHOF(templegal949), "ibm-949",
2558 UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 ))
2559 log_err("u-> ibm-949 with skip did not match.\n");
2560
2561 /*checking illegal value for ibm-943 with substitute*/
2562 if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2563 toUnicode943sub, UPRV_LENGTHOF(toUnicode943sub),"ibm-943",
2564 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2565 log_err("ibm-943->u with subst did not match.\n");
2566 /*checking illegal value for ibm-943 with skip */
2567 if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2568 toUnicode943skip, UPRV_LENGTHOF(toUnicode943skip),"ibm-943",
2569 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 ))
2570 log_err("ibm-943->u with skip did not match.\n");
2571
2572 /*checking illegal value for ibm-943 with stop */
2573 if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2574 toUnicode943stop, UPRV_LENGTHOF(toUnicode943stop),"ibm-943",
2575 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 ))
2576 log_err("ibm-943->u with stop did not match.\n");
2577
2578 }
2579
TestSingleByte(int32_t inputsize,int32_t outputsize)2580 static void TestSingleByte(int32_t inputsize, int32_t outputsize)
2581 {
2582 static const uint8_t sampleText[] = {
2583 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
2584 0xff, 0x32, 0x33};
2585 static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 };
2586 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 };
2587 /*checking illegal value for ibm-943 with substitute*/
2588 gInBufferSize = inputsize;
2589 gOutBufferSize = outputsize;
2590
2591 if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
2592 toUnicode943sub, UPRV_LENGTHOF(toUnicode943sub),"ibm-943",
2593 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2594 log_err("ibm-943->u with subst did not match.\n");
2595 }
2596
TestEBCDIC_STATEFUL_Sub(int32_t inputsize,int32_t outputsize)2597 static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize)
2598 {
2599 /*EBCDIC_STATEFUL*/
2600 static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 };
2601 static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 };
2602 static const int32_t offset_930[]= { 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5 };
2603 /* s SO doubl SI sng s SO fe fe SI s */
2604
2605 /*EBCDIC_STATEFUL with subChar=3f*/
2606 static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 };
2607 static const int32_t offset_930_subvaried[]= { 0, 1, 1, 1, 2, 2, 3, 4, 5 };
2608 static const char mySubChar[]={ 0x3f};
2609
2610 gInBufferSize = inputsize;
2611 gOutBufferSize = outputsize;
2612
2613 if(!testConvertFromUnicode(ebcdic_inputTest, UPRV_LENGTHOF(ebcdic_inputTest),
2614 toIBM930, UPRV_LENGTHOF(toIBM930), "ibm-930",
2615 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 ))
2616 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n");
2617
2618 if(!testConvertFromUnicode(ebcdic_inputTest, UPRV_LENGTHOF(ebcdic_inputTest),
2619 toIBM930_subvaried, UPRV_LENGTHOF(toIBM930_subvaried), "ibm-930",
2620 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 ))
2621 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n");
2622 }
2623 #endif
2624
testConvertFromUnicode(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UConverterFromUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len)2625 UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
2626 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
2627 const char *mySubChar, int8_t len)
2628 {
2629
2630
2631 UErrorCode status = U_ZERO_ERROR;
2632 UConverter *conv = 0;
2633 char junkout[NEW_MAX_BUFFER]; /* FIX */
2634 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2635 const UChar *src;
2636 char *end;
2637 char *targ;
2638 int32_t *offs;
2639 int i;
2640 int32_t realBufferSize;
2641 char *realBufferEnd;
2642 const UChar *realSourceEnd;
2643 const UChar *sourceLimit;
2644 UBool checkOffsets = TRUE;
2645 UBool doFlush;
2646 char junk[9999];
2647 char offset_str[9999];
2648 char *p;
2649 UConverterFromUCallback oldAction = NULL;
2650 const void* oldContext = NULL;
2651
2652
2653 for(i=0;i<NEW_MAX_BUFFER;i++)
2654 junkout[i] = (char)0xF0;
2655 for(i=0;i<NEW_MAX_BUFFER;i++)
2656 junokout[i] = 0xFF;
2657 setNuConvTestName(codepage, "FROM");
2658
2659 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
2660 gOutBufferSize);
2661
2662 conv = ucnv_open(codepage, &status);
2663 if(U_FAILURE(status))
2664 {
2665 log_data_err("Couldn't open converter %s\n",codepage);
2666 return TRUE;
2667 }
2668
2669 log_verbose("Converter opened..\n");
2670
2671 /*----setting the callback routine----*/
2672 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2673 if (U_FAILURE(status))
2674 {
2675 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2676 }
2677 /*------------------------*/
2678 /*setting the subChar*/
2679 if(mySubChar != NULL){
2680 ucnv_setSubstChars(conv, mySubChar, len, &status);
2681 if (U_FAILURE(status)) {
2682 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2683 }
2684 }
2685 /*------------*/
2686
2687 src = source;
2688 targ = junkout;
2689 offs = junokout;
2690
2691 realBufferSize = UPRV_LENGTHOF(junkout);
2692 realBufferEnd = junkout + realBufferSize;
2693 realSourceEnd = source + sourceLen;
2694
2695 if ( gOutBufferSize != realBufferSize )
2696 checkOffsets = FALSE;
2697
2698 if( gInBufferSize != NEW_MAX_BUFFER )
2699 checkOffsets = FALSE;
2700
2701 do
2702 {
2703 end = nct_min(targ + gOutBufferSize, realBufferEnd);
2704 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
2705
2706 doFlush = (UBool)(sourceLimit == realSourceEnd);
2707
2708 if(targ == realBufferEnd)
2709 {
2710 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
2711 return FALSE;
2712 }
2713 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
2714
2715
2716 status = U_ZERO_ERROR;
2717
2718 ucnv_fromUnicode (conv,
2719 (char **)&targ,
2720 (const char *)end,
2721 &src,
2722 sourceLimit,
2723 checkOffsets ? offs : NULL,
2724 doFlush, /* flush if we're at the end of the input data */
2725 &status);
2726 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
2727
2728
2729 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2730 UChar errChars[50]; /* should be sufficient */
2731 int8_t errLen = 50;
2732 UErrorCode err = U_ZERO_ERROR;
2733 const UChar* start= NULL;
2734 ucnv_getInvalidUChars(conv,errChars, &errLen, &err);
2735 if(U_FAILURE(err)){
2736 log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err));
2737 }
2738 /* length of in invalid chars should be equal to returned length*/
2739 start = src - errLen;
2740 if(u_strncmp(errChars,start,errLen)!=0){
2741 log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2742 }
2743 }
2744 /* allow failure codes for the stop callback */
2745 if(U_FAILURE(status) &&
2746 (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND)))
2747 {
2748 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2749 return FALSE;
2750 }
2751
2752 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
2753 sourceLen, targ-junkout);
2754 if(getTestOption(VERBOSITY_OPTION))
2755 {
2756
2757 junk[0] = 0;
2758 offset_str[0] = 0;
2759 for(p = junkout;p<targ;p++)
2760 {
2761 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
2762 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
2763 }
2764
2765 log_verbose(junk);
2766 printSeq(expect, expectLen);
2767 if ( checkOffsets )
2768 {
2769 log_verbose("\nOffsets:");
2770 log_verbose(offset_str);
2771 }
2772 log_verbose("\n");
2773 }
2774 ucnv_close(conv);
2775
2776
2777 if(expectLen != targ-junkout)
2778 {
2779 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2780 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2781 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2782 printSeqErr(expect, expectLen);
2783 return FALSE;
2784 }
2785
2786 if (checkOffsets && (expectOffsets != 0) )
2787 {
2788 log_verbose("comparing %d offsets..\n", targ-junkout);
2789 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
2790 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2791 log_err("Got Output : ");
2792 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2793 log_err("Got Offsets: ");
2794 for(p=junkout;p<targ;p++)
2795 log_err("%d,", junokout[p-junkout]);
2796 log_err("\n");
2797 log_err("Expected Offsets: ");
2798 for(i=0; i<(targ-junkout); i++)
2799 log_err("%d,", expectOffsets[i]);
2800 log_err("\n");
2801 return FALSE;
2802 }
2803 }
2804
2805 if(!memcmp(junkout, expect, expectLen))
2806 {
2807 log_verbose("String matches! %s\n", gNuConvTestName);
2808 return TRUE;
2809 }
2810 else
2811 {
2812 log_err("String does not match. %s\n", gNuConvTestName);
2813 log_err("source: ");
2814 printUSeqErr(source, sourceLen);
2815 log_err("Got: ");
2816 printSeqErr((const uint8_t *)junkout, expectLen);
2817 log_err("Expected: ");
2818 printSeqErr(expect, expectLen);
2819 return FALSE;
2820 }
2821 }
2822
testConvertToUnicode(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UConverterToUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len)2823 UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
2824 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
2825 const char *mySubChar, int8_t len)
2826 {
2827 UErrorCode status = U_ZERO_ERROR;
2828 UConverter *conv = 0;
2829 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
2830 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2831 const char *src;
2832 const char *realSourceEnd;
2833 const char *srcLimit;
2834 UChar *targ;
2835 UChar *end;
2836 int32_t *offs;
2837 int i;
2838 UBool checkOffsets = TRUE;
2839 char junk[9999];
2840 char offset_str[9999];
2841 UChar *p;
2842 UConverterToUCallback oldAction = NULL;
2843 const void* oldContext = NULL;
2844
2845 int32_t realBufferSize;
2846 UChar *realBufferEnd;
2847
2848
2849 for(i=0;i<NEW_MAX_BUFFER;i++)
2850 junkout[i] = 0xFFFE;
2851
2852 for(i=0;i<NEW_MAX_BUFFER;i++)
2853 junokout[i] = -1;
2854
2855 setNuConvTestName(codepage, "TO");
2856
2857 log_verbose("\n========= %s\n", gNuConvTestName);
2858
2859 conv = ucnv_open(codepage, &status);
2860 if(U_FAILURE(status))
2861 {
2862 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
2863 return TRUE;
2864 }
2865
2866 log_verbose("Converter opened..\n");
2867
2868 src = (const char *)source;
2869 targ = junkout;
2870 offs = junokout;
2871
2872 realBufferSize = UPRV_LENGTHOF(junkout);
2873 realBufferEnd = junkout + realBufferSize;
2874 realSourceEnd = src + sourcelen;
2875 /*----setting the callback routine----*/
2876 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2877 if (U_FAILURE(status))
2878 {
2879 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2880 }
2881 /*-------------------------------------*/
2882 /*setting the subChar*/
2883 if(mySubChar != NULL){
2884 ucnv_setSubstChars(conv, mySubChar, len, &status);
2885 if (U_FAILURE(status)) {
2886 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2887 }
2888 }
2889 /*------------*/
2890
2891
2892 if ( gOutBufferSize != realBufferSize )
2893 checkOffsets = FALSE;
2894
2895 if( gInBufferSize != NEW_MAX_BUFFER )
2896 checkOffsets = FALSE;
2897
2898 do
2899 {
2900 end = nct_min( targ + gOutBufferSize, realBufferEnd);
2901 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
2902
2903 if(targ == realBufferEnd)
2904 {
2905 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
2906 return FALSE;
2907 }
2908 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
2909
2910
2911
2912 status = U_ZERO_ERROR;
2913
2914 ucnv_toUnicode (conv,
2915 &targ,
2916 end,
2917 (const char **)&src,
2918 (const char *)srcLimit,
2919 checkOffsets ? offs : NULL,
2920 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
2921 &status);
2922 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
2923
2924 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2925 char errChars[50]; /* should be sufficient */
2926 int8_t errLen = 50;
2927 UErrorCode err = U_ZERO_ERROR;
2928 const char* start= NULL;
2929 ucnv_getInvalidChars(conv,errChars, &errLen, &err);
2930 if(U_FAILURE(err)){
2931 log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err));
2932 }
2933 /* length of in invalid chars should be equal to returned length*/
2934 start = src - errLen;
2935 if(uprv_strncmp(errChars,start,errLen)!=0){
2936 log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2937 }
2938 }
2939 /* allow failure codes for the stop callback */
2940 if(U_FAILURE(status) &&
2941 (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND)))
2942 {
2943 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2944 return FALSE;
2945 }
2946
2947 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
2948 sourcelen, targ-junkout);
2949 if(getTestOption(VERBOSITY_OPTION))
2950 {
2951
2952 junk[0] = 0;
2953 offset_str[0] = 0;
2954
2955 for(p = junkout;p<targ;p++)
2956 {
2957 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
2958 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
2959 }
2960
2961 log_verbose(junk);
2962 printUSeq(expect, expectlen);
2963 if ( checkOffsets )
2964 {
2965 log_verbose("\nOffsets:");
2966 log_verbose(offset_str);
2967 }
2968 log_verbose("\n");
2969 }
2970 ucnv_close(conv);
2971
2972 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
2973
2974 if (checkOffsets && (expectOffsets != 0))
2975 {
2976 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
2977 {
2978 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2979 log_err("Got offsets: ");
2980 for(p=junkout;p<targ;p++)
2981 log_err(" %2d,", junokout[p-junkout]);
2982 log_err("\n");
2983 log_err("Expected offsets: ");
2984 for(i=0; i<(targ-junkout); i++)
2985 log_err(" %2d,", expectOffsets[i]);
2986 log_err("\n");
2987 log_err("Got output: ");
2988 for(i=0; i<(targ-junkout); i++)
2989 log_err("0x%04x,", junkout[i]);
2990 log_err("\n");
2991 log_err("From source: ");
2992 for(i=0; i<(src-(const char *)source); i++)
2993 log_err(" 0x%02x,", (unsigned char)source[i]);
2994 log_err("\n");
2995 }
2996 }
2997
2998 if(!memcmp(junkout, expect, expectlen*2))
2999 {
3000 log_verbose("Matches!\n");
3001 return TRUE;
3002 }
3003 else
3004 {
3005 log_err("String does not match. %s\n", gNuConvTestName);
3006 log_verbose("String does not match. %s\n", gNuConvTestName);
3007 log_err("Got: ");
3008 printUSeqErr(junkout, expectlen);
3009 log_err("Expected: ");
3010 printUSeqErr(expect, expectlen);
3011 log_err("\n");
3012 return FALSE;
3013 }
3014 }
3015
testConvertFromUnicodeWithContext(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UConverterFromUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len,const void * context,UErrorCode expectedError)3016 UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
3017 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
3018 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3019 {
3020
3021
3022 UErrorCode status = U_ZERO_ERROR;
3023 UConverter *conv = 0;
3024 char junkout[NEW_MAX_BUFFER]; /* FIX */
3025 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3026 const UChar *src;
3027 char *end;
3028 char *targ;
3029 int32_t *offs;
3030 int i;
3031 int32_t realBufferSize;
3032 char *realBufferEnd;
3033 const UChar *realSourceEnd;
3034 const UChar *sourceLimit;
3035 UBool checkOffsets = TRUE;
3036 UBool doFlush;
3037 char junk[9999];
3038 char offset_str[9999];
3039 char *p;
3040 UConverterFromUCallback oldAction = NULL;
3041 const void* oldContext = NULL;
3042
3043
3044 for(i=0;i<NEW_MAX_BUFFER;i++)
3045 junkout[i] = (char)0xF0;
3046 for(i=0;i<NEW_MAX_BUFFER;i++)
3047 junokout[i] = 0xFF;
3048 setNuConvTestName(codepage, "FROM");
3049
3050 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
3051 gOutBufferSize);
3052
3053 conv = ucnv_open(codepage, &status);
3054 if(U_FAILURE(status))
3055 {
3056 log_data_err("Couldn't open converter %s\n",codepage);
3057 return TRUE; /* Because the err has already been logged. */
3058 }
3059
3060 log_verbose("Converter opened..\n");
3061
3062 /*----setting the callback routine----*/
3063 ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3064 if (U_FAILURE(status))
3065 {
3066 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3067 }
3068 /*------------------------*/
3069 /*setting the subChar*/
3070 if(mySubChar != NULL){
3071 ucnv_setSubstChars(conv, mySubChar, len, &status);
3072 if (U_FAILURE(status)) {
3073 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status));
3074 }
3075 }
3076 /*------------*/
3077
3078 src = source;
3079 targ = junkout;
3080 offs = junokout;
3081
3082 realBufferSize = UPRV_LENGTHOF(junkout);
3083 realBufferEnd = junkout + realBufferSize;
3084 realSourceEnd = source + sourceLen;
3085
3086 if ( gOutBufferSize != realBufferSize )
3087 checkOffsets = FALSE;
3088
3089 if( gInBufferSize != NEW_MAX_BUFFER )
3090 checkOffsets = FALSE;
3091
3092 do
3093 {
3094 end = nct_min(targ + gOutBufferSize, realBufferEnd);
3095 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
3096
3097 doFlush = (UBool)(sourceLimit == realSourceEnd);
3098
3099 if(targ == realBufferEnd)
3100 {
3101 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
3102 return FALSE;
3103 }
3104 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
3105
3106
3107 status = U_ZERO_ERROR;
3108
3109 ucnv_fromUnicode (conv,
3110 (char **)&targ,
3111 (const char *)end,
3112 &src,
3113 sourceLimit,
3114 checkOffsets ? offs : NULL,
3115 doFlush, /* flush if we're at the end of the input data */
3116 &status);
3117 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
3118
3119 /* allow failure codes for the stop callback */
3120 if(U_FAILURE(status) && status != expectedError)
3121 {
3122 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3123 return FALSE;
3124 }
3125
3126 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
3127 sourceLen, targ-junkout);
3128 if(getTestOption(VERBOSITY_OPTION))
3129 {
3130
3131 junk[0] = 0;
3132 offset_str[0] = 0;
3133 for(p = junkout;p<targ;p++)
3134 {
3135 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
3136 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
3137 }
3138
3139 log_verbose(junk);
3140 printSeq(expect, expectLen);
3141 if ( checkOffsets )
3142 {
3143 log_verbose("\nOffsets:");
3144 log_verbose(offset_str);
3145 }
3146 log_verbose("\n");
3147 }
3148 ucnv_close(conv);
3149
3150
3151 if(expectLen != targ-junkout)
3152 {
3153 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3154 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3155 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3156 printSeqErr(expect, expectLen);
3157 return FALSE;
3158 }
3159
3160 if (checkOffsets && (expectOffsets != 0) )
3161 {
3162 log_verbose("comparing %d offsets..\n", targ-junkout);
3163 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
3164 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3165 log_err("Got Output : ");
3166 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3167 log_err("Got Offsets: ");
3168 for(p=junkout;p<targ;p++)
3169 log_err("%d,", junokout[p-junkout]);
3170 log_err("\n");
3171 log_err("Expected Offsets: ");
3172 for(i=0; i<(targ-junkout); i++)
3173 log_err("%d,", expectOffsets[i]);
3174 log_err("\n");
3175 return FALSE;
3176 }
3177 }
3178
3179 if(!memcmp(junkout, expect, expectLen))
3180 {
3181 log_verbose("String matches! %s\n", gNuConvTestName);
3182 return TRUE;
3183 }
3184 else
3185 {
3186 log_err("String does not match. %s\n", gNuConvTestName);
3187 log_err("source: ");
3188 printUSeqErr(source, sourceLen);
3189 log_err("Got: ");
3190 printSeqErr((const uint8_t *)junkout, expectLen);
3191 log_err("Expected: ");
3192 printSeqErr(expect, expectLen);
3193 return FALSE;
3194 }
3195 }
testConvertToUnicodeWithContext(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UConverterToUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len,const void * context,UErrorCode expectedError)3196 UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
3197 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
3198 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3199 {
3200 UErrorCode status = U_ZERO_ERROR;
3201 UConverter *conv = 0;
3202 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
3203 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3204 const char *src;
3205 const char *realSourceEnd;
3206 const char *srcLimit;
3207 UChar *targ;
3208 UChar *end;
3209 int32_t *offs;
3210 int i;
3211 UBool checkOffsets = TRUE;
3212 char junk[9999];
3213 char offset_str[9999];
3214 UChar *p;
3215 UConverterToUCallback oldAction = NULL;
3216 const void* oldContext = NULL;
3217
3218 int32_t realBufferSize;
3219 UChar *realBufferEnd;
3220
3221
3222 for(i=0;i<NEW_MAX_BUFFER;i++)
3223 junkout[i] = 0xFFFE;
3224
3225 for(i=0;i<NEW_MAX_BUFFER;i++)
3226 junokout[i] = -1;
3227
3228 setNuConvTestName(codepage, "TO");
3229
3230 log_verbose("\n========= %s\n", gNuConvTestName);
3231
3232 conv = ucnv_open(codepage, &status);
3233 if(U_FAILURE(status))
3234 {
3235 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
3236 return TRUE;
3237 }
3238
3239 log_verbose("Converter opened..\n");
3240
3241 src = (const char *)source;
3242 targ = junkout;
3243 offs = junokout;
3244
3245 realBufferSize = UPRV_LENGTHOF(junkout);
3246 realBufferEnd = junkout + realBufferSize;
3247 realSourceEnd = src + sourcelen;
3248 /*----setting the callback routine----*/
3249 ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3250 if (U_FAILURE(status))
3251 {
3252 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3253 }
3254 /*-------------------------------------*/
3255 /*setting the subChar*/
3256 if(mySubChar != NULL){
3257 ucnv_setSubstChars(conv, mySubChar, len, &status);
3258 if (U_FAILURE(status)) {
3259 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3260 }
3261 }
3262 /*------------*/
3263
3264
3265 if ( gOutBufferSize != realBufferSize )
3266 checkOffsets = FALSE;
3267
3268 if( gInBufferSize != NEW_MAX_BUFFER )
3269 checkOffsets = FALSE;
3270
3271 do
3272 {
3273 end = nct_min( targ + gOutBufferSize, realBufferEnd);
3274 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
3275
3276 if(targ == realBufferEnd)
3277 {
3278 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
3279 return FALSE;
3280 }
3281 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
3282
3283
3284
3285 status = U_ZERO_ERROR;
3286
3287 ucnv_toUnicode (conv,
3288 &targ,
3289 end,
3290 (const char **)&src,
3291 (const char *)srcLimit,
3292 checkOffsets ? offs : NULL,
3293 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
3294 &status);
3295 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
3296
3297 /* allow failure codes for the stop callback */
3298 if(U_FAILURE(status) && status!=expectedError)
3299 {
3300 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3301 return FALSE;
3302 }
3303
3304 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
3305 sourcelen, targ-junkout);
3306 if(getTestOption(VERBOSITY_OPTION))
3307 {
3308
3309 junk[0] = 0;
3310 offset_str[0] = 0;
3311
3312 for(p = junkout;p<targ;p++)
3313 {
3314 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
3315 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
3316 }
3317
3318 log_verbose(junk);
3319 printUSeq(expect, expectlen);
3320 if ( checkOffsets )
3321 {
3322 log_verbose("\nOffsets:");
3323 log_verbose(offset_str);
3324 }
3325 log_verbose("\n");
3326 }
3327 ucnv_close(conv);
3328
3329 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
3330
3331 if (checkOffsets && (expectOffsets != 0))
3332 {
3333 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
3334 {
3335 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3336 log_err("Got offsets: ");
3337 for(p=junkout;p<targ;p++)
3338 log_err(" %2d,", junokout[p-junkout]);
3339 log_err("\n");
3340 log_err("Expected offsets: ");
3341 for(i=0; i<(targ-junkout); i++)
3342 log_err(" %2d,", expectOffsets[i]);
3343 log_err("\n");
3344 log_err("Got output: ");
3345 for(i=0; i<(targ-junkout); i++)
3346 log_err("0x%04x,", junkout[i]);
3347 log_err("\n");
3348 log_err("From source: ");
3349 for(i=0; i<(src-(const char *)source); i++)
3350 log_err(" 0x%02x,", (unsigned char)source[i]);
3351 log_err("\n");
3352 }
3353 }
3354
3355 if(!memcmp(junkout, expect, expectlen*2))
3356 {
3357 log_verbose("Matches!\n");
3358 return TRUE;
3359 }
3360 else
3361 {
3362 log_err("String does not match. %s\n", gNuConvTestName);
3363 log_verbose("String does not match. %s\n", gNuConvTestName);
3364 log_err("Got: ");
3365 printUSeqErr(junkout, expectlen);
3366 log_err("Expected: ");
3367 printUSeqErr(expect, expectlen);
3368 log_err("\n");
3369 return FALSE;
3370 }
3371 }
3372
TestCallBackFailure(void)3373 static void TestCallBackFailure(void) {
3374 UErrorCode status = U_USELESS_COLLATOR_ERROR;
3375 ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status);
3376 if (status != U_USELESS_COLLATOR_ERROR) {
3377 log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n");
3378 }
3379 ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status);
3380 if (status != U_USELESS_COLLATOR_ERROR) {
3381 log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n");
3382 }
3383 ucnv_cbFromUWriteSub(NULL, -1, &status);
3384 if (status != U_USELESS_COLLATOR_ERROR) {
3385 log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n");
3386 }
3387 ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status);
3388 if (status != U_USELESS_COLLATOR_ERROR) {
3389 log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n");
3390 }
3391 }
3392