1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /*
9 ********************************************************************************
10 * File NCCBTST.C
11 *
12 * Modification History:
13 * Name Description
14 * Madhu Katragadda 7/21/1999 Testing error callback routines
15 ********************************************************************************
16 */
17 #include <ctype.h>
18 #include <stdbool.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include "cmemory.h"
23 #include "cstring.h"
24 #include "unicode/uloc.h"
25 #include "unicode/ucnv.h"
26 #include "unicode/ucnv_err.h"
27 #include "cintltst.h"
28 #include "unicode/utypes.h"
29 #include "unicode/ustring.h"
30 #include "nccbtst.h"
31 #include "unicode/ucnv_cb.h"
32 #include "unicode/utf16.h"
33
/*
 * Large chunk size.  The *CallBack wrappers in this file pass it, alongside 1,
 * as the input/output buffer size of the individual test drivers.
 */
#define NEW_MAX_BUFFER 999

/*
 * Yields the smaller of x and y.
 * Every use of an argument is parenthesized so that arguments containing
 * low-precedence operators (e.g. `a | b`) expand as intended.
 * NOTE: an argument may still be evaluated twice; do not pass expressions
 * with side effects.
 */
#define nct_min(x,y) (((x) < (y)) ? (x) : (y))

/* Input buffer size of the test in progress; assigned by the test drivers (e.g. TestSkip). */
static int32_t gInBufferSize = 0;
/* Output buffer size of the test in progress; assigned by the test drivers (e.g. TestSkip). */
static int32_t gOutBufferSize = 0;
/* Label of the conversion under test; filled in by setNuConvTestName(). */
static char gNuConvTestName[1024];
41
/*
 * Writes the first len bytes of a to the verbose log as a brace-enclosed list
 * of two-digit upper-case hex values ("0xNN, "), preceded by a newline.
 * Only the braces are logged when len <= 0.
 */
static void printSeq(const uint8_t* a, int len)
{
    int pos;

    log_verbose("\n{");
    for (pos = 0; pos < len; ++pos) {
        log_verbose("0x%02X, ", a[pos]);
    }
    log_verbose("}\n");
}
50
/*
 * Writes the first len UChar units of a to the verbose log as a
 * brace-enclosed list of four-digit lower-case hex values (" 0xnnnn, ").
 * Only the braces are logged when len <= 0.
 */
static void printUSeq(const UChar* a, int len)
{
    int pos;

    log_verbose("{");
    for (pos = 0; pos < len; ++pos) {
        log_verbose(" 0x%04x, ", a[pos]);
    }
    log_verbose("}\n");
}
59
/*
 * Same kind of dump as printSeq(), but written straight to stderr:
 * the first len bytes of a as a brace-enclosed list of two-digit lower-case
 * hex values (" 0xnn, ").  Only the braces are printed when len <= 0.
 */
static void printSeqErr(const uint8_t* a, int len)
{
    int pos;

    fprintf(stderr, "{");
    for (pos = 0; pos < len; ++pos) {
        fprintf(stderr, " 0x%02x, ", a[pos]);
    }
    fprintf(stderr, "}\n");
}
68
/*
 * Writes the first len UChar units of a to stderr as a brace-enclosed list of
 * four-digit lower-case hex values ("0xnnnn, ").
 * Only the braces are printed when len <= 0.
 */
static void printUSeqErr(const UChar* a, int len)
{
    int pos;

    fprintf(stderr, "{");
    for (pos = 0; pos < len; ++pos) {
        fprintf(stderr, "0x%04x, ", a[pos]);
    }
    fprintf(stderr, "}\n");
}
77
setNuConvTestName(const char * codepage,const char * direction)78 static void setNuConvTestName(const char *codepage, const char *direction)
79 {
80 sprintf(gNuConvTestName, "[testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
81 codepage,
82 direction,
83 (int)gInBufferSize,
84 (int)gOutBufferSize);
85 }
86
87
88 static void TestCallBackFailure(void);
89
90 void addTestConvertErrorCallBack(TestNode** root);
91
addTestConvertErrorCallBack(TestNode ** root)92 void addTestConvertErrorCallBack(TestNode** root)
93 {
94 addTest(root, &TestSkipCallBack, "tsconv/nccbtst/TestSkipCallBack");
95 addTest(root, &TestStopCallBack, "tsconv/nccbtst/TestStopCallBack");
96 addTest(root, &TestSubCallBack, "tsconv/nccbtst/TestSubCallBack");
97 addTest(root, &TestSubWithValueCallBack, "tsconv/nccbtst/TestSubWithValueCallBack");
98
99 #if !UCONFIG_NO_LEGACY_CONVERSION
100 addTest(root, &TestLegalAndOtherCallBack, "tsconv/nccbtst/TestLegalAndOtherCallBack");
101 addTest(root, &TestSingleByteCallBack, "tsconv/nccbtst/TestSingleByteCallBack");
102 #endif
103
104 addTest(root, &TestCallBackFailure, "tsconv/nccbtst/TestCallBackFailure");
105 }
106
TestSkipCallBack()107 static void TestSkipCallBack()
108 {
109 TestSkip(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
110 TestSkip(1,NEW_MAX_BUFFER);
111 TestSkip(1,1);
112 TestSkip(NEW_MAX_BUFFER, 1);
113 }
114
TestStopCallBack()115 static void TestStopCallBack()
116 {
117 TestStop(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
118 TestStop(1,NEW_MAX_BUFFER);
119 TestStop(1,1);
120 TestStop(NEW_MAX_BUFFER, 1);
121 }
122
TestSubCallBack()123 static void TestSubCallBack()
124 {
125 TestSub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
126 TestSub(1,NEW_MAX_BUFFER);
127 TestSub(1,1);
128 TestSub(NEW_MAX_BUFFER, 1);
129
130 #if !UCONFIG_NO_LEGACY_CONVERSION
131 TestEBCDIC_STATEFUL_Sub(1, 1);
132 TestEBCDIC_STATEFUL_Sub(1, NEW_MAX_BUFFER);
133 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, 1);
134 TestEBCDIC_STATEFUL_Sub(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
135 #endif
136 }
137
TestSubWithValueCallBack()138 static void TestSubWithValueCallBack()
139 {
140 TestSubWithValue(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
141 TestSubWithValue(1,NEW_MAX_BUFFER);
142 TestSubWithValue(1,1);
143 TestSubWithValue(NEW_MAX_BUFFER, 1);
144 }
145
146 #if !UCONFIG_NO_LEGACY_CONVERSION
TestLegalAndOtherCallBack()147 static void TestLegalAndOtherCallBack()
148 {
149 TestLegalAndOthers(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
150 TestLegalAndOthers(1,NEW_MAX_BUFFER);
151 TestLegalAndOthers(1,1);
152 TestLegalAndOthers(NEW_MAX_BUFFER, 1);
153 }
154
TestSingleByteCallBack()155 static void TestSingleByteCallBack()
156 {
157 TestSingleByte(NEW_MAX_BUFFER, NEW_MAX_BUFFER);
158 TestSingleByte(1,NEW_MAX_BUFFER);
159 TestSingleByte(1,1);
160 TestSingleByte(NEW_MAX_BUFFER, 1);
161 }
162 #endif
163
TestSkip(int32_t inputsize,int32_t outputsize)164 static void TestSkip(int32_t inputsize, int32_t outputsize)
165 {
166 static const uint8_t expskipIBM_949[]= {
167 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
168
169 static const uint8_t expskipIBM_943[] = {
170 0x9f, 0xaf, 0x9f, 0xb1, 0x89, 0x59 };
171
172 static const uint8_t expskipIBM_930[] = {
173 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x46, 0x6b, 0x0f };
174
175 gInBufferSize = inputsize;
176 gOutBufferSize = outputsize;
177
178 /*From Unicode*/
179 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SKIP \n");
180
181 #if !UCONFIG_NO_LEGACY_CONVERSION
182 {
183 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
184 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
185
186 static const int32_t toIBM949Offsskip [] = { 0, 1, 1, 2, 2, 4, 4 };
187 static const int32_t toIBM943Offsskip [] = { 0, 0, 1, 1, 3, 3 };
188
189 if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
190 expskipIBM_949, UPRV_LENGTHOF(expskipIBM_949), "ibm-949",
191 UCNV_FROM_U_CALLBACK_SKIP, toIBM949Offsskip, NULL, 0 ))
192 log_err("u-> ibm-949 with skip did not match.\n");
193 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
194 expskipIBM_943, UPRV_LENGTHOF(expskipIBM_943), "ibm-943",
195 UCNV_FROM_U_CALLBACK_SKIP, toIBM943Offsskip, NULL, 0 ))
196 log_err("u-> ibm-943 with skip did not match.\n");
197 }
198
199 {
200 static const UChar fromU[] = { 0x61, 0xff5e, 0x62, 0x6d63, 0xff5e, 0x6d64, 0x63, 0xff5e, 0x6d66 };
201 static const uint8_t fromUBytes[] = { 0x62, 0x63, 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x64, 0x0e, 0x46, 0x6b, 0x0f };
202 static const int32_t fromUOffsets[] = { 0, 2, 3, 3, 3, 5, 5, 6, 6, 8, 8, 8, 8 };
203
204 /* test ibm-930 (EBCDIC_STATEFUL) with fallbacks that are not taken to check correct state transitions */
205 if(!testConvertFromUnicode(fromU, UPRV_LENGTHOF(fromU),
206 fromUBytes, UPRV_LENGTHOF(fromUBytes),
207 "ibm-930",
208 UCNV_FROM_U_CALLBACK_SKIP, fromUOffsets,
209 NULL, 0)
210 ) {
211 log_err("u->ibm-930 with skip with untaken fallbacks did not match.\n");
212 }
213 }
214 #endif
215
216 {
217 static const UChar usasciiFromU[] = { 0x61, 0x80, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
218 static const uint8_t usasciiFromUBytes[] = { 0x61, 0x31, 0x39 };
219 static const int32_t usasciiFromUOffsets[] = { 0, 3, 6 };
220
221 static const UChar latin1FromU[] = { 0x61, 0xa0, 0x4e00, 0x31, 0xd800, 0xdfff, 0x39 };
222 static const uint8_t latin1FromUBytes[] = { 0x61, 0xa0, 0x31, 0x39 };
223 static const int32_t latin1FromUOffsets[] = { 0, 1, 3, 6 };
224
225 /* US-ASCII */
226 if(!testConvertFromUnicode(usasciiFromU, UPRV_LENGTHOF(usasciiFromU),
227 usasciiFromUBytes, UPRV_LENGTHOF(usasciiFromUBytes),
228 "US-ASCII",
229 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
230 NULL, 0)
231 ) {
232 log_err("u->US-ASCII with skip did not match.\n");
233 }
234
235 #if !UCONFIG_NO_LEGACY_CONVERSION
236 /* SBCS NLTC codepage 367 for US-ASCII */
237 if(!testConvertFromUnicode(usasciiFromU, UPRV_LENGTHOF(usasciiFromU),
238 usasciiFromUBytes, UPRV_LENGTHOF(usasciiFromUBytes),
239 "ibm-367",
240 UCNV_FROM_U_CALLBACK_SKIP, usasciiFromUOffsets,
241 NULL, 0)
242 ) {
243 log_err("u->ibm-367 with skip did not match.\n");
244 }
245 #endif
246
247 /* ISO-Latin-1 */
248 if(!testConvertFromUnicode(latin1FromU, UPRV_LENGTHOF(latin1FromU),
249 latin1FromUBytes, UPRV_LENGTHOF(latin1FromUBytes),
250 "LATIN_1",
251 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
252 NULL, 0)
253 ) {
254 log_err("u->LATIN_1 with skip did not match.\n");
255 }
256
257 #if !UCONFIG_NO_LEGACY_CONVERSION
258 /* windows-1252 */
259 if(!testConvertFromUnicode(latin1FromU, UPRV_LENGTHOF(latin1FromU),
260 latin1FromUBytes, UPRV_LENGTHOF(latin1FromUBytes),
261 "windows-1252",
262 UCNV_FROM_U_CALLBACK_SKIP, latin1FromUOffsets,
263 NULL, 0)
264 ) {
265 log_err("u->windows-1252 with skip did not match.\n");
266 }
267 }
268
269 {
270 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
271 static const uint8_t toIBM943[]= { 0x61, 0x61 };
272 static const int32_t offset[]= {0, 4};
273
274 /* EUC_JP*/
275 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
276 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
277 0x61, 0x8e, 0xe0,
278 };
279 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 6, 7, 7};
280
281 /*EUC_TW*/
282 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
283 static const uint8_t to_euc_tw[]={
284 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
285 0x61, 0xe6, 0xca, 0x8a,
286 };
287 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 6, 7, 7, 8,};
288
289 /*ISO-2022-JP*/
290 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9/*unassigned*/,0x0042, };
291 static const uint8_t to_iso_2022_jp[]={
292 0x41,
293 0x42,
294
295 };
296 static const int32_t from_iso_2022_jpOffs [] ={0,2};
297
298 /*ISO-2022-JP*/
299 UChar const iso_2022_jp_inputText2[]={0x0041, 0x00E9/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
300 static const uint8_t to_iso_2022_jp2[]={
301 0x41,
302 0x43,
303
304 };
305 static const int32_t from_iso_2022_jpOffs2 [] ={0,2};
306
307 /*ISO-2022-cn*/
308 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
309 static const uint8_t to_iso_2022_cn[]={
310 0x41, 0x42
311 };
312 static const int32_t from_iso_2022_cnOffs [] ={
313 0, 2
314 };
315
316 /*ISO-2022-CN*/
317 static const UChar iso_2022_cn_inputText1[]={0x0041, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
318 static const uint8_t to_iso_2022_cn1[]={
319 0x41, 0x43
320
321 };
322 static const int32_t from_iso_2022_cnOffs1 [] ={ 0, 2 };
323
324 /*ISO-2022-kr*/
325 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
326 static const uint8_t to_iso_2022_kr[]={
327 0x1b, 0x24, 0x29, 0x43,
328 0x41,
329 0x0e, 0x25, 0x50,
330 0x25, 0x50,
331 0x0f, 0x42,
332 };
333 static const int32_t from_iso_2022_krOffs [] ={
334 -1,-1,-1,-1,
335 0,
336 1,1,1,
337 3,3,
338 4,4
339 };
340
341 /*ISO-2022-kr*/
342 static const UChar iso_2022_kr_inputText1[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
343 static const uint8_t to_iso_2022_kr1[]={
344 0x1b, 0x24, 0x29, 0x43,
345 0x41,
346 0x0e, 0x25, 0x50,
347 0x25, 0x50,
348
349 };
350 static const int32_t from_iso_2022_krOffs1 [] ={
351 -1,-1,-1,-1,
352 0,
353 1,1,1,
354 3,3,
355
356 };
357 /* HZ encoding */
358 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
359
360 static const uint8_t to_hz[]={
361 0x7e, 0x7d, 0x41,
362 0x7e, 0x7b, 0x26, 0x30,
363 0x26, 0x30,
364 0x7e, 0x7d, 0x42,
365
366 };
367 static const int32_t from_hzOffs [] ={
368 0,0,0,
369 1,1,1,1,
370 3,3,
371 4,4,4,4
372 };
373
374 static const UChar hz_inputText1[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0,0xd801/*illegal*/, 0x0042, };
375
376 static const uint8_t to_hz1[]={
377 0x7e, 0x7d, 0x41,
378 0x7e, 0x7b, 0x26, 0x30,
379 0x26, 0x30,
380
381
382 };
383 static const int32_t from_hzOffs1 [] ={
384 0,0,0,
385 1,1,1,1,
386 3,3,
387
388 };
389
390 #endif
391
392 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
393
394 static const uint8_t to_SCSU[]={
395 0x41,
396 0x42
397
398
399 };
400 static const int32_t from_SCSUOffs [] ={
401 0,
402 2,
403
404 };
405
406 #if !UCONFIG_NO_LEGACY_CONVERSION
407 /* ISCII */
408 static const UChar iscii_inputText[]={ 0x0041, 0x3712/*unassigned*/, 0x0042, };
409 static const uint8_t to_iscii[]={
410 0x41,
411 0x42,
412 };
413 static const int32_t from_isciiOffs [] ={
414 0,2,
415
416 };
417 /*ISCII*/
418 static const UChar iscii_inputText1[]={0x0044, 0x3712/*unassigned*/,0x43,0xd800/*illegal*/,0x0042, };
419 static const uint8_t to_iscii1[]={
420 0x44,
421 0x43,
422
423 };
424 static const int32_t from_isciiOffs1 [] ={0,2};
425
426 if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
427 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
428 UCNV_FROM_U_CALLBACK_SKIP, offset, NULL, 0 ))
429 log_err("u-> ibm-943 with skip did not match.\n");
430
431 if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
432 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
433 UCNV_FROM_U_CALLBACK_SKIP, fromEUC_JPOffs, NULL, 0 ))
434 log_err("u-> euc-jp with skip did not match.\n");
435
436 if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
437 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
438 UCNV_FROM_U_CALLBACK_SKIP, from_euc_twOffs, NULL, 0 ))
439 log_err("u-> euc-tw with skip did not match.\n");
440
441 /*iso_2022_jp*/
442 if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
443 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
444 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs, NULL, 0 ))
445 log_err("u-> iso-2022-jp with skip did not match.\n");
446
447 /* with context */
448 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText2, UPRV_LENGTHOF(iso_2022_jp_inputText2),
449 to_iso_2022_jp2, UPRV_LENGTHOF(to_iso_2022_jp2), "iso-2022-jp",
450 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_jpOffs2, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
451 log_err("u-> iso-2022-jp with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
452
453 /*iso_2022_cn*/
454 if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
455 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
456 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs, NULL, 0 ))
457 log_err("u-> iso-2022-cn with skip did not match.\n");
458 /*with context*/
459 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText1, UPRV_LENGTHOF(iso_2022_cn_inputText1),
460 to_iso_2022_cn1, UPRV_LENGTHOF(to_iso_2022_cn1), "iso-2022-cn",
461 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_cnOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
462 log_err("u-> iso-2022-cn with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
463
464 /*iso_2022_kr*/
465 if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
466 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
467 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs, NULL, 0 ))
468 log_err("u-> iso-2022-kr with skip did not match.\n");
469 /*with context*/
470 if(!testConvertFromUnicodeWithContext(iso_2022_kr_inputText1, UPRV_LENGTHOF(iso_2022_kr_inputText1),
471 to_iso_2022_kr1, UPRV_LENGTHOF(to_iso_2022_kr1), "iso-2022-kr",
472 UCNV_FROM_U_CALLBACK_SKIP, from_iso_2022_krOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
473 log_err("u-> iso-2022-kr with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
474
475 /*hz*/
476 if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
477 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
478 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs, NULL, 0 ))
479 log_err("u-> HZ with skip did not match.\n");
480 /*with context*/
481 if(!testConvertFromUnicodeWithContext(hz_inputText1, UPRV_LENGTHOF(hz_inputText1),
482 to_hz1, UPRV_LENGTHOF(to_hz1), "hz",
483 UCNV_FROM_U_CALLBACK_SKIP, from_hzOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
484 log_err("u-> hz with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
485 #endif
486
487 /*SCSU*/
488 if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
489 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
490 UCNV_FROM_U_CALLBACK_SKIP, from_SCSUOffs, NULL, 0 ))
491 log_err("u-> SCSU with skip did not match.\n");
492
493 #if !UCONFIG_NO_LEGACY_CONVERSION
494 /*ISCII*/
495 if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
496 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
497 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs, NULL, 0 ))
498 log_err("u-> iscii with skip did not match.\n");
499 /*with context*/
500 if(!testConvertFromUnicodeWithContext(iscii_inputText1, UPRV_LENGTHOF(iscii_inputText1),
501 to_iscii1, UPRV_LENGTHOF(to_iscii1), "ISCII,version=0",
502 UCNV_FROM_U_CALLBACK_SKIP, from_isciiOffs1, NULL, 0,UCNV_SKIP_STOP_ON_ILLEGAL,U_ILLEGAL_CHAR_FOUND ))
503 log_err("u-> iscii with skip & UCNV_SKIP_STOP_ON_ILLEGAL did not match.\n");
504 #endif
505 }
506
507 log_verbose("Testing fromUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
508 {
509 static const uint8_t sampleText[]={ /* from cintltst/bocu1tst.c/TestBOCU1 text 1 */
510 0xFB, 0xEE, 0x28, /* from source offset 0 */
511 0x24, 0x1E, 0x52,
512 0xB2,
513 0x20,
514 0xB3,
515 0xB1,
516 0x0D,
517 0x0A,
518
519 0x20, /* from 8 */
520 0x00,
521 0xD0, 0x6C,
522 0xB6,
523 0xD8, 0xA5,
524 0x20,
525 0x68,
526 0x59,
527
528 0xF9, 0x28, /* from 16 */
529 0x6D,
530 0x20,
531 0x73,
532 0xE0, 0x2D,
533 0xDE, 0x43,
534 0xD0, 0x33,
535 0x20,
536
537 0xFA, 0x83, /* from 24 */
538 0x25, 0x01,
539 0xFB, 0x16, 0x87,
540 0x4B, 0x16,
541 0x20,
542 0xE6, 0xBD,
543 0xEB, 0x5B,
544 0x4B, 0xCC,
545
546 0xF9, 0xA2, /* from 32 */
547 0xFC, 0x10, 0x3E,
548 0xFE, 0x16, 0x3A, 0x8C,
549 0x20,
550 0xFC, 0x03, 0xAC,
551
552 0x01, /* from 41 */
553 0xDE, 0x83,
554 0x20,
555 0x09
556 };
557 static const UChar expected[]={
558 0xFEFF, 0x0061, 0x0062, 0x0020, /* 0 */
559 0x0063, 0x0061, 0x000D, 0x000A,
560
561 0x0020, 0x0000, 0x00DF, 0x00E6, /* 8 */
562 0x0930, 0x0020, 0x0918, 0x0909,
563
564 0x3086, 0x304D, 0x0020, 0x3053, /* 16 */
565 0x4000, 0x4E00, 0x7777, 0x0020,
566
567 0x9FA5, 0x4E00, 0xAC00, 0xBCDE, /* 24 */
568 0x0020, 0xD7A3, 0xDC00, 0xD800,
569
570 0xD800, 0xDC00, 0xD845, 0xDDDD, /* 32 */
571 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
572
573 0xDFFF, 0x0001, 0x0E40, 0x0020, /* 40 */
574 0x0009
575 };
576 static const int32_t offsets[]={
577 0, 0, 0, 1, 1, 1, 2, 3, 4, 5, 6, 7,
578 8, 9, 10, 10, 11, 12, 12, 13, 14, 15,
579 16, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23,
580 24, 24, 25, 25, 26, 26, 26, 27, 27, 28, 29, 29, 30, 30, 31, 31,
581 32, 32, 34, 34, 34, 36, 36, 36, 36, 38, 39, 39, 39,
582 41, 42, 42, 43, 44
583 };
584
585 /* BOCU-1 fromUnicode never calls callbacks, so this only tests single-byte and offsets behavior */
586 if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
587 sampleText, UPRV_LENGTHOF(sampleText),
588 "BOCU-1",
589 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
590 ) {
591 log_err("u->BOCU-1 with skip did not match.\n");
592 }
593 }
594
595 log_verbose("Testing fromUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
596 {
597 const uint8_t sampleText[]={
598 0x61, /* 'a' */
599 0xc4, 0xb5, /* U+0135 */
600 0xed, 0x80, 0xa0, /* Hangul U+d020 */
601 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* surrogate pair for U+10401 */
602 0xee, 0x80, 0x80, /* PUA U+e000 */
603 0xed, 0xb0, 0x81, /* unpaired trail surrogate U+dc01 */
604 0x62, /* 'b' */
605 0xed, 0xa0, 0x81, /* unpaired lead surrogate U+d801 */
606 0xd0, 0x80 /* U+0400 */
607 };
608 UChar expected[]={
609 0x0061,
610 0x0135,
611 0xd020,
612 0xd801, 0xdc01,
613 0xe000,
614 0xdc01,
615 0x0062,
616 0xd801,
617 0x0400
618 };
619 int32_t offsets[]={
620 0,
621 1, 1,
622 2, 2, 2,
623 3, 3, 3, 4, 4, 4,
624 5, 5, 5,
625 6, 6, 6,
626 7,
627 8, 8, 8,
628 9, 9
629 };
630
631 /* CESU-8 fromUnicode never calls callbacks, so this only tests conversion and offsets behavior */
632
633 /* without offsets */
634 if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
635 sampleText, UPRV_LENGTHOF(sampleText),
636 "CESU-8",
637 UCNV_FROM_U_CALLBACK_SKIP, NULL, NULL, 0)
638 ) {
639 log_err("u->CESU-8 with skip did not match.\n");
640 }
641
642 /* with offsets */
643 if(!testConvertFromUnicode(expected, UPRV_LENGTHOF(expected),
644 sampleText, UPRV_LENGTHOF(sampleText),
645 "CESU-8",
646 UCNV_FROM_U_CALLBACK_SKIP, offsets, NULL, 0)
647 ) {
648 log_err("u->CESU-8 with skip did not match.\n");
649 }
650 }
651
652 /*to Unicode*/
653 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SKIP \n");
654
655 #if !UCONFIG_NO_LEGACY_CONVERSION
656 {
657
658 static const UChar IBM_949skiptoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xD700 };
659 static const UChar IBM_943skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
660 static const UChar IBM_930skiptoUnicode[]= { 0x6D63, 0x6D64, 0x6D66 };
661
662 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5};
663 static const int32_t fromIBM943Offs [] = { 0, 2, 4};
664 static const int32_t fromIBM930Offs [] = { 1, 3, 5};
665
666 if(!testConvertToUnicode(expskipIBM_949, UPRV_LENGTHOF(expskipIBM_949),
667 IBM_949skiptoUnicode, UPRV_LENGTHOF(IBM_949skiptoUnicode),"ibm-949",
668 UCNV_TO_U_CALLBACK_SKIP, fromIBM949Offs, NULL, 0 ))
669 log_err("ibm-949->u with skip did not match.\n");
670 if(!testConvertToUnicode(expskipIBM_943, UPRV_LENGTHOF(expskipIBM_943),
671 IBM_943skiptoUnicode, UPRV_LENGTHOF(IBM_943skiptoUnicode),"ibm-943",
672 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offs, NULL, 0 ))
673 log_err("ibm-943->u with skip did not match.\n");
674
675
676 if(!testConvertToUnicode(expskipIBM_930, UPRV_LENGTHOF(expskipIBM_930),
677 IBM_930skiptoUnicode, UPRV_LENGTHOF(IBM_930skiptoUnicode),"ibm-930",
678 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0 ))
679 log_err("ibm-930->u with skip did not match.\n");
680
681
682 if(!testConvertToUnicodeWithContext(expskipIBM_930, UPRV_LENGTHOF(expskipIBM_930),
683 IBM_930skiptoUnicode, UPRV_LENGTHOF(IBM_930skiptoUnicode),"ibm-930",
684 UCNV_TO_U_CALLBACK_SKIP, fromIBM930Offs, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
685 log_err("ibm-930->u with skip did not match.\n");
686 }
687 #endif
688
689 {
690 static const uint8_t usasciiToUBytes[] = { 0x61, 0x80, 0x31 };
691 static const UChar usasciiToU[] = { 0x61, 0x31 };
692 static const int32_t usasciiToUOffsets[] = { 0, 2 };
693
694 static const uint8_t latin1ToUBytes[] = { 0x61, 0xa0, 0x31 };
695 static const UChar latin1ToU[] = { 0x61, 0xa0, 0x31 };
696 static const int32_t latin1ToUOffsets[] = { 0, 1, 2 };
697
698 /* US-ASCII */
699 if(!testConvertToUnicode(usasciiToUBytes, UPRV_LENGTHOF(usasciiToUBytes),
700 usasciiToU, UPRV_LENGTHOF(usasciiToU),
701 "US-ASCII",
702 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
703 NULL, 0)
704 ) {
705 log_err("US-ASCII->u with skip did not match.\n");
706 }
707
708 #if !UCONFIG_NO_LEGACY_CONVERSION
709 /* SBCS NLTC codepage 367 for US-ASCII */
710 if(!testConvertToUnicode(usasciiToUBytes, UPRV_LENGTHOF(usasciiToUBytes),
711 usasciiToU, UPRV_LENGTHOF(usasciiToU),
712 "ibm-367",
713 UCNV_TO_U_CALLBACK_SKIP, usasciiToUOffsets,
714 NULL, 0)
715 ) {
716 log_err("ibm-367->u with skip did not match.\n");
717 }
718 #endif
719
720 /* ISO-Latin-1 */
721 if(!testConvertToUnicode(latin1ToUBytes, UPRV_LENGTHOF(latin1ToUBytes),
722 latin1ToU, UPRV_LENGTHOF(latin1ToU),
723 "LATIN_1",
724 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
725 NULL, 0)
726 ) {
727 log_err("LATIN_1->u with skip did not match.\n");
728 }
729
730 #if !UCONFIG_NO_LEGACY_CONVERSION
731 /* windows-1252 */
732 if(!testConvertToUnicode(latin1ToUBytes, UPRV_LENGTHOF(latin1ToUBytes),
733 latin1ToU, UPRV_LENGTHOF(latin1ToU),
734 "windows-1252",
735 UCNV_TO_U_CALLBACK_SKIP, latin1ToUOffsets,
736 NULL, 0)
737 ) {
738 log_err("windows-1252->u with skip did not match.\n");
739 }
740 #endif
741 }
742
743 #if !UCONFIG_NO_LEGACY_CONVERSION
744 {
745 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
746 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
747 };
748 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0x03b4
749 };
750 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 5};
751
752
753 /* euc-jp*/
754 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
755 0x8f, 0xda, 0xa1, /*unassigned*/
756 0x8e, 0xe0,
757 };
758 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0x00a2};
759 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3, 9};
760
761 /*EUC_TW*/
762 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
763 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
764 0xe6, 0xca, 0x8a,
765 };
766 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0x8706, 0x8a, };
767 static const int32_t from_euc_twOffs [] ={ 0, 1, 3, 11, 13};
768 /*iso-2022-jp*/
769 static const uint8_t sampleTxt_iso_2022_jp[]={
770 0x41,
771 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/
772 0x1b, 0x28, 0x42, 0x42,
773
774 };
775 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x42 };
776 static const int32_t from_iso_2022_jpOffs [] ={ 0,9 };
777
778 /*iso-2022-cn*/
779 static const uint8_t sampleTxt_iso_2022_cn[]={
780 0x0f, 0x41, 0x44,
781 0x1B, 0x24, 0x29, 0x47,
782 0x0E, 0x40, 0x6f, /*unassigned*/
783 0x0f, 0x42,
784
785 };
786
787 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x42 };
788 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 11 };
789
790 /*iso-2022-kr*/
791 static const uint8_t sampleTxt_iso_2022_kr[]={
792 0x1b, 0x24, 0x29, 0x43,
793 0x41,
794 0x0E, 0x7f, 0x1E,
795 0x0e, 0x25, 0x50,
796 0x0f, 0x51,
797 0x42, 0x43,
798
799 };
800 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x03A0,0x51, 0x42,0x43};
801 static const int32_t from_iso_2022_krOffs [] ={ 4, 9, 12, 13 , 14 };
802
803 /*hz*/
804 static const uint8_t sampleTxt_hz[]={
805 0x41,
806 0x7e, 0x7b, 0x26, 0x30,
807 0x7f, 0x1E, /*unassigned*/
808 0x26, 0x30,
809 0x7e, 0x7d, 0x42,
810 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
811 0x7e, 0x7d, 0x42,
812 };
813 static const UChar hztoUnicode[]={
814 0x41,
815 0x03a0,
816 0x03A0,
817 0x42,
818 0x42,};
819
820 static const int32_t from_hzOffs [] ={0,3,7,11,18, };
821
822 /*ISCII*/
823 static const uint8_t sampleTxt_iscii[]={
824 0x41,
825 0xa1,
826 0xEB, /*unassigned*/
827 0x26,
828 0x30,
829 0xa2,
830 0xEC, /*unassigned*/
831 0x42,
832 };
833 static const UChar isciitoUnicode[]={
834 0x41,
835 0x0901,
836 0x26,
837 0x30,
838 0x0902,
839 0x42,
840 };
841
842 static const int32_t from_isciiOffs [] ={0,1,3,4,5,7 };
843
844 /*LMBCS*/
845 static const uint8_t sampleTxtLMBCS[]={ 0x12, 0xc9, 0x50,
846 0x12, 0x92, 0xa0, /*unassigned*/
847 0x12, 0x92, 0xA1,
848 };
849 static const UChar LMBCSToUnicode[]={ 0x4e2e, 0xe5c4};
850 static const int32_t fromLMBCS[] = {0, 6};
851
852 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
853 EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
854 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
855 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
856
857 if(!testConvertToUnicodeWithContext(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
858 EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
859 UCNV_TO_U_CALLBACK_SKIP, from_EBCIDIC_STATEFULOffsets, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
860 log_err("EBCIDIC_STATEFUL->u with skip did not match.\n");
861
862 if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
863 euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
864 UCNV_TO_U_CALLBACK_SKIP, from_euc_jpOffs , NULL, 0))
865 log_err("euc-jp->u with skip did not match.\n");
866
867
868
869 if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
870 euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
871 UCNV_TO_U_CALLBACK_SKIP, from_euc_twOffs , NULL, 0))
872 log_err("euc-tw->u with skip did not match.\n");
873
874
875 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
876 iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
877 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_jpOffs , NULL, 0))
878 log_err("iso-2022-jp->u with skip did not match.\n");
879
880 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, UPRV_LENGTHOF(sampleTxt_iso_2022_cn),
881 iso_2022_cntoUnicode, UPRV_LENGTHOF(iso_2022_cntoUnicode),"iso-2022-cn",
882 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_cnOffs , NULL, 0))
883 log_err("iso-2022-cn->u with skip did not match.\n");
884
885 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, UPRV_LENGTHOF(sampleTxt_iso_2022_kr),
886 iso_2022_krtoUnicode, UPRV_LENGTHOF(iso_2022_krtoUnicode),"iso-2022-kr",
887 UCNV_TO_U_CALLBACK_SKIP, from_iso_2022_krOffs , NULL, 0))
888 log_err("iso-2022-kr->u with skip did not match.\n");
889
890 if(!testConvertToUnicode(sampleTxt_hz, UPRV_LENGTHOF(sampleTxt_hz),
891 hztoUnicode, UPRV_LENGTHOF(hztoUnicode),"HZ",
892 UCNV_TO_U_CALLBACK_SKIP, from_hzOffs , NULL, 0))
893 log_err("HZ->u with skip did not match.\n");
894
895 if(!testConvertToUnicode(sampleTxt_iscii, UPRV_LENGTHOF(sampleTxt_iscii),
896 isciitoUnicode, UPRV_LENGTHOF(isciitoUnicode),"ISCII,version=0",
897 UCNV_TO_U_CALLBACK_SKIP, from_isciiOffs , NULL, 0))
898 log_err("iscii->u with skip did not match.\n");
899
900 if(!testConvertToUnicode(sampleTxtLMBCS, UPRV_LENGTHOF(sampleTxtLMBCS),
901 LMBCSToUnicode, UPRV_LENGTHOF(LMBCSToUnicode),"LMBCS-1",
902 UCNV_TO_U_CALLBACK_SKIP, fromLMBCS , NULL, 0))
903 log_err("LMBCS->u with skip did not match.\n");
904
905 }
906 #endif
907
908 log_verbose("Testing to Unicode for UTF-8 with UCNV_TO_U_CALLBACK_SKIP \n");
909 {
910 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
911 0xe0, 0x80, 0x61,};
912 UChar expected1[] = { 0x0031, 0x4e8c, 0x0061};
913 int32_t offsets1[] = { 0x0000, 0x0001, 0x0006};
914
915 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
916 expected1, UPRV_LENGTHOF(expected1),"utf8",
917 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
918 log_err("utf8->u with skip did not match.\n");
919 }
920
921 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SKIP \n");
922 {
923 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
924 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffe,0xfffe};
925 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
926
927 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
928 expected1, UPRV_LENGTHOF(expected1),"SCSU",
929 UCNV_TO_U_CALLBACK_SKIP, offsets1, NULL, 0 ))
930 log_err("scsu->u with skip did not match.\n");
931 }
932
933 log_verbose("Testing toUnicode for BOCU-1 with UCNV_TO_U_CALLBACK_SKIP\n");
934 {
935 const uint8_t sampleText[]={ /* modified from cintltst/bocu1tst.c/TestBOCU1 text 1 */
936 0xFB, 0xEE, 0x28, /* single-code point sequence at offset 0 */
937 0x24, 0x1E, 0x52, /* 3 */
938 0xB2, /* 6 */
939 0x20, /* 7 */
940 0x40, 0x07, /* 8 - wrong trail byte */
941 0xB3, /* 10 */
942 0xB1, /* 11 */
943 0xD0, 0x20, /* 12 - wrong trail byte */
944 0x0D, /* 14 */
945 0x0A, /* 15 */
946 0x20, /* 16 */
947 0x00, /* 17 */
948 0xD0, 0x6C, /* 18 */
949 0xB6, /* 20 */
950 0xD8, 0xA5, /* 21 */
951 0x20, /* 23 */
952 0x68, /* 24 */
953 0x59, /* 25 */
954 0xF9, 0x28, /* 26 */
955 0x6D, /* 28 */
956 0x20, /* 29 */
957 0x73, /* 30 */
958 0xE0, 0x2D, /* 31 */
959 0xDE, 0x43, /* 33 */
960 0xD0, 0x33, /* 35 */
961 0x20, /* 37 */
962 0xFA, 0x83, /* 38 */
963 0x25, 0x01, /* 40 */
964 0xFB, 0x16, 0x87, /* 42 */
965 0x4B, 0x16, /* 45 */
966 0x20, /* 47 */
967 0xE6, 0xBD, /* 48 */
968 0xEB, 0x5B, /* 50 */
969 0x4B, 0xCC, /* 52 */
970 0xF9, 0xA2, /* 54 */
971 0xFC, 0x10, 0x3E, /* 56 */
972 0xFE, 0x16, 0x3A, 0x8C, /* 59 */
973 0x20, /* 63 */
974 0xFC, 0x03, 0xAC, /* 64 */
975 0xFF, /* 67 - FF just resets the state without encoding anything */
976 0x01, /* 68 */
977 0xDE, 0x83, /* 69 */
978 0x20, /* 71 */
979 0x09 /* 72 */
980 };
981 UChar expected[]={
982 0xFEFF, 0x0061, 0x0062, 0x0020,
983 0x0063, 0x0061, 0x000D, 0x000A,
984 0x0020, 0x0000, 0x00DF, 0x00E6,
985 0x0930, 0x0020, 0x0918, 0x0909,
986 0x3086, 0x304D, 0x0020, 0x3053,
987 0x4000, 0x4E00, 0x7777, 0x0020,
988 0x9FA5, 0x4E00, 0xAC00, 0xBCDE,
989 0x0020, 0xD7A3, 0xDC00, 0xD800,
990 0xD800, 0xDC00, 0xD845, 0xDDDD,
991 0xDBBB, 0xDDEE, 0x0020, 0xDBFF,
992 0xDFFF, 0x0001, 0x0E40, 0x0020,
993 0x0009
994 };
995 int32_t offsets[]={
996 0, 3, 6, 7, /* skip 8, */
997 10, 11, /* skip 12, */
998 14, 15, 16, 17, 18,
999 20, 21, 23, 24, 25, 26, 28, 29,
1000 30, 31, 33, 35, 37, 38,
1001 40, 42, 45, 47, 48,
1002 50, 52, 54, /* trail */ 54, 56, /* trail */ 56, 59, /* trail */ 59,
1003 63, 64, /* trail */ 64, /* reset only 67, */
1004 68, 69,
1005 71, 72
1006 };
1007
1008 if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1009 expected, UPRV_LENGTHOF(expected), "BOCU-1",
1010 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1011 ) {
1012 log_err("BOCU-1->u with skip did not match.\n");
1013 }
1014 }
1015
1016 log_verbose("Testing toUnicode for CESU-8 with UCNV_TO_U_CALLBACK_SKIP\n");
1017 {
1018 const uint8_t sampleText[]={
1019 0x61, /* 0 'a' */
1020 0xc0, 0x80, /* 1 non-shortest form */
1021 0xc4, 0xb5, /* 3 U+0135 */
1022 0xed, 0x80, 0xa0, /* 5 Hangul U+d020 */
1023 0xed, 0xa0, 0x81, 0xed, 0xb0, 0x81, /* 8 surrogate pair for U+10401 */
1024 0xee, 0x80, 0x80, /* 14 PUA U+e000 */
1025 0xed, 0xb0, 0x81, /* 17 unpaired trail surrogate U+dc01 */
1026 0xf0, 0x90, 0x80, 0x80, /* 20 illegal 4-byte form for U+10000 */
1027 0x62, /* 24 'b' */
1028 0xed, 0xa0, 0x81, /* 25 unpaired lead surrogate U+d801 */
1029 0xed, 0xa0, /* 28 incomplete sequence */
1030 0xd0, 0x80 /* 30 U+0400 */
1031 };
1032 UChar expected[]={
1033 0x0061,
1034 /* skip */
1035 0x0135,
1036 0xd020,
1037 0xd801, 0xdc01,
1038 0xe000,
1039 0xdc01,
1040 /* skip */
1041 0x0062,
1042 0xd801,
1043 0x0400
1044 };
1045 int32_t offsets[]={
1046 0,
1047 /* skip 1, */
1048 3,
1049 5,
1050 8, 11,
1051 14,
1052 17,
1053 /* skip 20, 20, */
1054 24,
1055 25,
1056 /* skip 28 */
1057 30
1058 };
1059
1060 /* without offsets */
1061 if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1062 expected, UPRV_LENGTHOF(expected), "CESU-8",
1063 UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, 0)
1064 ) {
1065 log_err("CESU-8->u with skip did not match.\n");
1066 }
1067
1068 /* with offsets */
1069 if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1070 expected, UPRV_LENGTHOF(expected), "CESU-8",
1071 UCNV_TO_U_CALLBACK_SKIP, offsets, NULL, 0)
1072 ) {
1073 log_err("CESU-8->u with skip did not match.\n");
1074 }
1075 }
1076 }
1077
TestStop(int32_t inputsize,int32_t outputsize)1078 static void TestStop(int32_t inputsize, int32_t outputsize)
1079 {
1080 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1081 static const UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1082
1083 static const uint8_t expstopIBM_949[]= {
1084 0x00, 0xb0, 0xa1, 0xb0, 0xa2};
1085
1086 static const uint8_t expstopIBM_943[] = {
1087 0x9f, 0xaf, 0x9f, 0xb1};
1088
1089 static const uint8_t expstopIBM_930[] = {
1090 0x0e, 0x5d, 0x5f, 0x5d, 0x63};
1091
1092 static const UChar IBM_949stoptoUnicode[]= {0x0000, 0xAC00, 0xAC01};
1093 static const UChar IBM_943stoptoUnicode[]= { 0x6D63, 0x6D64};
1094 static const UChar IBM_930stoptoUnicode[]= { 0x6D63, 0x6D64};
1095
1096
1097 static const int32_t toIBM949Offsstop [] = { 0, 1, 1, 2, 2};
1098 static const int32_t toIBM943Offsstop [] = { 0, 0, 1, 1};
1099 static const int32_t toIBM930Offsstop [] = { 0, 0, 0, 1, 1};
1100
1101 static const int32_t fromIBM949Offs [] = { 0, 1, 3};
1102 static const int32_t fromIBM943Offs [] = { 0, 2};
1103 static const int32_t fromIBM930Offs [] = { 1, 3};
1104
1105 gInBufferSize = inputsize;
1106 gOutBufferSize = outputsize;
1107
1108 /*From Unicode*/
1109
1110 #if !UCONFIG_NO_LEGACY_CONVERSION
1111 if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1112 expstopIBM_949, UPRV_LENGTHOF(expstopIBM_949), "ibm-949",
1113 UCNV_FROM_U_CALLBACK_STOP, toIBM949Offsstop, NULL, 0 ))
1114 log_err("u-> ibm-949 with stop did not match.\n");
1115 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1116 expstopIBM_943, UPRV_LENGTHOF(expstopIBM_943), "ibm-943",
1117 UCNV_FROM_U_CALLBACK_STOP, toIBM943Offsstop , NULL, 0))
1118 log_err("u-> ibm-943 with stop did not match.\n");
1119 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1120 expstopIBM_930, UPRV_LENGTHOF(expstopIBM_930), "ibm-930",
1121 UCNV_FROM_U_CALLBACK_STOP, toIBM930Offsstop, NULL, 0 ))
1122 log_err("u-> ibm-930 with stop did not match.\n");
1123
1124 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_STOP \n");
1125 {
1126 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1127 static const uint8_t toIBM943[]= { 0x61,};
1128 static const int32_t offset[]= {0,} ;
1129
1130 /*EUC_JP*/
1131 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1132 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,};
1133 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,};
1134
1135 /*EUC_TW*/
1136 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1137 static const uint8_t to_euc_tw[]={
1138 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,};
1139 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,};
1140
1141 /*ISO-2022-JP*/
1142 static const UChar iso_2022_jp_inputText[]={0x0041, 0x00E9, 0x0042, };
1143 static const uint8_t to_iso_2022_jp[]={
1144 0x41,
1145
1146 };
1147 static const int32_t from_iso_2022_jpOffs [] ={0,};
1148
1149 /*ISO-2022-cn*/
1150 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1151 static const uint8_t to_iso_2022_cn[]={
1152 0x41,
1153
1154 };
1155 static const int32_t from_iso_2022_cnOffs [] ={
1156 0,0,
1157 2,2,
1158 };
1159
1160 /*ISO-2022-kr*/
1161 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042, };
1162 static const uint8_t to_iso_2022_kr[]={
1163 0x1b, 0x24, 0x29, 0x43,
1164 0x41,
1165 0x0e, 0x25, 0x50,
1166 };
1167 static const int32_t from_iso_2022_krOffs [] ={
1168 -1,-1,-1,-1,
1169 0,
1170 1,1,1,
1171 };
1172
1173 /* HZ encoding */
1174 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1175
1176 static const uint8_t to_hz[]={
1177 0x7e, 0x7d, 0x41,
1178 0x7e, 0x7b, 0x26, 0x30,
1179
1180 };
1181 static const int32_t from_hzOffs [] ={
1182 0, 0,0,
1183 1,1,1,1,
1184 };
1185
1186 /*ISCII*/
1187 static const UChar iscii_inputText[]={ 0x0041, 0x3712, 0x0042, };
1188 static const uint8_t to_iscii[]={
1189 0x41,
1190 };
1191 static const int32_t from_isciiOffs [] ={
1192 0,
1193 };
1194
1195 if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
1196 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
1197 UCNV_FROM_U_CALLBACK_STOP, offset, NULL, 0 ))
1198 log_err("u-> ibm-943 with stop did not match.\n");
1199
1200 if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
1201 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
1202 UCNV_FROM_U_CALLBACK_STOP, fromEUC_JPOffs, NULL, 0 ))
1203 log_err("u-> euc-jp with stop did not match.\n");
1204
1205 if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
1206 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
1207 UCNV_FROM_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1208 log_err("u-> euc-tw with stop did not match.\n");
1209
1210 if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
1211 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
1212 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1213 log_err("u-> iso-2022-jp with stop did not match.\n");
1214
1215 if(!testConvertFromUnicode(iso_2022_jp_inputText, UPRV_LENGTHOF(iso_2022_jp_inputText),
1216 to_iso_2022_jp, UPRV_LENGTHOF(to_iso_2022_jp), "iso-2022-jp",
1217 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_jpOffs, NULL, 0 ))
1218 log_err("u-> iso-2022-jp with stop did not match.\n");
1219
1220 if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
1221 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
1222 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_cnOffs, NULL, 0 ))
1223 log_err("u-> iso-2022-cn with stop did not match.\n");
1224
1225 if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
1226 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
1227 UCNV_FROM_U_CALLBACK_STOP, from_iso_2022_krOffs, NULL, 0 ))
1228 log_err("u-> iso-2022-kr with stop did not match.\n");
1229
1230 if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
1231 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
1232 UCNV_FROM_U_CALLBACK_STOP, from_hzOffs, NULL, 0 ))
1233 log_err("u-> HZ with stop did not match.\n");\
1234
1235 if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
1236 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
1237 UCNV_FROM_U_CALLBACK_STOP, from_isciiOffs, NULL, 0 ))
1238 log_err("u-> iscii with stop did not match.\n");
1239
1240
1241 }
1242 #endif
1243
1244 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_STOP \n");
1245 {
1246 static const UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1247
1248 static const uint8_t to_SCSU[]={
1249 0x41,
1250
1251 };
1252 int32_t from_SCSUOffs [] ={
1253 0,
1254
1255 };
1256 if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1257 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
1258 UCNV_FROM_U_CALLBACK_STOP, from_SCSUOffs, NULL, 0 ))
1259 log_err("u-> SCSU with skip did not match.\n");
1260
1261 }
1262
1263 /*to Unicode*/
1264
1265 #if !UCONFIG_NO_LEGACY_CONVERSION
1266 if(!testConvertToUnicode(expstopIBM_949, UPRV_LENGTHOF(expstopIBM_949),
1267 IBM_949stoptoUnicode, UPRV_LENGTHOF(IBM_949stoptoUnicode),"ibm-949",
1268 UCNV_TO_U_CALLBACK_STOP, fromIBM949Offs, NULL, 0 ))
1269 log_err("ibm-949->u with stop did not match.\n");
1270 if(!testConvertToUnicode(expstopIBM_943, UPRV_LENGTHOF(expstopIBM_943),
1271 IBM_943stoptoUnicode, UPRV_LENGTHOF(IBM_943stoptoUnicode),"ibm-943",
1272 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offs, NULL, 0 ))
1273 log_err("ibm-943->u with stop did not match.\n");
1274 if(!testConvertToUnicode(expstopIBM_930, UPRV_LENGTHOF(expstopIBM_930),
1275 IBM_930stoptoUnicode, UPRV_LENGTHOF(IBM_930stoptoUnicode),"ibm-930",
1276 UCNV_TO_U_CALLBACK_STOP, fromIBM930Offs, NULL, 0 ))
1277 log_err("ibm-930->u with stop did not match.\n");
1278
1279 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_STOP \n");
1280 {
1281
1282 static const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1283 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1284 };
1285 static const UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63 };
1286 static const int32_t from_EBCIDIC_STATEFULOffsets []={ 1};
1287
1288
1289 /*EUC-JP*/
1290 static const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1291 0x8f, 0xda, 0xa1, /*unassigned*/
1292 0x8e, 0xe0,
1293 };
1294 static const UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec};
1295 static const int32_t from_euc_jpOffs [] ={ 0, 1, 3};
1296
1297 /*EUC_TW*/
1298 static const uint8_t sampleTxt_euc_tw[]={ 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1299 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1300 0xe6, 0xca, 0x8a,
1301 };
1302 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2};
1303 int32_t from_euc_twOffs [] ={ 0, 1, 3};
1304
1305
1306
1307 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
1308 EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
1309 UCNV_TO_U_CALLBACK_STOP, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1310 log_err("EBCIDIC_STATEFUL->u with stop did not match.\n");
1311
1312 if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1313 euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1314 UCNV_TO_U_CALLBACK_STOP, from_euc_jpOffs , NULL, 0))
1315 log_err("euc-jp->u with stop did not match.\n");
1316
1317 if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
1318 euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
1319 UCNV_TO_U_CALLBACK_STOP, from_euc_twOffs, NULL, 0 ))
1320 log_err("euc-tw->u with stop did not match.\n");
1321 }
1322 #endif
1323
1324 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_STOP \n");
1325 {
1326 static const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1327 0xe0, 0x80, 0x61,};
1328 static const UChar expected1[] = { 0x0031, 0x4e8c,};
1329 static const int32_t offsets1[] = { 0x0000, 0x0001};
1330
1331 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1332 expected1, UPRV_LENGTHOF(expected1),"utf8",
1333 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1334 log_err("utf8->u with stop did not match.\n");
1335 }
1336 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_STOP \n");
1337 {
1338 static const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,0x04};
1339 static const UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061};
1340 static const int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003};
1341
1342 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1343 expected1, UPRV_LENGTHOF(expected1),"SCSU",
1344 UCNV_TO_U_CALLBACK_STOP, offsets1, NULL, 0 ))
1345 log_err("scsu->u with stop did not match.\n");
1346 }
1347
1348 }
1349
TestSub(int32_t inputsize,int32_t outputsize)1350 static void TestSub(int32_t inputsize, int32_t outputsize)
1351 {
1352 static const UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1353 static const UChar sampleText2[]= { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1354
1355 static const uint8_t expsubIBM_949[] =
1356 { 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xaf, 0xfe, 0xc8, 0xd3 };
1357
1358 static const uint8_t expsubIBM_943[] = {
1359 0x9f, 0xaf, 0x9f, 0xb1, 0xfc, 0xfc, 0x89, 0x59 };
1360
1361 static const uint8_t expsubIBM_930[] = {
1362 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f };
1363
1364 static const UChar IBM_949subtoUnicode[]= {0x0000, 0xAC00, 0xAC01, 0xfffd, 0xD700 };
1365 static const UChar IBM_943subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1366 static const UChar IBM_930subtoUnicode[]= {0x6D63, 0x6D64, 0xfffd, 0x6D66 };
1367
1368 static const int32_t toIBM949Offssub [] ={ 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1369 static const int32_t toIBM943Offssub [] ={ 0, 0, 1, 1, 2, 2, 3, 3 };
1370 static const int32_t toIBM930Offssub [] ={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 3 };
1371
1372 static const int32_t fromIBM949Offs [] = { 0, 1, 3, 5, 7 };
1373 static const int32_t fromIBM943Offs [] = { 0, 2, 4, 6 };
1374 static const int32_t fromIBM930Offs [] = { 1, 3, 5, 7 };
1375
1376 gInBufferSize = inputsize;
1377 gOutBufferSize = outputsize;
1378
1379 /*from unicode*/
1380
1381 #if !UCONFIG_NO_LEGACY_CONVERSION
1382 if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1383 expsubIBM_949, UPRV_LENGTHOF(expsubIBM_949), "ibm-949",
1384 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM949Offssub, NULL, 0 ))
1385 log_err("u-> ibm-949 with subst did not match.\n");
1386 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1387 expsubIBM_943, UPRV_LENGTHOF(expsubIBM_943), "ibm-943",
1388 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM943Offssub , NULL, 0))
1389 log_err("u-> ibm-943 with subst did not match.\n");
1390 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1391 expsubIBM_930, UPRV_LENGTHOF(expsubIBM_930), "ibm-930",
1392 UCNV_FROM_U_CALLBACK_SUBSTITUTE, toIBM930Offssub, NULL, 0 ))
1393 log_err("u-> ibm-930 with subst did not match.\n");
1394
1395 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1396 {
1397 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1398 static const uint8_t toIBM943[]= { 0x61, 0xfc, 0xfc, 0xfc, 0xfc, 0x61 };
1399 static const int32_t offset[]= {0, 1, 1, 3, 3, 4};
1400
1401
1402 /* EUC_JP*/
1403 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2 };
1404 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1405 0xf4, 0xfe, 0xf4, 0xfe,
1406 0x61, 0x8e, 0xe0,
1407 };
1408 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7};
1409
1410 /*EUC_TW*/
1411 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1412 static const uint8_t to_euc_tw[]={
1413 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1414 0xfd, 0xfe, 0xfd, 0xfe,
1415 0x61, 0xe6, 0xca, 0x8a,
1416 };
1417
1418 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2, 3, 3, 5, 5, 6, 7, 7, 8,};
1419
1420 if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
1421 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
1422 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset, NULL, 0 ))
1423 log_err("u-> ibm-943 with substitute did not match.\n");
1424
1425 if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
1426 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
1427 UCNV_FROM_U_CALLBACK_SUBSTITUTE, fromEUC_JPOffs, NULL, 0 ))
1428 log_err("u-> euc-jp with substitute did not match.\n");
1429
1430 if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
1431 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
1432 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1433 log_err("u-> euc-tw with substitute did not match.\n");
1434 }
1435 #endif
1436
1437 log_verbose("Testing fromUnicode for SCSU with UCNV_FROM_U_CALLBACK_SUBSTITUTE \n");
1438 {
1439 UChar SCSU_inputText[]={ 0x0041, 0xd801/*illegal*/, 0x0042, };
1440
1441 const uint8_t to_SCSU[]={
1442 0x41,
1443 0x0e, 0xff,0xfd,
1444 0x42
1445
1446
1447 };
1448 int32_t from_SCSUOffs [] ={
1449 0,
1450 1,1,1,
1451 2,
1452
1453 };
1454 const uint8_t to_SCSU_1[]={
1455 0x41,
1456
1457 };
1458 int32_t from_SCSUOffs_1 [] ={
1459 0,
1460
1461 };
1462 if(!testConvertFromUnicode(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1463 to_SCSU, UPRV_LENGTHOF(to_SCSU), "SCSU",
1464 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs, NULL, 0 ))
1465 log_err("u-> SCSU with substitute did not match.\n");
1466
1467 if(!testConvertFromUnicodeWithContext(SCSU_inputText, UPRV_LENGTHOF(SCSU_inputText),
1468 to_SCSU_1, UPRV_LENGTHOF(to_SCSU_1), "SCSU",
1469 UCNV_FROM_U_CALLBACK_SUBSTITUTE, from_SCSUOffs_1, NULL, 0,"i",U_ILLEGAL_CHAR_FOUND ))
1470 log_err("u-> SCSU with substitute did not match.\n");
1471 }
1472
1473 log_verbose("Testing fromUnicode for UTF-8 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1474 {
1475 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01, 0xd801, 0xffff, 0x0061,};
1476 static const uint8_t expectedUTF8[]= { 0xe2, 0x82, 0xac,
1477 0xf0, 0x90, 0x90, 0x81,
1478 0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd,
1479 0xef, 0xbf, 0xbf, 0x61,
1480
1481 };
1482 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6 };
1483 if(!testConvertFromUnicode(testinput, UPRV_LENGTHOF(testinput),
1484 expectedUTF8, UPRV_LENGTHOF(expectedUTF8), "utf8",
1485 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0 )) {
1486 log_err("u-> utf8 with substitute did not match.\n");
1487 }
1488 }
1489
1490 log_verbose("Testing fromUnicode for UTF-16 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1491 {
1492 static const UChar in[]={ 0x0041, 0xfeff };
1493
1494 static const uint8_t out[]={
1495 #if U_IS_BIG_ENDIAN
1496 0xfe, 0xff,
1497 0x00, 0x41,
1498 0xfe, 0xff
1499 #else
1500 0xff, 0xfe,
1501 0x41, 0x00,
1502 0xff, 0xfe
1503 #endif
1504 };
1505 static const int32_t offsets[]={
1506 -1, -1, 0, 0, 1, 1
1507 };
1508
1509 if(!testConvertFromUnicode(in, UPRV_LENGTHOF(in),
1510 out, UPRV_LENGTHOF(out), "UTF-16",
1511 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1512 ) {
1513 log_err("u->UTF-16 with substitute did not match.\n");
1514 }
1515 }
1516
1517 log_verbose("Testing fromUnicode for UTF-32 with UCNV_FROM_U_CALLBACK_SUBSTITUTE\n");
1518 {
1519 static const UChar in[]={ 0x0041, 0xfeff };
1520
1521 static const uint8_t out[]={
1522 #if U_IS_BIG_ENDIAN
1523 0x00, 0x00, 0xfe, 0xff,
1524 0x00, 0x00, 0x00, 0x41,
1525 0x00, 0x00, 0xfe, 0xff
1526 #else
1527 0xff, 0xfe, 0x00, 0x00,
1528 0x41, 0x00, 0x00, 0x00,
1529 0xff, 0xfe, 0x00, 0x00
1530 #endif
1531 };
1532 static const int32_t offsets[]={
1533 -1, -1, -1, -1, 0, 0, 0, 0, 1, 1, 1, 1
1534 };
1535
1536 if(!testConvertFromUnicode(in, UPRV_LENGTHOF(in),
1537 out, UPRV_LENGTHOF(out), "UTF-32",
1538 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1539 ) {
1540 log_err("u->UTF-32 with substitute did not match.\n");
1541 }
1542 }
1543
1544 /*to unicode*/
1545
1546 #if !UCONFIG_NO_LEGACY_CONVERSION
1547 if(!testConvertToUnicode(expsubIBM_949, UPRV_LENGTHOF(expsubIBM_949),
1548 IBM_949subtoUnicode, UPRV_LENGTHOF(IBM_949subtoUnicode),"ibm-949",
1549 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM949Offs, NULL, 0 ))
1550 log_err("ibm-949->u with substitute did not match.\n");
1551 if(!testConvertToUnicode(expsubIBM_943, UPRV_LENGTHOF(expsubIBM_943),
1552 IBM_943subtoUnicode, UPRV_LENGTHOF(IBM_943subtoUnicode),"ibm-943",
1553 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offs, NULL, 0 ))
1554 log_err("ibm-943->u with substitute did not match.\n");
1555 if(!testConvertToUnicode(expsubIBM_930, UPRV_LENGTHOF(expsubIBM_930),
1556 IBM_930subtoUnicode, UPRV_LENGTHOF(IBM_930subtoUnicode),"ibm-930",
1557 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM930Offs, NULL, 0 ))
1558 log_err("ibm-930->u with substitute did not match.\n");
1559
1560 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1561 {
1562
1563 const uint8_t sampleTxtEBCIDIC_STATEFUL [] ={
1564 0x0e, 0x5d, 0x5f , 0x41, 0x79, 0x41, 0x44
1565 };
1566 UChar EBCIDIC_STATEFUL_toUnicode[] ={ 0x6d63, 0xfffd, 0x03b4
1567 };
1568 int32_t from_EBCIDIC_STATEFULOffsets []={ 1, 3, 5};
1569
1570
1571 /* EUC_JP*/
1572 const uint8_t sampleTxt_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1573 0x8f, 0xda, 0xa1, /*unassigned*/
1574 0x8e, 0xe0, 0x8a
1575 };
1576 UChar euc_jptoUnicode[]={ 0x0061, 0x4edd, 0x5bec, 0xfffd, 0x00a2, 0x008a };
1577 int32_t from_euc_jpOffs [] ={ 0, 1, 3, 6, 9, 11 };
1578
1579 /*EUC_TW*/
1580 const uint8_t sampleTxt_euc_tw[]={
1581 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1582 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
1583 0xe6, 0xca, 0x8a,
1584 };
1585 UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2, 0xfffd, 0x8706, 0x8a, };
1586 int32_t from_euc_twOffs [] ={ 0, 1, 3, 7, 11, 13};
1587
1588
1589 if(!testConvertToUnicode(sampleTxtEBCIDIC_STATEFUL, UPRV_LENGTHOF(sampleTxtEBCIDIC_STATEFUL),
1590 EBCIDIC_STATEFUL_toUnicode, UPRV_LENGTHOF(EBCIDIC_STATEFUL_toUnicode),"ibm-930",
1591 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_EBCIDIC_STATEFULOffsets, NULL, 0 ))
1592 log_err("EBCIDIC_STATEFUL->u with substitute did not match.\n");
1593
1594
1595 if(!testConvertToUnicode(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1596 euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1597 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ))
1598 log_err("euc-jp->u with substitute did not match.\n");
1599
1600
1601 if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
1602 euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
1603 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_twOffs, NULL, 0 ))
1604 log_err("euc-tw->u with substitute did not match.\n");
1605
1606
1607 if(!testConvertToUnicodeWithContext(sampleTxt_euc_jp, UPRV_LENGTHOF(sampleTxt_euc_jp),
1608 euc_jptoUnicode, UPRV_LENGTHOF(euc_jptoUnicode),"IBM-eucJP",
1609 UCNV_TO_U_CALLBACK_SUBSTITUTE, from_euc_jpOffs, NULL, 0 ,"i", U_ILLEGAL_CHAR_FOUND))
1610 log_err("euc-jp->u with substitute did not match.\n");
1611 }
1612 #endif
1613
1614 log_verbose("Testing toUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1615 {
1616 const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
1617 0xe0, 0x80, 0x61,};
1618 UChar expected1[] = { 0x0031, 0x4e8c, 0xfffd, 0xfffd, 0x0061};
1619 int32_t offsets1[] = { 0x0000, 0x0001, 0x0004, 0x0005, 0x0006};
1620
1621 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1622 expected1, UPRV_LENGTHOF(expected1),"utf8",
1623 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1624 log_err("utf8->u with substitute did not match.\n");
1625 }
1626 log_verbose("Testing toUnicode for SCSU with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
1627 {
1628 const uint8_t sampleText1[] = { 0xba, 0x8c,0xF8, 0x61,0x0c, 0x0c,};
1629 UChar expected1[] = { 0x00ba, 0x008c, 0x00f8, 0x0061,0xfffd,0xfffd};
1630 int32_t offsets1[] = { 0x0000, 0x0001,0x0002,0x0003,4,5};
1631
1632 if(!testConvertToUnicode(sampleText1, UPRV_LENGTHOF(sampleText1),
1633 expected1, UPRV_LENGTHOF(expected1),"SCSU",
1634 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0 ))
1635 log_err("scsu->u with stop did not match.\n");
1636 }
1637
1638 #if !UCONFIG_NO_LEGACY_CONVERSION
1639 log_verbose("Testing ibm-930 subchar/subchar1\n");
1640 {
1641 static const UChar u1[]={ 0x6d63, 0x6d64, 0x6d65, 0x6d66, 0xdf };
1642 static const uint8_t s1[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfe, 0xfe, 0x46, 0x6b, 0x0f, 0x3f };
1643 static const int32_t offsets1[]={ 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 };
1644
1645 static const UChar u2[]={ 0x6d63, 0x6d64, 0xfffd, 0x6d66, 0x1a };
1646 static const uint8_t s2[]={ 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0xfc, 0xfc, 0x46, 0x6b, 0x0f, 0x57 };
1647 static const int32_t offsets2[]={ 1, 3, 5, 7, 10 };
1648
1649 if(!testConvertFromUnicode(u1, UPRV_LENGTHOF(u1), s1, UPRV_LENGTHOF(s1), "ibm-930",
1650 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1651 ) {
1652 log_err("u->ibm-930 subchar/subchar1 did not match.\n");
1653 }
1654
1655 if(!testConvertToUnicode(s2, UPRV_LENGTHOF(s2), u2, UPRV_LENGTHOF(u2), "ibm-930",
1656 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1657 ) {
1658 log_err("ibm-930->u subchar/subchar1 did not match.\n");
1659 }
1660 }
1661
1662 log_verbose("Testing GB 18030 with substitute callbacks\n");
1663 {
1664 static const UChar u2[]={
1665 0x24, 0x7f, 0x80, 0x1f9, 0x20ac, 0x4e00, 0x9fa6, 0xffff, 0xd800, 0xdc00, 0xfffd, 0xdbff, 0xdfff };
1666 static const uint8_t gb2[]={
1667 0x24, 0x7f, 0x81, 0x30, 0x81, 0x30, 0xa8, 0xbf, 0xa2, 0xe3, 0xd2, 0xbb, 0x82, 0x35, 0x8f, 0x33, 0x84, 0x31, 0xa4, 0x39, 0x90, 0x30, 0x81, 0x30, 0xe3, 0x32, 0x9a, 0x36, 0xe3, 0x32, 0x9a, 0x35 };
1668 static const int32_t offsets2[]={
1669 0, 1, 2, 6, 8, 10, 12, 16, 20, 20, 24, 28, 28 };
1670
1671 if(!testConvertToUnicode(gb2, UPRV_LENGTHOF(gb2), u2, UPRV_LENGTHOF(u2), "gb18030",
1672 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1673 ) {
1674 log_err("gb18030->u with substitute did not match.\n");
1675 }
1676 }
1677 #endif
1678
1679 log_verbose("Testing UTF-7 toUnicode with substitute callbacks\n");
1680 {
1681 static const uint8_t utf7[]={
1682 /* a~ a+AB~ a+AB\x0c a+AB- a+AB. a+. */
1683 0x61, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x7e, 0x61, 0x2b, 0x41, 0x42, 0x0c, 0x61, 0x2b, 0x41, 0x42, 0x2d, 0x61, 0x2b, 0x41, 0x42, 0x2e, 0x61, 0x2b, 0x2e
1684 };
1685 static const UChar unicode[]={
1686 0x61, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0xfffd, 0x61, 0xfffd, 0x61, 0xfffd, 0x2e, 0x61, 0xfffd, 0x2e
1687 };
1688 static const int32_t offsets[]={
1689 0, 1, 2, 4, 6, 7, 9, 11, 12, 14, 17, 19, 21, 22, 23, 24
1690 };
1691
1692 if(!testConvertToUnicode(utf7, UPRV_LENGTHOF(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7",
1693 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets, NULL, 0)
1694 ) {
1695 log_err("UTF-7->u with substitute did not match.\n");
1696 }
1697 }
1698
1699 log_verbose("Testing UTF-16 toUnicode with substitute callbacks\n");
1700 {
1701 static const uint8_t
1702 in1[]={ 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff },
1703 in2[]={ 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff },
1704 in3[]={ 0xfe, 0xfd, 0x4e, 0x00, 0xfe, 0xff };
1705
1706 static const UChar
1707 out1[]={ 0x4e00, 0xfeff },
1708 out2[]={ 0x004e, 0xfffe },
1709 out3[]={ 0xfefd, 0x4e00, 0xfeff };
1710
1711 static const int32_t
1712 offsets1[]={ 2, 4 },
1713 offsets2[]={ 2, 4 },
1714 offsets3[]={ 0, 2, 4 };
1715
1716 if(!testConvertToUnicode(in1, UPRV_LENGTHOF(in1), out1, UPRV_LENGTHOF(out1), "UTF-16",
1717 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1718 ) {
1719 log_err("UTF-16 (BE BOM)->u with substitute did not match.\n");
1720 }
1721
1722 if(!testConvertToUnicode(in2, UPRV_LENGTHOF(in2), out2, UPRV_LENGTHOF(out2), "UTF-16",
1723 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1724 ) {
1725 log_err("UTF-16 (LE BOM)->u with substitute did not match.\n");
1726 }
1727
1728 if(!testConvertToUnicode(in3, UPRV_LENGTHOF(in3), out3, UPRV_LENGTHOF(out3), "UTF-16",
1729 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1730 ) {
1731 log_err("UTF-16 (no BOM)->u with substitute did not match.\n");
1732 }
1733 }
1734
1735 log_verbose("Testing UTF-32 toUnicode with substitute callbacks\n");
1736 {
1737 static const uint8_t
1738 in1[]={ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff },
1739 in2[]={ 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00 },
1740 in3[]={ 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01 },
1741 in4[]={ 0x00, 0x01, 0x02, 0x03, 0x00, 0x11, 0x12, 0x00, 0x00, 0x00, 0x4e, 0x00 };
1742
1743 static const UChar
1744 out1[]={ U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfeff },
1745 out2[]={ U16_LEAD(0x0f1000), U16_TRAIL(0x0f1000), 0xfffe },
1746 out3[]={ 0xfefe, U16_LEAD(0x100f00), U16_TRAIL(0x100f00), 0xfffd, 0xfffd },
1747 out4[]={ U16_LEAD(0x10203), U16_TRAIL(0x10203), 0xfffd, 0x4e00 };
1748
1749 static const int32_t
1750 offsets1[]={ 4, 4, 8 },
1751 offsets2[]={ 4, 4, 8 },
1752 offsets3[]={ 0, 4, 4, 8, 12 },
1753 offsets4[]={ 0, 0, 4, 8 };
1754
1755 if(!testConvertToUnicode(in1, UPRV_LENGTHOF(in1), out1, UPRV_LENGTHOF(out1), "UTF-32",
1756 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1, NULL, 0)
1757 ) {
1758 log_err("UTF-32 (BE BOM)->u with substitute did not match.\n");
1759 }
1760
1761 if(!testConvertToUnicode(in2, UPRV_LENGTHOF(in2), out2, UPRV_LENGTHOF(out2), "UTF-32",
1762 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets2, NULL, 0)
1763 ) {
1764 log_err("UTF-32 (LE BOM)->u with substitute did not match.\n");
1765 }
1766
1767 if(!testConvertToUnicode(in3, UPRV_LENGTHOF(in3), out3, UPRV_LENGTHOF(out3), "UTF-32",
1768 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets3, NULL, 0)
1769 ) {
1770 log_err("UTF-32 (no BOM)->u with substitute did not match.\n");
1771 }
1772
1773 if(!testConvertToUnicode(in4, UPRV_LENGTHOF(in4), out4, UPRV_LENGTHOF(out4), "UTF-32",
1774 UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets4, NULL, 0)
1775 ) {
1776 log_err("UTF-32 (no BOM, with error)->u with substitute did not match.\n");
1777 }
1778 }
1779 }
1780
TestSubWithValue(int32_t inputsize,int32_t outputsize)1781 static void TestSubWithValue(int32_t inputsize, int32_t outputsize)
1782 {
1783 UChar sampleText[] = { 0x0000, 0xAC00, 0xAC01, 0xEF67, 0xD700 };
1784 UChar sampleText2[] = { 0x6D63, 0x6D64, 0x6D65, 0x6D66 };
1785
1786 const uint8_t expsubwvalIBM_949[]= {
1787 0x00, 0xb0, 0xa1, 0xb0, 0xa2,
1788 0x25, 0x55, 0x45, 0x46, 0x36, 0x37, 0xc8, 0xd3 };
1789
1790 const uint8_t expsubwvalIBM_943[]= {
1791 0x9f, 0xaf, 0x9f, 0xb1,
1792 0x25, 0x55, 0x36, 0x44, 0x36, 0x35, 0x89, 0x59 };
1793
1794 const uint8_t expsubwvalIBM_930[] = {
1795 0x0e, 0x5d, 0x5f, 0x5d, 0x63, 0x0f, 0x6c, 0xe4, 0xf6, 0xc4, 0xf6, 0xf5, 0x0e, 0x46, 0x6b, 0x0f };
1796
1797 int32_t toIBM949Offs [] ={ 0, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4 };
1798 int32_t toIBM943Offs [] = { 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3 };
1799 int32_t toIBM930Offs [] = { 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3 }; /* last item: 3,3,3,3 because there's SO+DBCS+SI */
1800
1801 gInBufferSize = inputsize;
1802 gOutBufferSize = outputsize;
1803
1804 /*from Unicode*/
1805
1806 #if !UCONFIG_NO_LEGACY_CONVERSION
1807 if(!testConvertFromUnicode(sampleText, UPRV_LENGTHOF(sampleText),
1808 expsubwvalIBM_949, UPRV_LENGTHOF(expsubwvalIBM_949), "ibm-949",
1809 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM949Offs, NULL, 0 ))
1810 log_err("u-> ibm-949 with subst with value did not match.\n");
1811
1812 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1813 expsubwvalIBM_943, UPRV_LENGTHOF(expsubwvalIBM_943), "ibm-943",
1814 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM943Offs, NULL, 0 ))
1815 log_err("u-> ibm-943 with sub with value did not match.\n");
1816
1817 if(!testConvertFromUnicode(sampleText2, UPRV_LENGTHOF(sampleText2),
1818 expsubwvalIBM_930, UPRV_LENGTHOF(expsubwvalIBM_930), "ibm-930",
1819 UCNV_FROM_U_CALLBACK_ESCAPE, toIBM930Offs, NULL, 0 ))
1820 log_err("u-> ibm-930 with subst with value did not match.\n");
1821
1822
1823 log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE \n");
1824 {
1825 static const UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
1826 static const uint8_t toIBM943[]= { 0x61,
1827 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1828 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1829 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1830 0x61 };
1831 static const int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
1832
1833
1834 /* EUC_JP*/
1835 static const UChar euc_jp_inputText[]={ 0x0061, 0x4edd, 0x5bec, 0xd801, 0xdc01, 0xd801, 0x0061, 0x00a2, };
1836 static const uint8_t to_euc_jp[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
1837 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1838 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1839 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1840 0x61, 0x8e, 0xe0,
1841 };
1842 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 1, 2, 2, 2,
1843 3, 3, 3, 3, 3, 3,
1844 3, 3, 3, 3, 3, 3,
1845 5, 5, 5, 5, 5, 5,
1846 6, 7, 7,
1847 };
1848
1849 /*EUC_TW*/
1850 static const UChar euc_tw_inputText[]={ 0x0061, 0x2295, 0x5BF2, 0xd801, 0xdc01, 0xd801, 0x0061, 0x8706, 0x8a, };
1851 static const uint8_t to_euc_tw[]={
1852 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
1853 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1854 0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
1855 0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
1856 0x61, 0xe6, 0xca, 0x8a,
1857 };
1858 static const int32_t from_euc_twOffs [] ={ 0, 1, 1, 2, 2, 2, 2,
1859 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5,
1860 6, 7, 7, 8,
1861 };
1862 /*ISO-2022-JP*/
1863 static const UChar iso_2022_jp_inputText1[]={ 0x3000, 0x00E9, 0x3001,0x00E9, 0x0042} ;
1864 static const uint8_t to_iso_2022_jp1[]={
1865 0x1b, 0x24, 0x42, 0x21, 0x21,
1866 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1867 0x1b, 0x24, 0x42, 0x21, 0x22,
1868 0x1b, 0x28, 0x42, 0x25, 0x55, 0x30, 0x30, 0x45, 0x39,
1869 0x42,
1870 };
1871
1872 static const int32_t from_iso_2022_jpOffs1 [] ={
1873 0,0,0,0,0,
1874 1,1,1,1,1,1,1,1,1,
1875 2,2,2,2,2,
1876 3,3,3,3,3,3,3,3,3,
1877 4,
1878 };
1879 /* surrogate pair*/
1880 static const UChar iso_2022_jp_inputText2[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042} ;
1881 static const uint8_t to_iso_2022_jp2[]={
1882 0x1b, 0x24, 0x42, 0x21, 0x21,
1883 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1884 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1885 0x1b, 0x24, 0x42, 0x21, 0x22,
1886 0x1b, 0x28, 0x42, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1887 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1888 0x42,
1889 };
1890 static const int32_t from_iso_2022_jpOffs2 [] ={
1891 0,0,0,0,0,
1892 1,1,1,1,1,1,1,1,1,
1893 1,1,1,1,1,1,
1894 3,3,3,3,3,
1895 4,4,4,4,4,4,4,4,4,
1896 4,4,4,4,4,4,
1897 6,
1898 };
1899
1900 /*ISO-2022-cn*/
1901 static const UChar iso_2022_cn_inputText[]={ 0x0041, 0x3712, 0x0042, };
1902 static const uint8_t to_iso_2022_cn[]={
1903 0x41,
1904 0x25, 0x55, 0x33, 0x37, 0x31, 0x32,
1905 0x42,
1906 };
1907 static const int32_t from_iso_2022_cnOffs [] ={
1908 0,
1909 1,1,1,1,1,1,
1910 2,
1911 };
1912
1913 static const UChar iso_2022_cn_inputText4[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042};
1914
1915 static const uint8_t to_iso_2022_cn4[]={
1916 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
1917 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1918 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1919 0x0e, 0x21, 0x22,
1920 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1921 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1922 0x42,
1923 };
1924 static const int32_t from_iso_2022_cnOffs4 [] ={
1925 0,0,0,0,0,0,0,
1926 1,1,1,1,1,1,1,
1927 1,1,1,1,1,1,
1928 3,3,3,
1929 4,4,4,4,4,4,4,
1930 4,4,4,4,4,4,
1931 6
1932
1933 };
1934
1935 /*ISO-2022-kr*/
1936 static const UChar iso_2022_kr_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
1937 static const uint8_t to_iso_2022_kr2[]={
1938 0x1b, 0x24, 0x29, 0x43,
1939 0x41,
1940 0x0e, 0x25, 0x50,
1941 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1942 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1943 0x0e, 0x25, 0x50,
1944 0x0f, 0x42,
1945 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
1946 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
1947 0x43
1948 };
1949 static const int32_t from_iso_2022_krOffs2 [] ={
1950 -1,-1,-1,-1,
1951 0,
1952 1,1,1,
1953 2,2,2,2,2,2,2,
1954 2,2,2,2,2,2,
1955 4,4,4,
1956 5,5,
1957 6,6,6,6,6,6,
1958 6,6,6,6,6,6,
1959 8,
1960 };
1961
1962 static const UChar iso_2022_kr_inputText[]={ 0x0041, 0x03A0,0x3712/*unassigned*/,0x03A0, 0x0042,0x3712/*unassigned*/,0x43 };
1963 static const uint8_t to_iso_2022_kr[]={
1964 0x1b, 0x24, 0x29, 0x43,
1965 0x41,
1966 0x0e, 0x25, 0x50,
1967 0x0f, 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1968 0x0e, 0x25, 0x50,
1969 0x0f, 0x42,
1970 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
1971 0x43
1972 };
1973
1974
1975 static const int32_t from_iso_2022_krOffs [] ={
1976 -1,-1,-1,-1,
1977 0,
1978 1,1,1,
1979 2,2,2,2,2,2,2,
1980 3,3,3,
1981 4,4,
1982 5,5,5,5,5,5,
1983 6,
1984 };
1985 /* HZ encoding */
1986 static const UChar hz_inputText[]={ 0x0041, 0x03A0,0x0662/*unassigned*/,0x03A0, 0x0042, };
1987
1988 static const uint8_t to_hz[]={
1989 0x7e, 0x7d, 0x41,
1990 0x7e, 0x7b, 0x26, 0x30,
1991 0x7e, 0x7d, 0x25, 0x55, 0x30, 0x36, 0x36, 0x32, /*unassigned*/
1992 0x7e, 0x7b, 0x26, 0x30,
1993 0x7e, 0x7d, 0x42,
1994
1995 };
1996 static const int32_t from_hzOffs [] ={
1997 0,0,0,
1998 1,1,1,1,
1999 2,2,2,2,2,2,2,2,
2000 3,3,3,3,
2001 4,4,4
2002 };
2003
2004 static const UChar hz_inputText2[]={ 0x0041, 0x03A0,0xD84D, 0xDC56/*unassigned*/,0x03A0, 0x0042,0xD84D, 0xDC56/*unassigned*/,0x43 };
2005 static const uint8_t to_hz2[]={
2006 0x7e, 0x7d, 0x41,
2007 0x7e, 0x7b, 0x26, 0x30,
2008 0x7e, 0x7d, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
2009 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2010 0x7e, 0x7b, 0x26, 0x30,
2011 0x7e, 0x7d, 0x42,
2012 0x25, 0x55, 0x44, 0x38, 0x34, 0x44,
2013 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2014 0x43
2015 };
2016 static const int32_t from_hzOffs2 [] ={
2017 0,0,0,
2018 1,1,1,1,
2019 2,2,2,2,2,2,2,2,
2020 2,2,2,2,2,2,
2021 4,4,4,4,
2022 5,5,5,
2023 6,6,6,6,6,6,
2024 6,6,6,6,6,6,
2025 8,
2026 };
2027
2028 /*ISCII*/
2029 static const UChar iscii_inputText[]={ 0x0041, 0x0901,0x3712/*unassigned*/,0x0902, 0x0042,0x3712/*unassigned*/,0x43 };
2030 static const uint8_t to_iscii[]={
2031 0x41,
2032 0xef, 0x42, 0xa1,
2033 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
2034 0xa2,
2035 0x42,
2036 0x25, 0x55, 0x33, 0x37, 0x31, 0x32, /*unassigned*/
2037 0x43
2038 };
2039
2040
2041 static const int32_t from_isciiOffs [] ={
2042 0,
2043 1,1,1,
2044 2,2,2,2,2,2,
2045 3,
2046 4,
2047 5,5,5,5,5,5,
2048 6,
2049 };
2050
2051 if(!testConvertFromUnicode(inputTest, UPRV_LENGTHOF(inputTest),
2052 toIBM943, UPRV_LENGTHOF(toIBM943), "ibm-943",
2053 UCNV_FROM_U_CALLBACK_ESCAPE, offset, NULL, 0 ))
2054 log_err("u-> ibm-943 with subst with value did not match.\n");
2055
2056 if(!testConvertFromUnicode(euc_jp_inputText, UPRV_LENGTHOF(euc_jp_inputText),
2057 to_euc_jp, UPRV_LENGTHOF(to_euc_jp), "IBM-eucJP",
2058 UCNV_FROM_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0 ))
2059 log_err("u-> euc-jp with subst with value did not match.\n");
2060
2061 if(!testConvertFromUnicode(euc_tw_inputText, UPRV_LENGTHOF(euc_tw_inputText),
2062 to_euc_tw, UPRV_LENGTHOF(to_euc_tw), "euc-tw",
2063 UCNV_FROM_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0 ))
2064 log_err("u-> euc-tw with subst with value did not match.\n");
2065
2066 if(!testConvertFromUnicode(iso_2022_jp_inputText1, UPRV_LENGTHOF(iso_2022_jp_inputText1),
2067 to_iso_2022_jp1, UPRV_LENGTHOF(to_iso_2022_jp1), "iso-2022-jp",
2068 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2069 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2070
2071 if(!testConvertFromUnicode(iso_2022_jp_inputText1, UPRV_LENGTHOF(iso_2022_jp_inputText1),
2072 to_iso_2022_jp1, UPRV_LENGTHOF(to_iso_2022_jp1), "iso-2022-jp",
2073 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs1, NULL, 0 ))
2074 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2075
2076 if(!testConvertFromUnicode(iso_2022_jp_inputText2, UPRV_LENGTHOF(iso_2022_jp_inputText2),
2077 to_iso_2022_jp2, UPRV_LENGTHOF(to_iso_2022_jp2), "iso-2022-jp",
2078 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs2, NULL, 0 ))
2079 log_err("u-> iso_2022_jp with subst with value did not match.\n");
2080 /*ESCAPE OPTIONS*/
2081 {
2082 /* surrogate pair*/
2083 static const UChar iso_2022_jp_inputText3[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0901c } ;
2084 static const uint8_t to_iso_2022_jp3_v2[]={
2085 0x1b, 0x24, 0x42, 0x21, 0x21,
2086 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2087
2088 0x1b, 0x24, 0x42, 0x21, 0x22,
2089 0x1b, 0x28, 0x42, 0x26, 0x23, 0x31, 0x34, 0x34, 0x34, 0x37, 0x30, 0x3b,
2090
2091 0x42,
2092 0x26, 0x23, 0x33, 0x36, 0x38, 0x39, 0x32, 0x3b,
2093 };
2094
2095 static const int32_t from_iso_2022_jpOffs3_v2 [] ={
2096 0,0,0,0,0,
2097 1,1,1,1,1,1,1,1,1,1,1,1,
2098
2099 3,3,3,3,3,
2100 4,4,4,4,4,4,4,4,4,4,4,4,
2101
2102 6,
2103 7,7,7,7,7,7,7,7,7
2104 };
2105
2106 if(!testConvertFromUnicodeWithContext(iso_2022_jp_inputText3, UPRV_LENGTHOF(iso_2022_jp_inputText3),
2107 to_iso_2022_jp3_v2, UPRV_LENGTHOF(to_iso_2022_jp3_v2), "iso-2022-jp",
2108 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs3_v2, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2109 log_err("u-> iso-2022-jp with sub & UCNV_ESCAPE_XML_DEC did not match.\n");
2110 }
2111 {
2112 static const UChar iso_2022_cn_inputText5[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2113 static const uint8_t to_iso_2022_cn5_v2[]={
2114 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2115 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2116 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2117 0x0e, 0x21, 0x22,
2118 0x0f, 0x5c, 0x75, 0x44, 0x38, 0x34, 0x44,
2119 0x5c, 0x75, 0x44, 0x43, 0x35, 0x36,
2120 0x42,
2121 0x5c, 0x75, 0x30, 0x39, 0x30, 0x32,
2122 };
2123 static const int32_t from_iso_2022_cnOffs5_v2 [] ={
2124 0,0,0,0,0,0,0,
2125 1,1,1,1,1,1,1,
2126 1,1,1,1,1,1,
2127 3,3,3,
2128 4,4,4,4,4,4,4,
2129 4,4,4,4,4,4,
2130 6,
2131 7,7,7,7,7,7
2132 };
2133 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText5, UPRV_LENGTHOF(iso_2022_cn_inputText5),
2134 to_iso_2022_cn5_v2, UPRV_LENGTHOF(to_iso_2022_cn5_v2), "iso-2022-cn",
2135 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs5_v2, NULL, 0,UCNV_ESCAPE_JAVA,U_ZERO_ERROR ))
2136 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_JAVA did not match.\n");
2137
2138 }
2139 {
2140 static const UChar iso_2022_cn_inputText6[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2141 static const uint8_t to_iso_2022_cn6_v2[]={
2142 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2143 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2144 0x0e, 0x21, 0x22,
2145 0x0f, 0x7b, 0x55, 0x2b, 0x32, 0x33, 0x34, 0x35, 0x36, 0x7d,
2146 0x42,
2147 0x7b, 0x55, 0x2b, 0x30, 0x39, 0x30, 0x32, 0x7d
2148 };
2149 static const int32_t from_iso_2022_cnOffs6_v2 [] ={
2150 0, 0, 0, 0, 0, 0, 0,
2151 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2152 3, 3, 3,
2153 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2154 6,
2155 7, 7, 7, 7, 7, 7, 7, 7,
2156 };
2157 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText6, UPRV_LENGTHOF(iso_2022_cn_inputText6),
2158 to_iso_2022_cn6_v2, UPRV_LENGTHOF(to_iso_2022_cn6_v2), "iso-2022-cn",
2159 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs6_v2, NULL, 0,UCNV_ESCAPE_UNICODE,U_ZERO_ERROR ))
2160 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_UNICODE did not match.\n");
2161
2162 }
2163 {
2164 static const UChar iso_2022_cn_inputText7[]={ 0x3000, 0xD84D, 0xDC56, 0x3001,0xD84D,0xDC56, 0x0042,0x0902};
2165 static const uint8_t to_iso_2022_cn7_v2[]={
2166 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2167 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2168 0x0e, 0x21, 0x22,
2169 0x0f, 0x25, 0x55, 0x44, 0x38, 0x34, 0x44, 0x25, 0x55, 0x44, 0x43, 0x35, 0x36,
2170 0x42, 0x25, 0x55, 0x30, 0x39, 0x30, 0x32,
2171 };
2172 static const int32_t from_iso_2022_cnOffs7_v2 [] ={
2173 0, 0, 0, 0, 0, 0, 0,
2174 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2175 3, 3, 3,
2176 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2177 6,
2178 7, 7, 7, 7, 7, 7,
2179 };
2180 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText7, UPRV_LENGTHOF(iso_2022_cn_inputText7),
2181 to_iso_2022_cn7_v2, UPRV_LENGTHOF(to_iso_2022_cn7_v2), "iso-2022-cn",
2182 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs7_v2, NULL, 0,"K" ,U_ZERO_ERROR ))
2183 log_err("u-> iso-2022-cn with sub & K did not match.\n");
2184
2185 }
2186 {
2187 static const UChar iso_2022_cn_inputText8[]={
2188 0x3000,
2189 0xD84D, 0xDC56,
2190 0x3001,
2191 0xD84D, 0xDC56,
2192 0xDBFF, 0xDFFF,
2193 0x0042,
2194 0x0902};
2195 static const uint8_t to_iso_2022_cn8_v2[]={
2196 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2197 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20,
2198 0x0e, 0x21, 0x22,
2199 0x0f, 0x5c, 0x32, 0x33, 0x34, 0x35, 0x36, 0x20,
2200 0x5c, 0x31, 0x30, 0x46, 0x46, 0x46, 0x46, 0x20,
2201 0x42,
2202 0x5c, 0x39, 0x30, 0x32, 0x20
2203 };
2204 static const int32_t from_iso_2022_cnOffs8_v2 [] ={
2205 0, 0, 0, 0, 0, 0, 0,
2206 1, 1, 1, 1, 1, 1, 1, 1,
2207 3, 3, 3,
2208 4, 4, 4, 4, 4, 4, 4, 4,
2209 6, 6, 6, 6, 6, 6, 6, 6,
2210 8,
2211 9, 9, 9, 9, 9
2212 };
2213 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText8, UPRV_LENGTHOF(iso_2022_cn_inputText8),
2214 to_iso_2022_cn8_v2, UPRV_LENGTHOF(to_iso_2022_cn8_v2), "iso-2022-cn",
2215 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs8_v2, NULL, 0,UCNV_ESCAPE_CSS2,U_ZERO_ERROR ))
2216 log_err("u-> iso-2022-cn with sub & UCNV_ESCAPE_CSS2 did not match.\n");
2217
2218 }
2219 {
2220 static const uint8_t to_iso_2022_cn4_v3[]={
2221 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x21, 0x21,
2222 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2223 0x0e, 0x21, 0x22,
2224 0x0f, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x32, 0x33, 0x34, 0x35, 0x36,
2225 0x42
2226 };
2227
2228
2229 static const int32_t from_iso_2022_cnOffs4_v3 [] ={
2230 0,0,0,0,0,0,0,
2231 1,1,1,1,1,1,1,1,1,1,1,
2232
2233 3,3,3,
2234 4,4,4,4,4,4,4,4,4,4,4,
2235
2236 6
2237
2238 };
2239 if(!testConvertFromUnicodeWithContext(iso_2022_cn_inputText4, UPRV_LENGTHOF(iso_2022_cn_inputText4),
2240 to_iso_2022_cn4_v3, UPRV_LENGTHOF(to_iso_2022_cn4_v3), "iso-2022-cn",
2241 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4_v3, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2242 {
2243 log_err("u-> iso-2022-cn with skip & UCNV_ESCAPE_C did not match.\n");
2244 }
2245 }
2246 if(!testConvertFromUnicode(iso_2022_cn_inputText, UPRV_LENGTHOF(iso_2022_cn_inputText),
2247 to_iso_2022_cn, UPRV_LENGTHOF(to_iso_2022_cn), "iso-2022-cn",
2248 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0 ))
2249 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2250
2251 if(!testConvertFromUnicode(iso_2022_cn_inputText4, UPRV_LENGTHOF(iso_2022_cn_inputText4),
2252 to_iso_2022_cn4, UPRV_LENGTHOF(to_iso_2022_cn4), "iso-2022-cn",
2253 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs4, NULL, 0 ))
2254 log_err("u-> iso_2022_cn with subst with value did not match.\n");
2255 if(!testConvertFromUnicode(iso_2022_kr_inputText, UPRV_LENGTHOF(iso_2022_kr_inputText),
2256 to_iso_2022_kr, UPRV_LENGTHOF(to_iso_2022_kr), "iso-2022-kr",
2257 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0 ))
2258 log_err("u-> iso_2022_kr with subst with value did not match.\n");
2259 if(!testConvertFromUnicode(iso_2022_kr_inputText2, UPRV_LENGTHOF(iso_2022_kr_inputText2),
2260 to_iso_2022_kr2, UPRV_LENGTHOF(to_iso_2022_kr2), "iso-2022-kr",
2261 UCNV_FROM_U_CALLBACK_ESCAPE, from_iso_2022_krOffs2, NULL, 0 ))
2262 log_err("u-> iso_2022_kr2 with subst with value did not match.\n");
2263 if(!testConvertFromUnicode(hz_inputText, UPRV_LENGTHOF(hz_inputText),
2264 to_hz, UPRV_LENGTHOF(to_hz), "HZ",
2265 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0 ))
2266 log_err("u-> hz with subst with value did not match.\n");
2267 if(!testConvertFromUnicode(hz_inputText2, UPRV_LENGTHOF(hz_inputText2),
2268 to_hz2, UPRV_LENGTHOF(to_hz2), "HZ",
2269 UCNV_FROM_U_CALLBACK_ESCAPE, from_hzOffs2, NULL, 0 ))
2270 log_err("u-> hz with subst with value did not match.\n");
2271
2272 if(!testConvertFromUnicode(iscii_inputText, UPRV_LENGTHOF(iscii_inputText),
2273 to_iscii, UPRV_LENGTHOF(to_iscii), "ISCII,version=0",
2274 UCNV_FROM_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0 ))
2275 log_err("u-> iscii with subst with value did not match.\n");
2276 }
2277 #endif
2278
2279 log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
2280 /*to Unicode*/
2281 {
2282 #if !UCONFIG_NO_LEGACY_CONVERSION
2283 static const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
2284 0x81, 0xad, /*unassigned*/
2285 0x89, 0xd3 };
2286 static const UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
2287 0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
2288 0x7B87};
2289 static const int32_t fromIBM943Offs [] = { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
2290
2291 /* EUC_JP*/
2292 static const uint8_t sampleTxt_EUC_JP[]={ 0x61, 0xa1, 0xb8, 0x8f, 0xf4, 0xae,
2293 0x8f, 0xda, 0xa1, /*unassigned*/
2294 0x8e, 0xe0,
2295 };
2296 static const UChar EUC_JPtoUnicode[]={ 0x0061, 0x4edd, 0x5bec,
2297 0x25, 0x58, 0x38, 0x46, 0x25, 0x58, 0x44, 0x41, 0x25, 0x58, 0x41, 0x31,
2298 0x00a2 };
2299 static const int32_t fromEUC_JPOffs [] ={ 0, 1, 3,
2300 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2301 9,
2302 };
2303
2304 /*EUC_TW*/
2305 static const uint8_t sampleTxt_euc_tw[]={
2306 0x61, 0xa2, 0xd3, 0x8e, 0xa2, 0xdc, 0xe5,
2307 0x8e, 0xaa, 0xbb, 0xcc,/*unassigned*/
2308 0xe6, 0xca, 0x8a,
2309 };
2310 static const UChar euc_twtoUnicode[]={ 0x0061, 0x2295, 0x5BF2,
2311 0x25, 0x58, 0x38, 0x45, 0x25, 0x58, 0x41, 0x41, 0x25, 0x58, 0x42, 0x42, 0x25, 0x58, 0x43, 0x43,
2312 0x8706, 0x8a, };
2313 static const int32_t from_euc_twOffs [] ={ 0, 1, 3,
2314 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2315 11, 13};
2316
2317 /*iso-2022-jp*/
2318 static const uint8_t sampleTxt_iso_2022_jp[]={
2319 0x1b, 0x28, 0x42, 0x41,
2320 0x1b, 0x24, 0x42, 0x3a, 0x1a, /*unassigned*/
2321 0x1b, 0x28, 0x42, 0x42,
2322
2323 };
2324 /* A % X 3 A % X 1 A B */
2325 static const UChar iso_2022_jptoUnicode[]={ 0x41,0x25,0x58,0x33,0x41,0x25,0x58,0x31,0x41, 0x42 };
2326 static const int32_t from_iso_2022_jpOffs [] ={ 3, 7, 7, 7, 7, 7, 7, 7, 7, 12 };
2327
2328 /*iso-2022-cn*/
2329 static const uint8_t sampleTxt_iso_2022_cn[]={
2330 0x0f, 0x41, 0x44,
2331 0x1B, 0x24, 0x29, 0x47,
2332 0x0E, 0x40, 0x6c, /*unassigned*/
2333 0x0f, 0x42,
2334
2335 };
2336 static const UChar iso_2022_cntoUnicode[]={ 0x41, 0x44,0x25,0x58,0x34,0x30,0x25,0x58,0x36,0x43,0x42 };
2337 static const int32_t from_iso_2022_cnOffs [] ={ 1, 2, 8, 8, 8, 8, 8, 8, 8, 8, 11 };
2338
2339 /*iso-2022-kr*/
2340 static const uint8_t sampleTxt_iso_2022_kr[]={
2341 0x1b, 0x24, 0x29, 0x43,
2342 0x41,
2343 0x0E, 0x7f, 0x1E,
2344 0x0e, 0x25, 0x50,
2345 0x0f, 0x51,
2346 0x42, 0x43,
2347
2348 };
2349 static const UChar iso_2022_krtoUnicode[]={ 0x41,0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,0x03A0,0x51, 0x42,0x43};
2350 static const int32_t from_iso_2022_krOffs [] ={ 4, 6, 6, 6, 6, 6, 6, 6, 6, 9, 12, 13 , 14 };
2351
2352 /*hz*/
2353 static const uint8_t sampleTxt_hz[]={
2354 0x41,
2355 0x7e, 0x7b, 0x26, 0x30,
2356 0x7f, 0x1E, /*unassigned*/
2357 0x26, 0x30,
2358 0x7e, 0x7d, 0x42,
2359 0x7e, 0x7b, 0x7f, 0x1E,/*unassigned*/
2360 0x7e, 0x7d, 0x42,
2361 };
2362 static const UChar hztoUnicode[]={
2363 0x41,
2364 0x03a0,
2365 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2366 0x03A0,
2367 0x42,
2368 0x25,0x58,0x37,0x46,0x25,0x58,0x31,0x45,
2369 0x42,};
2370
2371 static const int32_t from_hzOffs [] ={0,3,5,5,5,5,5,5,5,5,7,11,14,14,14,14,14,14,14,14,18, };
2372
2373
2374 /*iscii*/
2375 static const uint8_t sampleTxt_iscii[]={
2376 0x41,
2377 0x30,
2378 0xEB, /*unassigned*/
2379 0xa3,
2380 0x42,
2381 0xEC, /*unassigned*/
2382 0x42,
2383 };
2384 static const UChar isciitoUnicode[]={
2385 0x41,
2386 0x30,
2387 0x25, 0x58, 0x45, 0x42,
2388 0x0903,
2389 0x42,
2390 0x25, 0x58, 0x45, 0x43,
2391 0x42,};
2392
2393 static const int32_t from_isciiOffs [] ={0,1,2,2,2,2,3,4,5,5,5,5,6 };
2394 #endif
2395
2396 /*UTF8*/
2397 static const uint8_t sampleTxtUTF8[]={
2398 0x20, 0x64, 0x50,
2399 0xC2, 0x7E, /* truncated char */
2400 0x20,
2401 0xE0, 0xB5, 0x7E, /* truncated char */
2402 0x40,
2403 };
2404 static const UChar UTF8ToUnicode[]={
2405 0x0020, 0x0064, 0x0050,
2406 0x0025, 0x0058, 0x0043, 0x0032, 0x007E, /* \xC2~ */
2407 0x0020,
2408 0x0025, 0x0058, 0x0045, 0x0030, 0x0025, 0x0058, 0x0042, 0x0035, 0x007E,
2409 0x0040
2410 };
2411 static const int32_t fromUTF8[] = {
2412 0, 1, 2,
2413 3, 3, 3, 3, 4,
2414 5,
2415 6, 6, 6, 6, 6, 6, 6, 6, 8,
2416 9
2417 };
2418 static const UChar UTF8ToUnicodeXML_DEC[]={
2419 0x0020, 0x0064, 0x0050,
2420 0x0026, 0x0023, 0x0031, 0x0039, 0x0034, 0x003B, 0x007E, /* Â~ */
2421 0x0020,
2422 0x0026, 0x0023, 0x0032, 0x0032, 0x0034, 0x003B, 0x0026, 0x0023, 0x0031, 0x0038, 0x0031, 0x003B, 0x007E,
2423 0x0040
2424 };
2425 static const int32_t fromUTF8XML_DEC[] = {
2426 0, 1, 2,
2427 3, 3, 3, 3, 3, 3, 4,
2428 5,
2429 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8,
2430 9
2431 };
2432
2433
2434 #if !UCONFIG_NO_LEGACY_CONVERSION
2435 if(!testConvertToUnicode(sampleTxtToU, UPRV_LENGTHOF(sampleTxtToU),
2436 IBM_943toUnicode, UPRV_LENGTHOF(IBM_943toUnicode),"ibm-943",
2437 UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs, NULL, 0 ))
2438 log_err("ibm-943->u with substitute with value did not match.\n");
2439
2440 if(!testConvertToUnicode(sampleTxt_EUC_JP, UPRV_LENGTHOF(sampleTxt_EUC_JP),
2441 EUC_JPtoUnicode, UPRV_LENGTHOF(EUC_JPtoUnicode),"IBM-eucJP",
2442 UCNV_TO_U_CALLBACK_ESCAPE, fromEUC_JPOffs, NULL, 0))
2443 log_err("euc-jp->u with substitute with value did not match.\n");
2444
2445 if(!testConvertToUnicode(sampleTxt_euc_tw, UPRV_LENGTHOF(sampleTxt_euc_tw),
2446 euc_twtoUnicode, UPRV_LENGTHOF(euc_twtoUnicode),"euc-tw",
2447 UCNV_TO_U_CALLBACK_ESCAPE, from_euc_twOffs, NULL, 0))
2448 log_err("euc-tw->u with substitute with value did not match.\n");
2449
2450 if(!testConvertToUnicode(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2451 iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
2452 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0))
2453 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2454
2455 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2456 iso_2022_jptoUnicode, UPRV_LENGTHOF(iso_2022_jptoUnicode),"iso-2022-jp",
2457 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffs, NULL, 0,"K",U_ZERO_ERROR))
2458 log_err("iso-2022-jp->u with substitute with value did not match.\n");
2459
2460 {/* test UCNV_TO_U_CALLBACK_ESCAPE with options */
2461 {
2462 static const UChar iso_2022_jptoUnicodeDec[]={
2463 0x0041,
2464 /* & # 5 8 ; */
2465 0x0026, 0x0023, 0x0035, 0x0038, 0x003b,
2466 0x0026, 0x0023, 0x0032, 0x0036, 0x003b,
2467 0x0042 };
2468 static const int32_t from_iso_2022_jpOffsDec [] ={ 3,7,7,7,7,7,7,7,7,7,7,12, };
2469 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2470 iso_2022_jptoUnicodeDec, UPRV_LENGTHOF(iso_2022_jptoUnicodeDec),"iso-2022-jp",
2471 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsDec, NULL, 0,UCNV_ESCAPE_XML_DEC,U_ZERO_ERROR ))
2472 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_DEC did not match.\n");
2473 }
2474 {
2475 static const UChar iso_2022_jptoUnicodeHex[]={
2476 0x0041,
2477 /* & # x 3 A ; */
2478 0x0026, 0x0023, 0x0078, 0x0033, 0x0041, 0x003b,
2479 0x0026, 0x0023, 0x0078, 0x0031, 0x0041, 0x003b,
2480 0x0042 };
2481 static const int32_t from_iso_2022_jpOffsHex [] ={ 3,7,7,7,7,7,7,7,7,7,7,7,7,12 };
2482 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2483 iso_2022_jptoUnicodeHex, UPRV_LENGTHOF(iso_2022_jptoUnicodeHex),"iso-2022-jp",
2484 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsHex, NULL, 0,UCNV_ESCAPE_XML_HEX,U_ZERO_ERROR ))
2485 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_XML_HEX did not match.\n");
2486 }
2487 {
2488 static const UChar iso_2022_jptoUnicodeC[]={
2489 0x0041,
2490 0x005C, 0x0078, 0x0033, 0x0041, /* \x3A */
2491 0x005C, 0x0078, 0x0031, 0x0041, /* \x1A */
2492 0x0042 };
2493 int32_t from_iso_2022_jpOffsC [] ={ 3,7,7,7,7,7,7,7,7,12 };
2494 if(!testConvertToUnicodeWithContext(sampleTxt_iso_2022_jp, UPRV_LENGTHOF(sampleTxt_iso_2022_jp),
2495 iso_2022_jptoUnicodeC, UPRV_LENGTHOF(iso_2022_jptoUnicodeC),"iso-2022-jp",
2496 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_jpOffsC, NULL, 0,UCNV_ESCAPE_C,U_ZERO_ERROR ))
2497 log_err("iso-2022-jp->u with substitute with value and UCNV_ESCAPE_C did not match.\n");
2498 }
2499 }
2500 if(!testConvertToUnicode(sampleTxt_iso_2022_cn, UPRV_LENGTHOF(sampleTxt_iso_2022_cn),
2501 iso_2022_cntoUnicode, UPRV_LENGTHOF(iso_2022_cntoUnicode),"iso-2022-cn",
2502 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_cnOffs, NULL, 0))
2503 log_err("iso-2022-cn->u with substitute with value did not match.\n");
2504
2505 if(!testConvertToUnicode(sampleTxt_iso_2022_kr, UPRV_LENGTHOF(sampleTxt_iso_2022_kr),
2506 iso_2022_krtoUnicode, UPRV_LENGTHOF(iso_2022_krtoUnicode),"iso-2022-kr",
2507 UCNV_TO_U_CALLBACK_ESCAPE, from_iso_2022_krOffs, NULL, 0))
2508 log_err("iso-2022-kr->u with substitute with value did not match.\n");
2509
2510 if(!testConvertToUnicode(sampleTxt_hz, UPRV_LENGTHOF(sampleTxt_hz),
2511 hztoUnicode, UPRV_LENGTHOF(hztoUnicode),"HZ",
2512 UCNV_TO_U_CALLBACK_ESCAPE, from_hzOffs, NULL, 0))
2513 log_err("hz->u with substitute with value did not match.\n");
2514
2515 if(!testConvertToUnicode(sampleTxt_iscii, UPRV_LENGTHOF(sampleTxt_iscii),
2516 isciitoUnicode, UPRV_LENGTHOF(isciitoUnicode),"ISCII,version=0",
2517 UCNV_TO_U_CALLBACK_ESCAPE, from_isciiOffs, NULL, 0))
2518 log_err("ISCII ->u with substitute with value did not match.\n");
2519 #endif
2520
2521 if(!testConvertToUnicode(sampleTxtUTF8, UPRV_LENGTHOF(sampleTxtUTF8),
2522 UTF8ToUnicode, UPRV_LENGTHOF(UTF8ToUnicode),"UTF-8",
2523 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8, NULL, 0))
2524 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2525 if(!testConvertToUnicodeWithContext(sampleTxtUTF8, UPRV_LENGTHOF(sampleTxtUTF8),
2526 UTF8ToUnicodeXML_DEC, UPRV_LENGTHOF(UTF8ToUnicodeXML_DEC),"UTF-8",
2527 UCNV_TO_U_CALLBACK_ESCAPE, fromUTF8XML_DEC, NULL, 0, UCNV_ESCAPE_XML_DEC, U_ZERO_ERROR))
2528 log_err("UTF8->u with UCNV_TO_U_CALLBACK_ESCAPE with value did not match.\n");
2529 }
2530 }
2531
2532 #if !UCONFIG_NO_LEGACY_CONVERSION
TestLegalAndOthers(int32_t inputsize,int32_t outputsize)2533 static void TestLegalAndOthers(int32_t inputsize, int32_t outputsize)
2534 {
2535 static const UChar legalText[] = { 0x0000, 0xAC00, 0xAC01, 0xD700 };
2536 static const uint8_t templegal949[] ={ 0x00, 0xb0, 0xa1, 0xb0, 0xa2, 0xc8, 0xd3 };
2537 static const int32_t to949legal[] = {0, 1, 1, 2, 2, 3, 3};
2538
2539
2540 static const uint8_t text943[] = {
2541 0x82, 0xa9, 0x82, 0x20, 0x61, 0x8a, 0xbf, 0x8e, 0x9a };
2542 static const UChar toUnicode943sub[] = { 0x304b, 0x1a, 0x20, 0x0061, 0x6f22, 0x5b57 };
2543 static const UChar toUnicode943skip[]= { 0x304b, 0x20, 0x0061, 0x6f22, 0x5b57 };
2544 static const UChar toUnicode943stop[]= { 0x304b};
2545
2546 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 7 };
2547 static const int32_t fromIBM943Offsskip[] = { 0, 3, 4, 5, 7 };
2548 static const int32_t fromIBM943Offsstop[] = { 0};
2549
2550 gInBufferSize = inputsize;
2551 gOutBufferSize = outputsize;
2552 /*checking with a legal value*/
2553 if(!testConvertFromUnicode(legalText, UPRV_LENGTHOF(legalText),
2554 templegal949, UPRV_LENGTHOF(templegal949), "ibm-949",
2555 UCNV_FROM_U_CALLBACK_SKIP, to949legal, NULL, 0 ))
2556 log_err("u-> ibm-949 with skip did not match.\n");
2557
2558 /*checking illegal value for ibm-943 with substitute*/
2559 if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2560 toUnicode943sub, UPRV_LENGTHOF(toUnicode943sub),"ibm-943",
2561 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2562 log_err("ibm-943->u with subst did not match.\n");
2563 /*checking illegal value for ibm-943 with skip */
2564 if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2565 toUnicode943skip, UPRV_LENGTHOF(toUnicode943skip),"ibm-943",
2566 UCNV_TO_U_CALLBACK_SKIP, fromIBM943Offsskip, NULL, 0 ))
2567 log_err("ibm-943->u with skip did not match.\n");
2568
2569 /*checking illegal value for ibm-943 with stop */
2570 if(!testConvertToUnicode(text943, UPRV_LENGTHOF(text943),
2571 toUnicode943stop, UPRV_LENGTHOF(toUnicode943stop),"ibm-943",
2572 UCNV_TO_U_CALLBACK_STOP, fromIBM943Offsstop, NULL, 0 ))
2573 log_err("ibm-943->u with stop did not match.\n");
2574
2575 }
2576
TestSingleByte(int32_t inputsize,int32_t outputsize)2577 static void TestSingleByte(int32_t inputsize, int32_t outputsize)
2578 {
2579 static const uint8_t sampleText[] = {
2580 0x82, 0xa9, 0x61, 0x62, 0x63 , 0x82,
2581 0xff, 0x32, 0x33};
2582 static const UChar toUnicode943sub[] = { 0x304b, 0x0061, 0x0062, 0x0063, 0x1a, 0x1a, 0x0032, 0x0033 };
2583 static const int32_t fromIBM943Offssub[] = { 0, 2, 3, 4, 5, 6, 7, 8 };
2584 /*checking illegal value for ibm-943 with substitute*/
2585 gInBufferSize = inputsize;
2586 gOutBufferSize = outputsize;
2587
2588 if(!testConvertToUnicode(sampleText, UPRV_LENGTHOF(sampleText),
2589 toUnicode943sub, UPRV_LENGTHOF(toUnicode943sub),"ibm-943",
2590 UCNV_TO_U_CALLBACK_SUBSTITUTE, fromIBM943Offssub, NULL, 0 ))
2591 log_err("ibm-943->u with subst did not match.\n");
2592 }
2593
TestEBCDIC_STATEFUL_Sub(int32_t inputsize,int32_t outputsize)2594 static void TestEBCDIC_STATEFUL_Sub(int32_t inputsize, int32_t outputsize)
2595 {
2596 /*EBCDIC_STATEFUL*/
2597 static const UChar ebcdic_inputTest[] = { 0x0061, 0x6d64, 0x0061, 0x00A2, 0x6d65, 0x0061 };
2598 static const uint8_t toIBM930[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x0e, 0xfe, 0xfe, 0x0f, 0x62 };
2599 static const int32_t offset_930[]= { 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 5, 5 };
2600 /* s SO doubl SI sng s SO fe fe SI s */
2601
2602 /*EBCDIC_STATEFUL with subChar=3f*/
2603 static const uint8_t toIBM930_subvaried[]= { 0x62, 0x0e, 0x5d, 0x63, 0x0f, 0x62, 0xb1, 0x3f, 0x62 };
2604 static const int32_t offset_930_subvaried[]= { 0, 1, 1, 1, 2, 2, 3, 4, 5 };
2605 static const char mySubChar[]={ 0x3f};
2606
2607 gInBufferSize = inputsize;
2608 gOutBufferSize = outputsize;
2609
2610 if(!testConvertFromUnicode(ebcdic_inputTest, UPRV_LENGTHOF(ebcdic_inputTest),
2611 toIBM930, UPRV_LENGTHOF(toIBM930), "ibm-930",
2612 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930, NULL, 0 ))
2613 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst did not match.\n");
2614
2615 if(!testConvertFromUnicode(ebcdic_inputTest, UPRV_LENGTHOF(ebcdic_inputTest),
2616 toIBM930_subvaried, UPRV_LENGTHOF(toIBM930_subvaried), "ibm-930",
2617 UCNV_FROM_U_CALLBACK_SUBSTITUTE, offset_930_subvaried, mySubChar, 1 ))
2618 log_err("u-> ibm-930(EBCDIC_STATEFUL) with subst(setSubChar=0x3f) did not match.\n");
2619 }
2620 #endif
2621
testConvertFromUnicode(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UConverterFromUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len)2622 UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
2623 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
2624 const char *mySubChar, int8_t len)
2625 {
2626
2627
2628 UErrorCode status = U_ZERO_ERROR;
2629 UConverter *conv = 0;
2630 char junkout[NEW_MAX_BUFFER]; /* FIX */
2631 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2632 const UChar *src;
2633 char *end;
2634 char *targ;
2635 int32_t *offs;
2636 int i;
2637 int32_t realBufferSize;
2638 char *realBufferEnd;
2639 const UChar *realSourceEnd;
2640 const UChar *sourceLimit;
2641 UBool checkOffsets = true;
2642 UBool doFlush;
2643 char junk[9999];
2644 char offset_str[9999];
2645 char *p;
2646 UConverterFromUCallback oldAction = NULL;
2647 const void* oldContext = NULL;
2648
2649
2650 for(i=0;i<NEW_MAX_BUFFER;i++)
2651 junkout[i] = (char)0xF0;
2652 for(i=0;i<NEW_MAX_BUFFER;i++)
2653 junokout[i] = 0xFF;
2654 setNuConvTestName(codepage, "FROM");
2655
2656 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
2657 gOutBufferSize);
2658
2659 conv = ucnv_open(codepage, &status);
2660 if(U_FAILURE(status))
2661 {
2662 log_data_err("Couldn't open converter %s\n",codepage);
2663 return true;
2664 }
2665
2666 log_verbose("Converter opened..\n");
2667
2668 /*----setting the callback routine----*/
2669 ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2670 if (U_FAILURE(status))
2671 {
2672 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2673 }
2674 /*------------------------*/
2675 /*setting the subChar*/
2676 if(mySubChar != NULL){
2677 ucnv_setSubstChars(conv, mySubChar, len, &status);
2678 if (U_FAILURE(status)) {
2679 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2680 }
2681 }
2682 /*------------*/
2683
2684 src = source;
2685 targ = junkout;
2686 offs = junokout;
2687
2688 realBufferSize = UPRV_LENGTHOF(junkout);
2689 realBufferEnd = junkout + realBufferSize;
2690 realSourceEnd = source + sourceLen;
2691
2692 if ( gOutBufferSize != realBufferSize )
2693 checkOffsets = false;
2694
2695 if( gInBufferSize != NEW_MAX_BUFFER )
2696 checkOffsets = false;
2697
2698 do
2699 {
2700 end = nct_min(targ + gOutBufferSize, realBufferEnd);
2701 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
2702
2703 doFlush = (UBool)(sourceLimit == realSourceEnd);
2704
2705 if(targ == realBufferEnd)
2706 {
2707 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
2708 return false;
2709 }
2710 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"true":"false");
2711
2712
2713 status = U_ZERO_ERROR;
2714
2715 ucnv_fromUnicode (conv,
2716 (char **)&targ,
2717 (const char *)end,
2718 &src,
2719 sourceLimit,
2720 checkOffsets ? offs : NULL,
2721 doFlush, /* flush if we're at the end of the input data */
2722 &status);
2723 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
2724
2725
2726 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2727 UChar errChars[50]; /* should be sufficient */
2728 int8_t errLen = 50;
2729 UErrorCode err = U_ZERO_ERROR;
2730 const UChar* start= NULL;
2731 ucnv_getInvalidUChars(conv,errChars, &errLen, &err);
2732 if(U_FAILURE(err)){
2733 log_err("ucnv_getInvalidUChars failed with error : %s\n",u_errorName(err));
2734 }
2735 /* length of in invalid chars should be equal to returned length*/
2736 start = src - errLen;
2737 if(u_strncmp(errChars,start,errLen)!=0){
2738 log_err("ucnv_getInvalidUChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2739 }
2740 }
2741 /* allow failure codes for the stop callback */
2742 if(U_FAILURE(status) &&
2743 (callback != UCNV_FROM_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND)))
2744 {
2745 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2746 return false;
2747 }
2748
2749 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
2750 sourceLen, targ-junkout);
2751 if(getTestOption(VERBOSITY_OPTION))
2752 {
2753
2754 junk[0] = 0;
2755 offset_str[0] = 0;
2756 for(p = junkout;p<targ;p++)
2757 {
2758 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
2759 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
2760 }
2761
2762 log_verbose(junk);
2763 printSeq(expect, expectLen);
2764 if ( checkOffsets )
2765 {
2766 log_verbose("\nOffsets:");
2767 log_verbose(offset_str);
2768 }
2769 log_verbose("\n");
2770 }
2771 ucnv_close(conv);
2772
2773
2774 if(expectLen != targ-junkout)
2775 {
2776 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2777 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
2778 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2779 printSeqErr(expect, expectLen);
2780 return false;
2781 }
2782
2783 if (checkOffsets && (expectOffsets != 0) )
2784 {
2785 log_verbose("comparing %d offsets..\n", targ-junkout);
2786 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
2787 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2788 log_err("Got Output : ");
2789 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
2790 log_err("Got Offsets: ");
2791 for(p=junkout;p<targ;p++)
2792 log_err("%d,", junokout[p-junkout]);
2793 log_err("\n");
2794 log_err("Expected Offsets: ");
2795 for(i=0; i<(targ-junkout); i++)
2796 log_err("%d,", expectOffsets[i]);
2797 log_err("\n");
2798 return false;
2799 }
2800 }
2801
2802 if(!memcmp(junkout, expect, expectLen))
2803 {
2804 log_verbose("String matches! %s\n", gNuConvTestName);
2805 return true;
2806 }
2807 else
2808 {
2809 log_err("String does not match. %s\n", gNuConvTestName);
2810 log_err("source: ");
2811 printUSeqErr(source, sourceLen);
2812 log_err("Got: ");
2813 printSeqErr((const uint8_t *)junkout, expectLen);
2814 log_err("Expected: ");
2815 printSeqErr(expect, expectLen);
2816 return false;
2817 }
2818 }
2819
testConvertToUnicode(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UConverterToUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len)2820 UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
2821 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
2822 const char *mySubChar, int8_t len)
2823 {
2824 UErrorCode status = U_ZERO_ERROR;
2825 UConverter *conv = 0;
2826 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
2827 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
2828 const char *src;
2829 const char *realSourceEnd;
2830 const char *srcLimit;
2831 UChar *targ;
2832 UChar *end;
2833 int32_t *offs;
2834 int i;
2835 UBool checkOffsets = true;
2836 char junk[9999];
2837 char offset_str[9999];
2838 UChar *p;
2839 UConverterToUCallback oldAction = NULL;
2840 const void* oldContext = NULL;
2841
2842 int32_t realBufferSize;
2843 UChar *realBufferEnd;
2844
2845
2846 for(i=0;i<NEW_MAX_BUFFER;i++)
2847 junkout[i] = 0xFFFE;
2848
2849 for(i=0;i<NEW_MAX_BUFFER;i++)
2850 junokout[i] = -1;
2851
2852 setNuConvTestName(codepage, "TO");
2853
2854 log_verbose("\n========= %s\n", gNuConvTestName);
2855
2856 conv = ucnv_open(codepage, &status);
2857 if(U_FAILURE(status))
2858 {
2859 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
2860 return true;
2861 }
2862
2863 log_verbose("Converter opened..\n");
2864
2865 src = (const char *)source;
2866 targ = junkout;
2867 offs = junokout;
2868
2869 realBufferSize = UPRV_LENGTHOF(junkout);
2870 realBufferEnd = junkout + realBufferSize;
2871 realSourceEnd = src + sourcelen;
2872 /*----setting the callback routine----*/
2873 ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
2874 if (U_FAILURE(status))
2875 {
2876 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2877 }
2878 /*-------------------------------------*/
2879 /*setting the subChar*/
2880 if(mySubChar != NULL){
2881 ucnv_setSubstChars(conv, mySubChar, len, &status);
2882 if (U_FAILURE(status)) {
2883 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
2884 }
2885 }
2886 /*------------*/
2887
2888
2889 if ( gOutBufferSize != realBufferSize )
2890 checkOffsets = false;
2891
2892 if( gInBufferSize != NEW_MAX_BUFFER )
2893 checkOffsets = false;
2894
2895 do
2896 {
2897 end = nct_min( targ + gOutBufferSize, realBufferEnd);
2898 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
2899
2900 if(targ == realBufferEnd)
2901 {
2902 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
2903 return false;
2904 }
2905 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
2906
2907
2908
2909 status = U_ZERO_ERROR;
2910
2911 ucnv_toUnicode (conv,
2912 &targ,
2913 end,
2914 (const char **)&src,
2915 (const char *)srcLimit,
2916 checkOffsets ? offs : NULL,
2917 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
2918 &status);
2919 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
2920
2921 if(status==U_INVALID_CHAR_FOUND || status == U_ILLEGAL_CHAR_FOUND){
2922 char errChars[50]; /* should be sufficient */
2923 int8_t errLen = 50;
2924 UErrorCode err = U_ZERO_ERROR;
2925 const char* start= NULL;
2926 ucnv_getInvalidChars(conv,errChars, &errLen, &err);
2927 if(U_FAILURE(err)){
2928 log_err("ucnv_getInvalidChars failed with error : %s\n",u_errorName(err));
2929 }
2930 /* length of in invalid chars should be equal to returned length*/
2931 start = src - errLen;
2932 if(uprv_strncmp(errChars,start,errLen)!=0){
2933 log_err("ucnv_getInvalidChars did not return the correct invalid chars for encoding %s \n", ucnv_getName(conv,&err));
2934 }
2935 }
2936 /* allow failure codes for the stop callback */
2937 if(U_FAILURE(status) &&
2938 (callback != UCNV_TO_U_CALLBACK_STOP || (status != U_INVALID_CHAR_FOUND && status != U_ILLEGAL_CHAR_FOUND && status != U_TRUNCATED_CHAR_FOUND)))
2939 {
2940 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
2941 return false;
2942 }
2943
2944 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
2945 sourcelen, targ-junkout);
2946 if(getTestOption(VERBOSITY_OPTION))
2947 {
2948
2949 junk[0] = 0;
2950 offset_str[0] = 0;
2951
2952 for(p = junkout;p<targ;p++)
2953 {
2954 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
2955 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
2956 }
2957
2958 log_verbose(junk);
2959 printUSeq(expect, expectlen);
2960 if ( checkOffsets )
2961 {
2962 log_verbose("\nOffsets:");
2963 log_verbose(offset_str);
2964 }
2965 log_verbose("\n");
2966 }
2967 ucnv_close(conv);
2968
2969 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
2970
2971 if (checkOffsets && (expectOffsets != 0))
2972 {
2973 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
2974 {
2975 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
2976 log_err("Got offsets: ");
2977 for(p=junkout;p<targ;p++)
2978 log_err(" %2d,", junokout[p-junkout]);
2979 log_err("\n");
2980 log_err("Expected offsets: ");
2981 for(i=0; i<(targ-junkout); i++)
2982 log_err(" %2d,", expectOffsets[i]);
2983 log_err("\n");
2984 log_err("Got output: ");
2985 for(i=0; i<(targ-junkout); i++)
2986 log_err("0x%04x,", junkout[i]);
2987 log_err("\n");
2988 log_err("From source: ");
2989 for(i=0; i<(src-(const char *)source); i++)
2990 log_err(" 0x%02x,", (unsigned char)source[i]);
2991 log_err("\n");
2992 }
2993 }
2994
2995 if(!memcmp(junkout, expect, expectlen*2))
2996 {
2997 log_verbose("Matches!\n");
2998 return true;
2999 }
3000 else
3001 {
3002 log_err("String does not match. %s\n", gNuConvTestName);
3003 log_verbose("String does not match. %s\n", gNuConvTestName);
3004 log_err("Got: ");
3005 printUSeqErr(junkout, expectlen);
3006 log_err("Expected: ");
3007 printUSeqErr(expect, expectlen);
3008 log_err("\n");
3009 return false;
3010 }
3011 }
3012
testConvertFromUnicodeWithContext(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UConverterFromUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len,const void * context,UErrorCode expectedError)3013 UBool testConvertFromUnicodeWithContext(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
3014 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets,
3015 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3016 {
3017
3018
3019 UErrorCode status = U_ZERO_ERROR;
3020 UConverter *conv = 0;
3021 char junkout[NEW_MAX_BUFFER]; /* FIX */
3022 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3023 const UChar *src;
3024 char *end;
3025 char *targ;
3026 int32_t *offs;
3027 int i;
3028 int32_t realBufferSize;
3029 char *realBufferEnd;
3030 const UChar *realSourceEnd;
3031 const UChar *sourceLimit;
3032 UBool checkOffsets = true;
3033 UBool doFlush;
3034 char junk[9999];
3035 char offset_str[9999];
3036 char *p;
3037 UConverterFromUCallback oldAction = NULL;
3038 const void* oldContext = NULL;
3039
3040
3041 for(i=0;i<NEW_MAX_BUFFER;i++)
3042 junkout[i] = (char)0xF0;
3043 for(i=0;i<NEW_MAX_BUFFER;i++)
3044 junokout[i] = 0xFF;
3045 setNuConvTestName(codepage, "FROM");
3046
3047 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
3048 gOutBufferSize);
3049
3050 conv = ucnv_open(codepage, &status);
3051 if(U_FAILURE(status))
3052 {
3053 log_data_err("Couldn't open converter %s\n",codepage);
3054 return true; /* Because the err has already been logged. */
3055 }
3056
3057 log_verbose("Converter opened..\n");
3058
3059 /*----setting the callback routine----*/
3060 ucnv_setFromUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3061 if (U_FAILURE(status))
3062 {
3063 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3064 }
3065 /*------------------------*/
3066 /*setting the subChar*/
3067 if(mySubChar != NULL){
3068 ucnv_setSubstChars(conv, mySubChar, len, &status);
3069 if (U_FAILURE(status)) {
3070 log_err("FAILURE in setting substitution chars! %s\n", myErrorName(status));
3071 }
3072 }
3073 /*------------*/
3074
3075 src = source;
3076 targ = junkout;
3077 offs = junokout;
3078
3079 realBufferSize = UPRV_LENGTHOF(junkout);
3080 realBufferEnd = junkout + realBufferSize;
3081 realSourceEnd = source + sourceLen;
3082
3083 if ( gOutBufferSize != realBufferSize )
3084 checkOffsets = false;
3085
3086 if( gInBufferSize != NEW_MAX_BUFFER )
3087 checkOffsets = false;
3088
3089 do
3090 {
3091 end = nct_min(targ + gOutBufferSize, realBufferEnd);
3092 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
3093
3094 doFlush = (UBool)(sourceLimit == realSourceEnd);
3095
3096 if(targ == realBufferEnd)
3097 {
3098 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
3099 return false;
3100 }
3101 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"true":"false");
3102
3103
3104 status = U_ZERO_ERROR;
3105
3106 ucnv_fromUnicode (conv,
3107 (char **)&targ,
3108 (const char *)end,
3109 &src,
3110 sourceLimit,
3111 checkOffsets ? offs : NULL,
3112 doFlush, /* flush if we're at the end of the input data */
3113 &status);
3114 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (sourceLimit < realSourceEnd)) );
3115
3116 /* allow failure codes for the stop callback */
3117 if(U_FAILURE(status) && status != expectedError)
3118 {
3119 log_err("Problem in fromUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3120 return false;
3121 }
3122
3123 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
3124 sourceLen, targ-junkout);
3125 if(getTestOption(VERBOSITY_OPTION))
3126 {
3127
3128 junk[0] = 0;
3129 offset_str[0] = 0;
3130 for(p = junkout;p<targ;p++)
3131 {
3132 sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
3133 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
3134 }
3135
3136 log_verbose(junk);
3137 printSeq(expect, expectLen);
3138 if ( checkOffsets )
3139 {
3140 log_verbose("\nOffsets:");
3141 log_verbose(offset_str);
3142 }
3143 log_verbose("\n");
3144 }
3145 ucnv_close(conv);
3146
3147
3148 if(expectLen != targ-junkout)
3149 {
3150 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3151 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
3152 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3153 printSeqErr(expect, expectLen);
3154 return false;
3155 }
3156
3157 if (checkOffsets && (expectOffsets != 0) )
3158 {
3159 log_verbose("comparing %d offsets..\n", targ-junkout);
3160 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
3161 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3162 log_err("Got Output : ");
3163 printSeqErr((const uint8_t *)junkout, (int32_t)(targ-junkout));
3164 log_err("Got Offsets: ");
3165 for(p=junkout;p<targ;p++)
3166 log_err("%d,", junokout[p-junkout]);
3167 log_err("\n");
3168 log_err("Expected Offsets: ");
3169 for(i=0; i<(targ-junkout); i++)
3170 log_err("%d,", expectOffsets[i]);
3171 log_err("\n");
3172 return false;
3173 }
3174 }
3175
3176 if(!memcmp(junkout, expect, expectLen))
3177 {
3178 log_verbose("String matches! %s\n", gNuConvTestName);
3179 return true;
3180 }
3181 else
3182 {
3183 log_err("String does not match. %s\n", gNuConvTestName);
3184 log_err("source: ");
3185 printUSeqErr(source, sourceLen);
3186 log_err("Got: ");
3187 printSeqErr((const uint8_t *)junkout, expectLen);
3188 log_err("Expected: ");
3189 printSeqErr(expect, expectLen);
3190 return false;
3191 }
3192 }
testConvertToUnicodeWithContext(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UConverterToUCallback callback,const int32_t * expectOffsets,const char * mySubChar,int8_t len,const void * context,UErrorCode expectedError)3193 UBool testConvertToUnicodeWithContext( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
3194 const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets,
3195 const char *mySubChar, int8_t len, const void* context, UErrorCode expectedError)
3196 {
3197 UErrorCode status = U_ZERO_ERROR;
3198 UConverter *conv = 0;
3199 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
3200 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
3201 const char *src;
3202 const char *realSourceEnd;
3203 const char *srcLimit;
3204 UChar *targ;
3205 UChar *end;
3206 int32_t *offs;
3207 int i;
3208 UBool checkOffsets = true;
3209 char junk[9999];
3210 char offset_str[9999];
3211 UChar *p;
3212 UConverterToUCallback oldAction = NULL;
3213 const void* oldContext = NULL;
3214
3215 int32_t realBufferSize;
3216 UChar *realBufferEnd;
3217
3218
3219 for(i=0;i<NEW_MAX_BUFFER;i++)
3220 junkout[i] = 0xFFFE;
3221
3222 for(i=0;i<NEW_MAX_BUFFER;i++)
3223 junokout[i] = -1;
3224
3225 setNuConvTestName(codepage, "TO");
3226
3227 log_verbose("\n========= %s\n", gNuConvTestName);
3228
3229 conv = ucnv_open(codepage, &status);
3230 if(U_FAILURE(status))
3231 {
3232 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
3233 return true;
3234 }
3235
3236 log_verbose("Converter opened..\n");
3237
3238 src = (const char *)source;
3239 targ = junkout;
3240 offs = junokout;
3241
3242 realBufferSize = UPRV_LENGTHOF(junkout);
3243 realBufferEnd = junkout + realBufferSize;
3244 realSourceEnd = src + sourcelen;
3245 /*----setting the callback routine----*/
3246 ucnv_setToUCallBack (conv, callback, context, &oldAction, &oldContext, &status);
3247 if (U_FAILURE(status))
3248 {
3249 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3250 }
3251 /*-------------------------------------*/
3252 /*setting the subChar*/
3253 if(mySubChar != NULL){
3254 ucnv_setSubstChars(conv, mySubChar, len, &status);
3255 if (U_FAILURE(status)) {
3256 log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
3257 }
3258 }
3259 /*------------*/
3260
3261
3262 if ( gOutBufferSize != realBufferSize )
3263 checkOffsets = false;
3264
3265 if( gInBufferSize != NEW_MAX_BUFFER )
3266 checkOffsets = false;
3267
3268 do
3269 {
3270 end = nct_min( targ + gOutBufferSize, realBufferEnd);
3271 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
3272
3273 if(targ == realBufferEnd)
3274 {
3275 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
3276 return false;
3277 }
3278 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
3279
3280
3281
3282 status = U_ZERO_ERROR;
3283
3284 ucnv_toUnicode (conv,
3285 &targ,
3286 end,
3287 (const char **)&src,
3288 (const char *)srcLimit,
3289 checkOffsets ? offs : NULL,
3290 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
3291 &status);
3292 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
3293
3294 /* allow failure codes for the stop callback */
3295 if(U_FAILURE(status) && status!=expectedError)
3296 {
3297 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
3298 return false;
3299 }
3300
3301 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
3302 sourcelen, targ-junkout);
3303 if(getTestOption(VERBOSITY_OPTION))
3304 {
3305
3306 junk[0] = 0;
3307 offset_str[0] = 0;
3308
3309 for(p = junkout;p<targ;p++)
3310 {
3311 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
3312 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
3313 }
3314
3315 log_verbose(junk);
3316 printUSeq(expect, expectlen);
3317 if ( checkOffsets )
3318 {
3319 log_verbose("\nOffsets:");
3320 log_verbose(offset_str);
3321 }
3322 log_verbose("\n");
3323 }
3324 ucnv_close(conv);
3325
3326 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
3327
3328 if (checkOffsets && (expectOffsets != 0))
3329 {
3330 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
3331 {
3332 log_err("did not get the expected offsets while %s \n", gNuConvTestName);
3333 log_err("Got offsets: ");
3334 for(p=junkout;p<targ;p++)
3335 log_err(" %2d,", junokout[p-junkout]);
3336 log_err("\n");
3337 log_err("Expected offsets: ");
3338 for(i=0; i<(targ-junkout); i++)
3339 log_err(" %2d,", expectOffsets[i]);
3340 log_err("\n");
3341 log_err("Got output: ");
3342 for(i=0; i<(targ-junkout); i++)
3343 log_err("0x%04x,", junkout[i]);
3344 log_err("\n");
3345 log_err("From source: ");
3346 for(i=0; i<(src-(const char *)source); i++)
3347 log_err(" 0x%02x,", (unsigned char)source[i]);
3348 log_err("\n");
3349 }
3350 }
3351
3352 if(!memcmp(junkout, expect, expectlen*2))
3353 {
3354 log_verbose("Matches!\n");
3355 return true;
3356 }
3357 else
3358 {
3359 log_err("String does not match. %s\n", gNuConvTestName);
3360 log_verbose("String does not match. %s\n", gNuConvTestName);
3361 log_err("Got: ");
3362 printUSeqErr(junkout, expectlen);
3363 log_err("Expected: ");
3364 printUSeqErr(expect, expectlen);
3365 log_err("\n");
3366 return false;
3367 }
3368 }
3369
TestCallBackFailure(void)3370 static void TestCallBackFailure(void) {
3371 UErrorCode status = U_USELESS_COLLATOR_ERROR;
3372 ucnv_cbFromUWriteBytes(NULL, NULL, -1, -1, &status);
3373 if (status != U_USELESS_COLLATOR_ERROR) {
3374 log_err("Error: ucnv_cbFromUWriteBytes did not react correctly to a bad UErrorCode\n");
3375 }
3376 ucnv_cbFromUWriteUChars(NULL, NULL, NULL, -1, &status);
3377 if (status != U_USELESS_COLLATOR_ERROR) {
3378 log_err("Error: ucnv_cbFromUWriteUChars did not react correctly to a bad UErrorCode\n");
3379 }
3380 ucnv_cbFromUWriteSub(NULL, -1, &status);
3381 if (status != U_USELESS_COLLATOR_ERROR) {
3382 log_err("Error: ucnv_cbFromUWriteSub did not react correctly to a bad UErrorCode\n");
3383 }
3384 ucnv_cbToUWriteUChars(NULL, NULL, -1, -1, &status);
3385 if (status != U_USELESS_COLLATOR_ERROR) {
3386 log_err("Error: ucnv_cbToUWriteUChars did not react correctly to a bad UErrorCode\n");
3387 }
3388 }
3389