• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*****************************************************************************
9 *
10 * File ncnvtst.c
11 *
12 * Modification History:
13 *        Name                     Description
14 *   Madhu Katragadda              7/7/2000        Converter Tests for extended code coverage
15 ******************************************************************************
16 */
17 #include <stdbool.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include "unicode/uloc.h"
22 #include "unicode/ucnv.h"
23 #include "unicode/utypes.h"
24 #include "unicode/ustring.h"
25 #include "unicode/uset.h"
26 #include "unicode/utf8.h"
27 #include "unicode/utf16.h"
28 #include "cintltst.h"
29 #include "cmemory.h"
30 
31 #define MAX_LENGTH 999
32 
33 #define UNICODE_LIMIT 0x10FFFF
34 #define SURROGATE_HIGH_START    0xD800
35 #define SURROGATE_LOW_END       0xDFFF
36 
37 static int32_t  gInBufferSize = 0;
38 static int32_t  gOutBufferSize = 0;
39 static char     gNuConvTestName[1024];
40 
/*
 * Yields the smaller of x and y.
 * Every argument use is parenthesized so that operands containing
 * low-precedence operators (|, &, ?:, ...) are grouped as the caller wrote
 * them. Note that each argument may still be evaluated twice.
 */
#define nct_min(x,y)  (((x) < (y)) ? (x) : (y))
42 
43 static void printSeq(const unsigned char* a, int len);
44 static void printSeqErr(const unsigned char* a, int len);
45 static void printUSeq(const UChar* a, int len);
46 static void printUSeqErr(const UChar* a, int len);
47 static UBool convertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
48                 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus);
49 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen,
50                const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus);
51 
52 static UBool testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
53                 const char *codepage, UConverterFromUCallback callback, const int32_t *expectOffsets, UBool testReset);
54 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
55                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset);
56 
setNuConvTestName(const char * codepage,const char * direction)57 static void setNuConvTestName(const char *codepage, const char *direction)
58 {
59     sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
60         codepage,
61         direction,
62         (int)gInBufferSize,
63         (int)gOutBufferSize);
64 }
65 
66 
67 static void TestSurrogateBehaviour(void);
68 static void TestErrorBehaviour(void);
69 
70 #if !UCONFIG_NO_LEGACY_CONVERSION
71 static void TestToUnicodeErrorBehaviour(void);
72 static void TestGetNextErrorBehaviour(void);
73 #endif
74 
75 static void TestRegressionUTF8(void);
76 static void TestRegressionUTF32(void);
77 static void TestAvailableConverters(void);
78 static void TestFlushInternalBuffer(void);  /*for improved code coverage in ucnv_cnv.c*/
79 static void TestResetBehaviour(void);
80 static void TestTruncated(void);
81 static void TestUnicodeSet(void);
82 
83 static void TestWithBufferSize(int32_t osize, int32_t isize);
84 
85 
/*
 * Writes the first len bytes of a to the verbose log as "0xNN " items
 * enclosed in braces.
 */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("\n{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02X ", a[idx]);
    }
    log_verbose("}\n");
}
94 
/*
 * Writes the first len UTF-16 code units of a to the verbose log as
 * "0xNNNN " items enclosed in braces.
 *
 * The format is "0x%04X " (the same one printUSeqErr uses); the previous
 * "%0x04X " printed unpadded lowercase hex followed by the literal "04X".
 */
static void printUSeq(const UChar* a, int len)
{
    int i=0;
    log_verbose("\n{");
    while (i<len)
        log_verbose("0x%04X ", a[i++]);
    log_verbose("}\n");
}
103 
/*
 * Writes the first len bytes of a to stderr as "0xNN " items enclosed in
 * braces.
 */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "\n{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02X ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
111 
/*
 * Writes the first len UTF-16 code units of a to stderr as "0xNNNN " items
 * enclosed in braces.
 */
static void printUSeqErr(const UChar* a, int len)
{
    int idx;

    fprintf(stderr, "\n{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%04X ", a[idx]);
    }
    fprintf(stderr,"}\n");
}
120 
121 void addExtraTests(TestNode** root);
122 
/*
 * Registers every test in this file under the "tsconv/ncnvtst/" path by
 * calling addTest once per entry, in table order.
 */
void addExtraTests(TestNode** root)
{
    /* Registration table; entries appear in the order they are added. */
    static const struct {
        void (*fn)(void);
        const char *path;
    } kEntries[] = {
        { &TestSurrogateBehaviour,      "tsconv/ncnvtst/TestSurrogateBehaviour" },
        { &TestErrorBehaviour,          "tsconv/ncnvtst/TestErrorBehaviour" },

#if !UCONFIG_NO_LEGACY_CONVERSION
        { &TestToUnicodeErrorBehaviour, "tsconv/ncnvtst/ToUnicodeErrorBehaviour" },
        { &TestGetNextErrorBehaviour,   "tsconv/ncnvtst/TestGetNextErrorBehaviour" },
#endif

        { &TestAvailableConverters,     "tsconv/ncnvtst/TestAvailableConverters" },
        { &TestFlushInternalBuffer,     "tsconv/ncnvtst/TestFlushInternalBuffer" },
        { &TestResetBehaviour,          "tsconv/ncnvtst/TestResetBehaviour" },
        { &TestRegressionUTF8,          "tsconv/ncnvtst/TestRegressionUTF8" },
        { &TestRegressionUTF32,         "tsconv/ncnvtst/TestRegressionUTF32" },
        { &TestTruncated,               "tsconv/ncnvtst/TestTruncated" },
        { &TestUnicodeSet,              "tsconv/ncnvtst/TestUnicodeSet" }
    };
    const int count = (int)(sizeof(kEntries) / sizeof(kEntries[0]));
    int idx;

    for (idx = 0; idx < count; ++idx) {
        addTest(root, kEntries[idx].fn, kEntries[idx].path);
    }
}
141 
142 /*test surrogate behaviour*/
TestSurrogateBehaviour()143 static void TestSurrogateBehaviour(){
144     log_verbose("Testing for SBCS and LATIN_1\n");
145     {
146         UChar sampleText[] = {0x0031, 0xd801, 0xdc01, 0x0032};
147         const uint8_t expected[] = {0x31, 0x1a, 0x32};
148 
149 #if !UCONFIG_NO_LEGACY_CONVERSION
150         /*SBCS*/
151         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
152                 expected, sizeof(expected), "ibm-920", 0 , true, U_ZERO_ERROR))
153             log_err("u-> ibm-920 [UCNV_SBCS] not match.\n");
154 #endif
155 
156         /*LATIN_1*/
157         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
158                 expected, sizeof(expected), "LATIN_1", 0, true, U_ZERO_ERROR ))
159             log_err("u-> LATIN_1 not match.\n");
160 
161     }
162 
163 #if !UCONFIG_NO_LEGACY_CONVERSION
164     log_verbose("Testing for DBCS and MBCS\n");
165     {
166         UChar sampleText[]       = {0x00a1, 0xd801, 0xdc01, 0x00a4};
167         const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
168         int32_t offsets[]        = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 };
169 
170         /*DBCS*/
171         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
172                 expected, sizeof(expected), "ibm-1363", 0 , true, U_ZERO_ERROR))
173             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
174         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
175                 expected, sizeof(expected), "ibm-1363", offsets , true, U_ZERO_ERROR))
176             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
177         /*MBCS*/
178         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
179                 expected, sizeof(expected), "ibm-1363", 0 , true, U_ZERO_ERROR))
180             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
181         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
182                 expected, sizeof(expected), "ibm-1363", offsets, true, U_ZERO_ERROR))
183             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
184     }
185 
186     log_verbose("Testing for ISO-2022-jp\n");
187     {
188         UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
189 
190         const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
191                                     0x31,0x1A, 0x32};
192 
193 
194         int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 };
195 
196         /*iso-2022-jp*/
197         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
198                 expected, sizeof(expected), "iso-2022-jp", 0 , true, U_ZERO_ERROR))
199             log_err("u-> not match.\n");
200         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
201                 expected, sizeof(expected), "iso-2022-jp", offsets , true, U_ZERO_ERROR))
202             log_err("u->  not match.\n");
203     }
204 
205     log_verbose("Testing for ISO-2022-cn\n");
206     {
207         static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
208 
209         static const uint8_t expected[] = {
210                                     0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
211                                     0x36, 0x21,
212                                     0x0F, 0x31,
213                                     0x1A,
214                                     0x32
215                                     };
216 
217 
218 
219         static const int32_t offsets[] = {
220                                     0,    0,    0,    0,    0,    0,    0,
221                                     1,    1,
222                                     2,    2,
223                                     3,
224                                     5,  };
225 
226         /*iso-2022-CN*/
227         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
228                 expected, sizeof(expected), "iso-2022-cn", 0 , true, U_ZERO_ERROR))
229             log_err("u-> not match.\n");
230         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
231                 expected, sizeof(expected), "iso-2022-cn", offsets , true, U_ZERO_ERROR))
232             log_err("u-> not match.\n");
233     }
234 
235         log_verbose("Testing for ISO-2022-kr\n");
236     {
237         static const UChar    sampleText[] =   { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
238 
239         static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43,
240                                     0x0E, 0x6C, 0x69,
241                                     0x0f, 0x1A,
242                                     0x0e, 0x6F, 0x4B,
243                                     0x0F, 0x31,
244                                     0x1A,
245                                     0x32 };
246 
247         static const int32_t offsets[] = {-1, -1, -1, -1,
248                               0, 0, 0,
249                               1, 1,
250                               3, 3, 3,
251                               4, 4,
252                               5,
253                               7,
254                             };
255 
256         /*iso-2022-kr*/
257         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
258                 expected, sizeof(expected), "iso-2022-kr", 0 , true, U_ZERO_ERROR))
259             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
260         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
261                 expected, sizeof(expected), "iso-2022-kr", offsets , true, U_ZERO_ERROR))
262             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
263     }
264 
265         log_verbose("Testing for HZ\n");
266     {
267         static const UChar    sampleText[] =   { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
268 
269         static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B,
270                                     0x7E, 0x7D, 0x1A,
271                                     0x7E, 0x7B, 0x36, 0x21,
272                                     0x7E, 0x7D, 0x31,
273                                     0x1A,
274                                     0x32 };
275 
276 
277         static const int32_t offsets[] = {0,0,0,0,
278                              1,1,1,
279                              3,3,3,3,
280                              4,4,4,
281                              5,
282                              7,};
283 
284         /*hz*/
285         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
286                 expected, sizeof(expected), "HZ", 0 , true, U_ZERO_ERROR))
287             log_err("u-> HZ not match.\n");
288         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
289                 expected, sizeof(expected), "HZ", offsets , true, U_ZERO_ERROR))
290             log_err("u-> HZ not match.\n");
291     }
292 #endif
293 
294     /*UTF-8*/
295      log_verbose("Testing for UTF8\n");
296     {
297         static const UChar    sampleText[] =   { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032};
298         static const int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02,
299                            0x03, 0x03, 0x03, 0x04, 0x04, 0x04,
300                            0x04, 0x06 };
301         static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31,
302             0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32};
303 
304 
305         static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D };
306         /*UTF-8*/
307         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
308             expected, sizeof(expected), "UTF8", offsets, true, U_ZERO_ERROR ))
309             log_err("u-> UTF8 with offsets and flush true did not match.\n");
310         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
311             expected, sizeof(expected), "UTF8", 0, true, U_ZERO_ERROR ))
312             log_err("u-> UTF8 with offsets and flush true did not match.\n");
313         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
314             expected, sizeof(expected), "UTF8", offsets, false, U_ZERO_ERROR ))
315             log_err("u-> UTF8 with offsets and flush true did not match.\n");
316         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
317             expected, sizeof(expected), "UTF8", 0, false, U_ZERO_ERROR ))
318             log_err("u-> UTF8 with offsets and flush true did not match.\n");
319 
320         if(!convertToU(expected, sizeof(expected),
321             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", 0, true, U_ZERO_ERROR ))
322             log_err("UTF8 -> u did not match.\n");
323         if(!convertToU(expected, sizeof(expected),
324             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", 0, false, U_ZERO_ERROR ))
325             log_err("UTF8 -> u did not match.\n");
326         if(!convertToU(expected, sizeof(expected),
327             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", fromOffsets, true, U_ZERO_ERROR ))
328             log_err("UTF8 ->u  did not match.\n");
329         if(!convertToU(expected, sizeof(expected),
330             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", fromOffsets, false, U_ZERO_ERROR ))
331             log_err("UTF8 -> u did not match.\n");
332 
333     }
334 }
335 
336 /*test various error behaviours*/
TestErrorBehaviour()337 static void TestErrorBehaviour(){
338     log_verbose("Testing for SBCS and LATIN_1\n");
339     {
340         static const UChar    sampleText[] =   { 0x0031, 0xd801};
341         static const UChar    sampleText2[] =   { 0x0031, 0xd801, 0x0032};
342         static const uint8_t expected0[] =          { 0x31};
343         static const uint8_t expected[] =          { 0x31, 0x1a};
344         static const uint8_t expected2[] =         { 0x31, 0x1a, 0x32};
345 
346 #if !UCONFIG_NO_LEGACY_CONVERSION
347         /*SBCS*/
348         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
349                 expected, sizeof(expected), "ibm-920", 0, true, U_ZERO_ERROR))
350             log_err("u-> ibm-920 [UCNV_SBCS] \n");
351         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
352                 expected0, sizeof(expected0), "ibm-920", 0, false, U_ZERO_ERROR))
353             log_err("u-> ibm-920 [UCNV_SBCS] \n");
354         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
355                 expected2, sizeof(expected2), "ibm-920", 0, true, U_ZERO_ERROR))
356             log_err("u-> ibm-920 [UCNV_SBCS] did not match\n");
357 #endif
358 
359         /*LATIN_1*/
360         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
361                 expected, sizeof(expected), "LATIN_1", 0, true, U_ZERO_ERROR))
362             log_err("u-> LATIN_1 is supposed to fail\n");
363         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
364                 expected0, sizeof(expected0), "LATIN_1", 0, false, U_ZERO_ERROR))
365             log_err("u-> LATIN_1 is supposed to fail\n");
366 
367         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
368                 expected2, sizeof(expected2), "LATIN_1", 0, true, U_ZERO_ERROR))
369             log_err("u-> LATIN_1 did not match\n");
370     }
371 
372 #if !UCONFIG_NO_LEGACY_CONVERSION
373     log_verbose("Testing for DBCS and MBCS\n");
374     {
375         static const UChar    sampleText[]    = { 0x00a1, 0xd801};
376         static const uint8_t expected[] = { 0xa2, 0xae};
377         static const int32_t offsets[]        = { 0x00, 0x00};
378         static const uint8_t expectedSUB[] = { 0xa2, 0xae, 0xa1, 0xe0};
379         static const int32_t offsetsSUB[]        = { 0x00, 0x00, 0x01, 0x01};
380 
381         static const UChar       sampleText2[] = { 0x00a1, 0xd801, 0x00a4};
382         static const uint8_t expected2[] = { 0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
383         static const int32_t offsets2[]        = { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02};
384 
385         static const UChar       sampleText3MBCS[] = { 0x0001, 0x00a4, 0xdc01};
386         static const uint8_t expected3MBCS[] = { 0x01, 0xa2, 0xb4, 0xa1, 0xe0};
387         static const int32_t offsets3MBCS[]        = { 0x00, 0x01, 0x01, 0x02, 0x02};
388 
389         static const UChar       sampleText4MBCS[] = { 0x0061, 0xFFE4, 0xdc01};
390         static const uint8_t expected4MBCS[] = { 0x61, 0x8f, 0xa2, 0xc3, 0xf4, 0xfe};
391         static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01, 0x01, 0x02, 0x02 };
392 
393         /*DBCS*/
394         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
395                 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, true, U_ZERO_ERROR))
396             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
397         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
398                 expected, sizeof(expected), "ibm-1363", 0, false, U_AMBIGUOUS_ALIAS_WARNING))
399             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
400 
401         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
402                 expectedSUB, sizeof(expectedSUB), "ibm-1363", offsetsSUB, true, U_ZERO_ERROR))
403             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
404         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
405                 expected, sizeof(expected), "ibm-1363", offsets, false, U_AMBIGUOUS_ALIAS_WARNING))
406             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
407 
408 
409         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
410                 expected2, sizeof(expected2), "ibm-1363", 0, true, U_ZERO_ERROR))
411             log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n");
412         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
413                 expected2, sizeof(expected2), "ibm-1363", offsets2, true, U_ZERO_ERROR))
414             log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n");
415 
416         /*MBCS*/
417         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
418                 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, true, U_ZERO_ERROR))
419             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
420         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
421                 expected, sizeof(expected), "ibm-1363", 0, false, U_AMBIGUOUS_ALIAS_WARNING))
422             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
423 
424         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
425                 expected2, sizeof(expected2), "ibm-1363", 0, true, U_ZERO_ERROR))
426             log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
427         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
428                 expected2, sizeof(expected2), "ibm-1363", 0, false, U_ZERO_ERROR))
429             log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
430         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
431                 expected2, sizeof(expected2), "ibm-1363", offsets2, false, U_ZERO_ERROR))
432             log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
433 
434         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
435                 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, true, U_ZERO_ERROR))
436             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
437         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
438                 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, false, U_ZERO_ERROR))
439             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
440 
441         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
442                 expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, true, U_ZERO_ERROR))
443             log_err("u-> euc-jp [UCNV_MBCS] \n");
444         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
445                 expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, false, U_ZERO_ERROR))
446             log_err("u-> euc-jp [UCNV_MBCS] \n");
447     }
448 
449     /*iso-2022-jp*/
450     log_verbose("Testing for iso-2022-jp\n");
451     {
452         static const UChar    sampleText[]    = { 0x0031, 0xd801};
453         static const uint8_t expected[] = {  0x31};
454         static const uint8_t expectedSUB[] = {  0x31, 0x1a};
455         static const int32_t offsets[]        = { 0x00, 1};
456 
457         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
458         static const uint8_t expected2[] = {  0x31,0x1A,0x32};
459         static const int32_t offsets2[]        = { 0x00,0x01,0x02};
460 
461         static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
462         static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x42, 0x30, 0x6c,0x1b,0x28,0x42,0x1a};
463         static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01,0x02,0x02,0x02,0x02 };
464         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
465                 expectedSUB, sizeof(expectedSUB), "iso-2022-jp", offsets, true, U_ZERO_ERROR))
466             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
467         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
468                 expected, sizeof(expected), "iso-2022-jp", offsets, false, U_AMBIGUOUS_ALIAS_WARNING))
469             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
470 
471         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
472                 expected2, sizeof(expected2), "iso-2022-jp", offsets2, true, U_ZERO_ERROR))
473             log_err("u->iso-2022-jp[UCNV_DBCS] did not match\n");
474         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
475                 expected2, sizeof(expected2), "iso-2022-jp", offsets2, false, U_ZERO_ERROR))
476             log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n");
477         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
478                 expected2, sizeof(expected2), "iso-2022-jp", offsets2, false, U_ZERO_ERROR))
479             log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n");
480 
481         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
482                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, true, U_ZERO_ERROR))
483             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
484         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
485                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, false, U_ZERO_ERROR))
486             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
487     }
488 
489     /*iso-2022-cn*/
490     log_verbose("Testing for iso-2022-cn\n");
491     {
492         static const UChar    sampleText[]    = { 0x0031, 0xd801};
493         static const uint8_t expected[] = { 0x31};
494         static const uint8_t expectedSUB[] = { 0x31, 0x1A};
495         static const int32_t offsets[]        = { 0x00, 1};
496 
497         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
498         static const uint8_t expected2[] = { 0x31, 0x1A,0x32};
499         static const int32_t offsets2[]        = { 0x00, 0x01,0x02};
500 
501         static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
502         static const uint8_t expected3MBCS[] = {0x51, 0x50, 0x1A};
503         static const int32_t offsets3MBCS[]        = { 0x00, 0x01, 0x02 };
504 
505         static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
506         static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, 0x0f, 0x1a };
507         static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02 };
508         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
509                 expectedSUB, sizeof(expectedSUB), "iso-2022-cn", offsets, true, U_ZERO_ERROR))
510             log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
511         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
512                 expected, sizeof(expected), "iso-2022-cn", offsets, false, U_ZERO_ERROR))
513             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
514 
515         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
516                 expected2, sizeof(expected2), "iso-2022-cn", offsets2, true, U_ZERO_ERROR))
517             log_err("u->iso-2022-cn[UCNV_DBCS] did not match\n");
518         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
519                 expected2, sizeof(expected2), "iso-2022-cn", offsets2, false, U_ZERO_ERROR))
520             log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n");
521         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
522                 expected2, sizeof(expected2), "iso-2022-cn", offsets2, false, U_ZERO_ERROR))
523             log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n");
524 
525         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
526                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, true, U_ZERO_ERROR))
527             log_err("u->iso-2022-cn [UCNV_MBCS] \n");
528         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
529                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, false, U_ZERO_ERROR))
530             log_err("u-> iso-2022-cn[UCNV_MBCS] \n");
531 
532         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
533                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, true, U_ZERO_ERROR))
534             log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
535         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
536                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, false, U_ZERO_ERROR))
537             log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
538     }
539 
540     /*iso-2022-kr*/
541     log_verbose("Testing for iso-2022-kr\n");
542     {
543         static const UChar    sampleText[]    = { 0x0031, 0xd801};
544         static const uint8_t expected[] = { 0x1b, 0x24, 0x29, 0x43, 0x31};
545         static const uint8_t expectedSUB[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A};
546         static const int32_t offsets[]        = { -1,   -1,   -1,   -1,   0x00, 1};
547 
548         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
549         static const uint8_t expected2[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A, 0x32};
550         static const int32_t offsets2[]        = { -1,   -1,   -1,   -1,   0x00, 0x01, 0x02};
551 
552         static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
553         static const uint8_t expected3MBCS[] = { 0x1b, 0x24, 0x29, 0x43,  0x51, 0x50, 0x1A };
554         static const int32_t offsets3MBCS[]        = { -1,   -1,   -1,   -1,    0x00, 0x01, 0x02, 0x02 };
555 
556         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
557                 expectedSUB, sizeof(expectedSUB), "iso-2022-kr", offsets, true, U_ZERO_ERROR))
558             log_err("u-> iso-2022-kr [UCNV_MBCS] \n");
559         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
560                 expected, sizeof(expected), "iso-2022-kr", offsets, false, U_ZERO_ERROR))
561             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
562 
563         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
564                 expected2, sizeof(expected2), "iso-2022-kr", offsets2, true, U_ZERO_ERROR))
565             log_err("u->iso-2022-kr[UCNV_DBCS] did not match\n");
566         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
567                 expected2, sizeof(expected2), "iso-2022-kr", offsets2, false, U_ZERO_ERROR))
568             log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n");
569         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
570                 expected2, sizeof(expected2), "iso-2022-kr", offsets2, false, U_ZERO_ERROR))
571             log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n");
572 
573         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
574                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, true, U_ZERO_ERROR))
575             log_err("u->iso-2022-kr [UCNV_MBCS] \n");
576         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
577                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, false, U_ZERO_ERROR))
578             log_err("u-> iso-2022-kr[UCNV_MBCS] \n");
579     }
580 
581     /*HZ*/
582     log_verbose("Testing for HZ\n");
583     {
584         static const UChar    sampleText[]    = { 0x0031, 0xd801};
585         static const uint8_t expected[] = { 0x7e, 0x7d, 0x31};
586         static const uint8_t expectedSUB[] = { 0x7e, 0x7d, 0x31, 0x1A};
587         static const int32_t offsets[]        = { 0x00, 0x00, 0x00, 1};
588 
589         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
590         static const uint8_t expected2[] = { 0x7e, 0x7d, 0x31,  0x1A,  0x32 };
591         static const int32_t offsets2[]        = { 0x00, 0x00, 0x00, 0x01,  0x02 };
592 
593         static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
594         static const uint8_t expected3MBCS[] = { 0x7e, 0x7d, 0x51, 0x50,  0x1A };
595         static const int32_t offsets3MBCS[]        = { 0x00, 0x00, 0x00, 0x01, 0x02};
596 
597         static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
598         static const uint8_t expected4MBCS[] = { 0x7e, 0x7d, 0x61, 0x7e, 0x7b, 0x52, 0x3b, 0x7e, 0x7d, 0x1a };
599         static const int32_t offsets4MBCS[]        = { 0x00, 0x00, 0x00, 0x01, 0x01, 0x01 ,0x01, 0x02, 0x02, 0x02 };
600         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
601                 expectedSUB, sizeof(expectedSUB), "HZ", offsets, true, U_ZERO_ERROR))
602             log_err("u-> HZ [UCNV_MBCS] \n");
603         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
604                 expected, sizeof(expected), "HZ", offsets, false, U_ZERO_ERROR))
605             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
606 
607         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
608                 expected2, sizeof(expected2), "HZ", offsets2, true, U_ZERO_ERROR))
609             log_err("u->HZ[UCNV_DBCS] did not match\n");
610         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
611                 expected2, sizeof(expected2), "HZ", offsets2, false, U_ZERO_ERROR))
612             log_err("u-> HZ [UCNV_DBCS] did not match\n");
613         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
614                 expected2, sizeof(expected2), "HZ", offsets2, false, U_ZERO_ERROR))
615             log_err("u-> HZ [UCNV_DBCS] did not match\n");
616 
617         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
618                 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, true, U_ZERO_ERROR))
619             log_err("u->HZ [UCNV_MBCS] \n");
620         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
621                 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, false, U_ZERO_ERROR))
622             log_err("u-> HZ[UCNV_MBCS] \n");
623 
624         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
625                 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, true, U_ZERO_ERROR))
626             log_err("u-> HZ [UCNV_MBCS] \n");
627         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
628                 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, false, U_ZERO_ERROR))
629             log_err("u-> HZ [UCNV_MBCS] \n");
630     }
631 #endif
632 }
633 
634 #if !UCONFIG_NO_LEGACY_CONVERSION
635 /*test different convertToUnicode error behaviours*/
TestToUnicodeErrorBehaviour()636 static void TestToUnicodeErrorBehaviour()
637 {
638     log_verbose("Testing error conditions for DBCS\n");
639     {
640         uint8_t sampleText[] = { 0xa2, 0xae, 0x03, 0x04};
641         const UChar expected[] = { 0x00a1 };
642 
643         if(!convertToU(sampleText, sizeof(sampleText),
644                 expected, UPRV_LENGTHOF(expected), "ibm-1363", 0, true, U_AMBIGUOUS_ALIAS_WARNING ))
645             log_err("DBCS (ibm-1363)->Unicode  did not match.\n");
646         if(!convertToU(sampleText, sizeof(sampleText),
647                 expected, UPRV_LENGTHOF(expected), "ibm-1363", 0, false, U_AMBIGUOUS_ALIAS_WARNING ))
648             log_err("DBCS (ibm-1363)->Unicode  with flush = false did not match.\n");
649     }
650     log_verbose("Testing error conditions for SBCS\n");
651     {
652         uint8_t sampleText[] = { 0xa2, 0xFF};
653         const UChar expected[] = { 0x00c2 };
654 
655       /*  uint8_t sampleText2[] = { 0xa2, 0x70 };
656         const UChar expected2[] = { 0x0073 };*/
657 
658         if(!convertToU(sampleText, sizeof(sampleText),
659                 expected, UPRV_LENGTHOF(expected), "ibm-1051", 0, true, U_ZERO_ERROR ))
660             log_err("SBCS (ibm-1051)->Unicode  did not match.\n");
661         if(!convertToU(sampleText, sizeof(sampleText),
662                 expected, UPRV_LENGTHOF(expected), "ibm-1051", 0, false, U_ZERO_ERROR ))
663             log_err("SBCS (ibm-1051)->Unicode  with flush = false did not match.\n");
664 
665     }
666 }
667 
TestGetNextErrorBehaviour()668 static void TestGetNextErrorBehaviour(){
669    /*Test for unassigned character*/
670 #define INPUT_SIZE 1
671     static const char input1[INPUT_SIZE]={ 0x70 };
672     const char* source=(const char*)input1;
673     UErrorCode err=U_ZERO_ERROR;
674     UChar32 c=0;
675     UConverter *cnv=ucnv_open("ibm-424", &err);
676     if(U_FAILURE(err)) {
677         log_data_err("Unable to open a SBCS(ibm-424) converter: %s\n", u_errorName(err));
678         return;
679     }
680     c=ucnv_getNextUChar(cnv, &source, source + INPUT_SIZE, &err);
681     if(err != U_INVALID_CHAR_FOUND && c!=0xfffd){
682         log_err("FAIL in TestGetNextErrorBehaviour(unassigned): Expected: U_INVALID_CHAR_ERROR or 0xfffd ----Got:%s and 0x%lx\n",  myErrorName(err), c);
683     }
684     ucnv_close(cnv);
685 }
686 #endif
687 
688 #define MAX_UTF16_LEN 2
689 #define MAX_UTF8_LEN 4
690 
691 /*Regression test for utf8 converter*/
TestRegressionUTF8()692 static void TestRegressionUTF8(){
693     UChar32 currCh = 0;
694     int32_t offset8;
695     int32_t offset16;
696     UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar));
697     uint8_t *utf8 = (uint8_t*)malloc(MAX_LENGTH);
698 
699     while (currCh <= UNICODE_LIMIT) {
700         offset16 = 0;
701         offset8 = 0;
702         while(currCh <= UNICODE_LIMIT
703             && offset16 < ((int32_t)(MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN))
704             && offset8 < (MAX_LENGTH - MAX_UTF8_LEN))
705         {
706             if (currCh == SURROGATE_HIGH_START) {
707                 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */
708             }
709             U16_APPEND_UNSAFE(standardForm, offset16, currCh);
710             U8_APPEND_UNSAFE(utf8, offset8, currCh);
711             currCh++;
712         }
713         if(!convertFromU(standardForm, offset16,
714             utf8, offset8, "UTF8", 0, true, U_ZERO_ERROR )) {
715             log_err("Unicode->UTF8 did not match.\n");
716         }
717         if(!convertToU(utf8, offset8,
718             standardForm, offset16, "UTF8", 0, true, U_ZERO_ERROR )) {
719             log_err("UTF8->Unicode did not match.\n");
720         }
721     }
722 
723     free(standardForm);
724     free(utf8);
725 
726     {
727         static const char src8[] = { (char)0xCC, (char)0x81, (char)0xCC, (char)0x80 };
728         static const UChar expected[] = { 0x0301, 0x0300 };
729         UConverter *conv8;
730         UErrorCode err = U_ZERO_ERROR;
731         UChar pivotBuffer[100];
732         const UChar* const pivEnd = pivotBuffer + 100;
733         const char* srcBeg;
734         const char* srcEnd;
735         UChar* pivBeg;
736 
737         conv8 = ucnv_open("UTF-8", &err);
738 
739         srcBeg = src8;
740         pivBeg = pivotBuffer;
741         srcEnd = src8 + 3;
742         ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, false, &err);
743         if (srcBeg != srcEnd) {
744             log_err("Did not consume whole buffer on first call.\n");
745         }
746 
747         srcEnd = src8 + 4;
748         ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, true, &err);
749         if (srcBeg != srcEnd) {
750             log_err("Did not consume whole buffer on second call.\n");
751         }
752 
753         if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
754             log_err("Did not get expected results for UTF-8.\n");
755         }
756         ucnv_close(conv8);
757     }
758 }
759 
760 #define MAX_UTF32_LEN 1
761 
TestRegressionUTF32()762 static void TestRegressionUTF32(){
763 #if !UCONFIG_ONLY_HTML_CONVERSION
764     UChar32 currCh = 0;
765     int32_t offset32;
766     int32_t offset16;
767     UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar));
768     UChar32 *utf32 = (UChar32*)malloc(MAX_LENGTH*sizeof(UChar32));
769 
770     while (currCh <= UNICODE_LIMIT) {
771         offset16 = 0;
772         offset32 = 0;
773         while(currCh <= UNICODE_LIMIT
774             && offset16 < ((int32_t)(MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN))
775             && offset32 < ((int32_t)(MAX_LENGTH/sizeof(UChar32) - MAX_UTF32_LEN)))
776         {
777             if (currCh == SURROGATE_HIGH_START) {
778                 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */
779             }
780             U16_APPEND_UNSAFE(standardForm, offset16, currCh);
781             utf32[offset32++] = currCh;
782             currCh++;
783         }
784         if(!convertFromU(standardForm, offset16,
785             (const uint8_t *)utf32, offset32*sizeof(UChar32), "UTF32_PlatformEndian", 0, true, U_ZERO_ERROR )) {
786             log_err("Unicode->UTF32 did not match.\n");
787         }
788         if(!convertToU((const uint8_t *)utf32, offset32*sizeof(UChar32),
789             standardForm, offset16, "UTF32_PlatformEndian", 0, true, U_ZERO_ERROR )) {
790             log_err("UTF32->Unicode did not match.\n");
791         }
792     }
793     free(standardForm);
794     free(utf32);
795 
796     {
797         /* Check for lone surrogate error handling. */
798         static const UChar   sampleBadStartSurrogate[] = { 0x0031, 0xD800, 0x0032 };
799         static const UChar   sampleBadEndSurrogate[] = { 0x0031, 0xDC00, 0x0032 };
800         static const uint8_t expectedUTF32BE[] = {
801             0x00, 0x00, 0x00, 0x31,
802             0x00, 0x00, 0xff, 0xfd,
803             0x00, 0x00, 0x00, 0x32
804         };
805         static const uint8_t expectedUTF32LE[] = {
806             0x31, 0x00, 0x00, 0x00,
807             0xfd, 0xff, 0x00, 0x00,
808             0x32, 0x00, 0x00, 0x00
809         };
810         static const int32_t offsetsUTF32[] = {
811             0x00, 0x00, 0x00, 0x00,
812             0x01, 0x01, 0x01, 0x01,
813             0x02, 0x02, 0x02, 0x02
814         };
815 
816         if(!convertFromU(sampleBadStartSurrogate, UPRV_LENGTHOF(sampleBadStartSurrogate),
817                 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, true, U_ZERO_ERROR))
818             log_err("u->UTF-32BE\n");
819         if(!convertFromU(sampleBadEndSurrogate, UPRV_LENGTHOF(sampleBadEndSurrogate),
820                 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, true, U_ZERO_ERROR))
821             log_err("u->UTF-32BE\n");
822 
823         if(!convertFromU(sampleBadStartSurrogate, UPRV_LENGTHOF(sampleBadStartSurrogate),
824                 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, true, U_ZERO_ERROR))
825             log_err("u->UTF-32LE\n");
826         if(!convertFromU(sampleBadEndSurrogate, UPRV_LENGTHOF(sampleBadEndSurrogate),
827                 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, true, U_ZERO_ERROR))
828             log_err("u->UTF-32LE\n");
829     }
830 
831     {
832         static const char srcBE[] = { 0, 0, 0, 0x31, 0, 0, 0, 0x30 };
833         static const UChar expected[] = { 0x0031, 0x0030 };
834         UConverter *convBE;
835         UErrorCode err = U_ZERO_ERROR;
836         UChar pivotBuffer[100];
837         const UChar* const pivEnd = pivotBuffer + 100;
838         const char* srcBeg;
839         const char* srcEnd;
840         UChar* pivBeg;
841 
842         convBE = ucnv_open("UTF-32BE", &err);
843 
844         srcBeg = srcBE;
845         pivBeg = pivotBuffer;
846         srcEnd = srcBE + 5;
847         ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, false, &err);
848         if (srcBeg != srcEnd) {
849             log_err("Did not consume whole buffer on first call.\n");
850         }
851 
852         srcEnd = srcBE + 8;
853         ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, true, &err);
854         if (srcBeg != srcEnd) {
855             log_err("Did not consume whole buffer on second call.\n");
856         }
857 
858         if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
859             log_err("Did not get expected results for UTF-32BE.\n");
860         }
861         ucnv_close(convBE);
862     }
863     {
864         static const char srcLE[] = { 0x31, 0, 0, 0, 0x30, 0, 0, 0 };
865         static const UChar expected[] = { 0x0031, 0x0030 };
866         UConverter *convLE;
867         UErrorCode err = U_ZERO_ERROR;
868         UChar pivotBuffer[100];
869         const UChar* const pivEnd = pivotBuffer + 100;
870         const char* srcBeg;
871         const char* srcEnd;
872         UChar* pivBeg;
873 
874         convLE = ucnv_open("UTF-32LE", &err);
875 
876         srcBeg = srcLE;
877         pivBeg = pivotBuffer;
878         srcEnd = srcLE + 5;
879         ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, false, &err);
880         if (srcBeg != srcEnd) {
881             log_err("Did not consume whole buffer on first call.\n");
882         }
883 
884         srcEnd = srcLE + 8;
885         ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, true, &err);
886         if (srcBeg != srcEnd) {
887             log_err("Did not consume whole buffer on second call.\n");
888         }
889 
890         if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
891             log_err("Did not get expected results for UTF-32LE.\n");
892         }
893         ucnv_close(convLE);
894     }
895 #endif
896 }
897 
898 /*Walk through the available converters*/
TestAvailableConverters()899 static void TestAvailableConverters(){
900     UErrorCode status=U_ZERO_ERROR;
901     UConverter *conv=NULL;
902     int32_t i=0;
903     for(i=0; i < ucnv_countAvailable(); i++){
904         status=U_ZERO_ERROR;
905         conv=ucnv_open(ucnv_getAvailableName(i), &status);
906         if(U_FAILURE(status)){
907             log_err("ERROR: converter creation failed. Failure in alias table or the data table for \n converter=%s. Error=%s\n",
908                         ucnv_getAvailableName(i), myErrorName(status));
909             continue;
910         }
911         ucnv_close(conv);
912     }
913 
914 }
915 
TestFlushInternalBuffer()916 static void TestFlushInternalBuffer(){
917     TestWithBufferSize(MAX_LENGTH, 1);
918     TestWithBufferSize(1, 1);
919     TestWithBufferSize(1, MAX_LENGTH);
920     TestWithBufferSize(MAX_LENGTH, MAX_LENGTH);
921 }
922 
TestWithBufferSize(int32_t insize,int32_t outsize)923 static void TestWithBufferSize(int32_t insize, int32_t outsize){
924 
925     gInBufferSize =insize;
926     gOutBufferSize = outsize;
927 
928      log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
929     {
930         UChar    sampleText[] =
931             { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09,  0x002E  };
932         const uint8_t expectedUTF8[] =
933             { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
934         int32_t  toUTF8Offs[] =
935             { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};
936        /* int32_t fmUTF8Offs[] =
937             { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };*/
938 
939         /*UTF-8*/
940         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
941             expectedUTF8, sizeof(expectedUTF8), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE, toUTF8Offs ,false))
942              log_err("u-> UTF8 did not match.\n");
943     }
944 
945 #if !UCONFIG_NO_LEGACY_CONVERSION
946      log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE  \n");
947     {
948         UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
949         const uint8_t toIBM943[]= { 0x61,
950             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
951             0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
952             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
953             0x61 };
954         int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
955 
956         if(!testConvertFromU(inputTest, UPRV_LENGTHOF(inputTest),
957                 toIBM943, sizeof(toIBM943), "ibm-943",
958                 (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_ESCAPE, offset,false))
959             log_err("u-> ibm-943 with subst with value did not match.\n");
960     }
961 #endif
962 
963      log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
964     {
965         const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
966             0xe0, 0x80,  0x61};
967         UChar    expected1[] = {  0x0031, 0x4e8c, 0xfffd, 0xfffd, 0x0061};
968         int32_t offsets1[] = {   0x0000, 0x0001, 0x0004, 0x0005, 0x0006};
969 
970         if(!testConvertToU(sampleText1, sizeof(sampleText1),
971                  expected1, UPRV_LENGTHOF(expected1),"utf8", UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1,false))
972             log_err("utf8->u with substitute did not match.\n");
973     }
974 
975 #if !UCONFIG_NO_LEGACY_CONVERSION
976     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
977     /*to Unicode*/
978     {
979         const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
980             0x81, 0xad, /*unassigned*/
981             0x89, 0xd3 };
982         UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
983             0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
984             0x7B87};
985         int32_t  fromIBM943Offs [] =    { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
986 
987         if(!testConvertToU(sampleTxtToU, sizeof(sampleTxtToU),
988                  IBM_943toUnicode, UPRV_LENGTHOF(IBM_943toUnicode),"ibm-943",
989                 (UConverterToUCallback)UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs,false))
990             log_err("ibm-943->u with substitute with value did not match.\n");
991 
992     }
993 #endif
994 }
995 
convertFromU(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,const int32_t * expectOffsets,UBool doFlush,UErrorCode expectedStatus)996 static UBool convertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
997                 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus)
998 {
999 
1000     int32_t i=0;
1001     char *p=0;
1002     const UChar *src;
1003     char buffer[MAX_LENGTH];
1004     int32_t offsetBuffer[MAX_LENGTH];
1005     int32_t *offs=0;
1006     char *targ;
1007     char *targetLimit;
1008     UChar *sourceLimit=0;
1009     UErrorCode status = U_ZERO_ERROR;
1010     UConverter *conv = 0;
1011     conv = ucnv_open(codepage, &status);
1012     if(U_FAILURE(status))
1013     {
1014         log_data_err("Couldn't open converter %s\n",codepage);
1015         return true;
1016     }
1017     log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status));
1018 
1019     for(i=0; i<MAX_LENGTH; i++){
1020         buffer[i]=(char)0xF0;
1021         offsetBuffer[i]=0xFF;
1022     }
1023 
1024     src=source;
1025     sourceLimit=(UChar*)src+(sourceLen);
1026     targ=buffer;
1027     targetLimit=targ+MAX_LENGTH;
1028     offs=offsetBuffer;
1029     ucnv_fromUnicode (conv,
1030                   (char **)&targ,
1031                   (const char *)targetLimit,
1032                   &src,
1033                   sourceLimit,
1034                   expectOffsets ? offs : NULL,
1035                   doFlush,
1036                   &status);
1037     ucnv_close(conv);
1038     if(status != expectedStatus){
1039           log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus));
1040           return false;
1041     }
1042 
1043     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
1044         sourceLen, targ-buffer);
1045 
1046     if(expectLen != targ-buffer)
1047     {
1048         log_err("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage);
1049         log_verbose("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage);
1050         printSeqErr((const unsigned char *)buffer, (int32_t)(targ-buffer));
1051         printSeqErr((const unsigned char*)expect, expectLen);
1052         return false;
1053     }
1054 
1055     if(memcmp(buffer, expect, expectLen)){
1056         log_err("String does not match. FROM Unicode to codePage%s\n", codepage);
1057         log_info("\nGot:");
1058         printSeqErr((const unsigned char *)buffer, expectLen);
1059         log_info("\nExpected:");
1060         printSeqErr((const unsigned char *)expect, expectLen);
1061         return false;
1062     }
1063     else {
1064         log_verbose("Matches!\n");
1065     }
1066 
1067     if (expectOffsets != 0){
1068         log_verbose("comparing %d offsets..\n", targ-buffer);
1069         if(memcmp(offsetBuffer,expectOffsets,(targ-buffer) * sizeof(int32_t) )){
1070             log_err("did not get the expected offsets. for FROM Unicode to %s\n", codepage);
1071             log_info("\nGot  : ");
1072             printSeqErr((const unsigned char*)buffer, (int32_t)(targ-buffer));
1073             for(p=buffer;p<targ;p++)
1074                 log_info("%d, ", offsetBuffer[p-buffer]);
1075             log_info("\nExpected: ");
1076             for(i=0; i< (targ-buffer); i++)
1077                 log_info("%d,", expectOffsets[i]);
1078         }
1079     }
1080 
1081     return true;
1082 }
1083 
1084 
convertToU(const uint8_t * source,int sourceLen,const UChar * expect,int expectLen,const char * codepage,const int32_t * expectOffsets,UBool doFlush,UErrorCode expectedStatus)1085 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen,
1086                const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus)
1087 {
1088     UErrorCode status = U_ZERO_ERROR;
1089     UConverter *conv = 0;
1090     int32_t i=0;
1091     UChar *p=0;
1092     const char* src;
1093     UChar buffer[MAX_LENGTH];
1094     int32_t offsetBuffer[MAX_LENGTH];
1095     int32_t *offs=0;
1096     UChar *targ;
1097     UChar *targetLimit;
1098     uint8_t *sourceLimit=0;
1099 
1100 
1101 
1102     conv = ucnv_open(codepage, &status);
1103     if(U_FAILURE(status))
1104     {
1105         log_data_err("Couldn't open converter %s\n",codepage);
1106         return true;
1107     }
1108     log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status));
1109 
1110 
1111 
1112     for(i=0; i<MAX_LENGTH; i++){
1113         buffer[i]=0xFFFE;
1114         offsetBuffer[i]=-1;
1115     }
1116 
1117     src=(const char *)source;
1118     sourceLimit=(uint8_t*)(src+(sourceLen));
1119     targ=buffer;
1120     targetLimit=targ+MAX_LENGTH;
1121     offs=offsetBuffer;
1122 
1123 
1124 
1125     ucnv_toUnicode (conv,
1126                 &targ,
1127                 targetLimit,
1128                 (const char **)&src,
1129                 (const char *)sourceLimit,
1130                 expectOffsets ? offs : NULL,
1131                 doFlush,
1132                 &status);
1133 
1134     ucnv_close(conv);
1135     if(status != expectedStatus){
1136           log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus));
1137           return false;
1138     }
1139     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
1140         sourceLen, targ-buffer);
1141 
1142 
1143 
1144 
1145     log_verbose("comparing %d uchars (%d bytes)..\n",expectLen,expectLen*2);
1146 
1147     if (expectOffsets != 0) {
1148         if(memcmp(offsetBuffer, expectOffsets, (targ-buffer) * sizeof(int32_t))){
1149 
1150             log_err("did not get the expected offsets from %s To UNICODE\n", codepage);
1151             log_info("\nGot : ");
1152             for(p=buffer;p<targ;p++)
1153                 log_info("%d, ", offsetBuffer[p-buffer]);
1154             log_info("\nExpected: ");
1155             for(i=0; i<(targ-buffer); i++)
1156                 log_info("%d, ", expectOffsets[i]);
1157             log_info("\nGot result:");
1158             for(i=0; i<(targ-buffer); i++)
1159                 log_info("0x%04X,", buffer[i]);
1160             log_info("\nFrom Input:");
1161             for(i=0; i<(src-(const char *)source); i++)
1162                 log_info("0x%02X,", (unsigned char)source[i]);
1163             log_info("\n");
1164         }
1165     }
1166     if(memcmp(buffer, expect, expectLen*2)){
1167         log_err("String does not match. from codePage %s TO Unicode\n", codepage);
1168         log_info("\nGot:");
1169         printUSeqErr(buffer, expectLen);
1170         log_info("\nExpected:");
1171         printUSeqErr(expect, expectLen);
1172         return false;
1173     }
1174     else {
1175         log_verbose("Matches!\n");
1176     }
1177 
1178     return true;
1179 }
1180 
1181 
testConvertFromU(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UConverterFromUCallback callback,const int32_t * expectOffsets,UBool testReset)1182 static UBool testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
1183                 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, UBool testReset)
1184 {
1185     UErrorCode status = U_ZERO_ERROR;
1186     UConverter *conv = 0;
1187     char    junkout[MAX_LENGTH]; /* FIX */
1188     int32_t    junokout[MAX_LENGTH]; /* FIX */
1189     char *p;
1190     const UChar *src;
1191     char *end;
1192     char *targ;
1193     int32_t *offs;
1194     int i;
1195     int32_t   realBufferSize;
1196     char *realBufferEnd;
1197     const UChar *realSourceEnd;
1198     const UChar *sourceLimit;
1199     UBool checkOffsets = true;
1200     UBool doFlush;
1201 
1202     UConverterFromUCallback oldAction = NULL;
1203     const void* oldContext = NULL;
1204 
1205     for(i=0;i<MAX_LENGTH;i++)
1206         junkout[i] = (char)0xF0;
1207     for(i=0;i<MAX_LENGTH;i++)
1208         junokout[i] = 0xFF;
1209 
1210     setNuConvTestName(codepage, "FROM");
1211 
1212     log_verbose("\n=========  %s\n", gNuConvTestName);
1213 
1214     conv = ucnv_open(codepage, &status);
1215     if(U_FAILURE(status))
1216     {
1217         log_data_err("Couldn't open converter %s\n",codepage);
1218         return true;
1219     }
1220 
1221     log_verbose("Converter opened..\n");
1222     /*----setting the callback routine----*/
1223     ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
1224     if (U_FAILURE(status)) {
1225         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
1226     }
1227     /*------------------------*/
1228 
1229     src = source;
1230     targ = junkout;
1231     offs = junokout;
1232 
1233     realBufferSize = UPRV_LENGTHOF(junkout);
1234     realBufferEnd = junkout + realBufferSize;
1235     realSourceEnd = source + sourceLen;
1236 
1237     if ( gOutBufferSize != realBufferSize )
1238       checkOffsets = false;
1239 
1240     if( gInBufferSize != MAX_LENGTH )
1241       checkOffsets = false;
1242 
1243     do
1244     {
1245         end = nct_min(targ + gOutBufferSize, realBufferEnd);
1246         sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
1247 
1248         doFlush = (UBool)(sourceLimit == realSourceEnd);
1249 
1250         if(targ == realBufferEnd)
1251           {
1252         log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
1253         return false;
1254           }
1255         log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"true":"false");
1256 
1257 
1258         status = U_ZERO_ERROR;
1259         if(gInBufferSize ==999 && gOutBufferSize==999)
1260             doFlush = false;
1261         ucnv_fromUnicode (conv,
1262                   (char **)&targ,
1263                   (const char *)end,
1264                   &src,
1265                   sourceLimit,
1266                   offs,
1267                   doFlush, /* flush if we're at the end of the input data */
1268                   &status);
1269         if(testReset)
1270             ucnv_resetToUnicode(conv);
1271         if(gInBufferSize ==999 && gOutBufferSize==999)
1272             ucnv_resetToUnicode(conv);
1273 
1274       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
1275 
1276     if(U_FAILURE(status)) {
1277         log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
1278         return false;
1279       }
1280 
1281     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
1282         sourceLen, targ-junkout);
1283     if(getTestOption(VERBOSITY_OPTION))
1284     {
1285         char junk[999];
1286         char offset_str[999];
1287         char *ptr;
1288 
1289         junk[0] = 0;
1290         offset_str[0] = 0;
1291         for(ptr = junkout;ptr<targ;ptr++)
1292         {
1293             sprintf(junk + strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*ptr);
1294             sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[ptr-junkout]);
1295         }
1296 
1297         log_verbose(junk);
1298         printSeq((const unsigned char *)expect, expectLen);
1299         if ( checkOffsets )
1300           {
1301             log_verbose("\nOffsets:");
1302             log_verbose(offset_str);
1303           }
1304         log_verbose("\n");
1305     }
1306     ucnv_close(conv);
1307 
1308 
1309     if(expectLen != targ-junkout)
1310     {
1311         log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
1312         log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
1313         log_info("\nGot:");
1314         printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
1315         log_info("\nExpected:");
1316         printSeqErr((const unsigned char*)expect, expectLen);
1317         return false;
1318     }
1319 
1320     if (checkOffsets && (expectOffsets != 0) )
1321     {
1322         log_verbose("comparing %d offsets..\n", targ-junkout);
1323         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
1324             log_err("did not get the expected offsets. %s", gNuConvTestName);
1325             log_err("Got  : ");
1326             printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
1327             for(p=junkout;p<targ;p++)
1328                 log_err("%d, ", junokout[p-junkout]);
1329             log_err("\nExpected: ");
1330             for(i=0; i<(targ-junkout); i++)
1331                 log_err("%d,", expectOffsets[i]);
1332         }
1333     }
1334 
1335     log_verbose("comparing..\n");
1336     if(!memcmp(junkout, expect, expectLen))
1337     {
1338         log_verbose("Matches!\n");
1339         return true;
1340     }
1341     else
1342     {
1343         log_err("String does not match. %s\n", gNuConvTestName);
1344         printUSeqErr(source, sourceLen);
1345         log_info("\nGot:");
1346         printSeqErr((const unsigned char *)junkout, expectLen);
1347         log_info("\nExpected:");
1348         printSeqErr((const unsigned char *)expect, expectLen);
1349 
1350         return false;
1351     }
1352 }
1353 
testConvertToU(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UConverterToUCallback callback,const int32_t * expectOffsets,UBool testReset)1354 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
1355                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset)
1356 {
1357     UErrorCode status = U_ZERO_ERROR;
1358     UConverter *conv = 0;
1359     UChar    junkout[MAX_LENGTH]; /* FIX */
1360     int32_t    junokout[MAX_LENGTH]; /* FIX */
1361     const char *src;
1362     const char *realSourceEnd;
1363     const char *srcLimit;
1364     UChar *p;
1365     UChar *targ;
1366     UChar *end;
1367     int32_t *offs;
1368     int i;
1369     UBool   checkOffsets = true;
1370     int32_t   realBufferSize;
1371     UChar *realBufferEnd;
1372     UBool doFlush;
1373 
1374     UConverterToUCallback oldAction = NULL;
1375     const void* oldContext = NULL;
1376 
1377 
1378     for(i=0;i<MAX_LENGTH;i++)
1379         junkout[i] = 0xFFFE;
1380 
1381     for(i=0;i<MAX_LENGTH;i++)
1382         junokout[i] = -1;
1383 
1384     setNuConvTestName(codepage, "TO");
1385 
1386     log_verbose("\n=========  %s\n", gNuConvTestName);
1387 
1388     conv = ucnv_open(codepage, &status);
1389     if(U_FAILURE(status))
1390     {
1391         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
1392         return true;
1393     }
1394 
1395     log_verbose("Converter opened..\n");
1396      /*----setting the callback routine----*/
1397     ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
1398     if (U_FAILURE(status)) {
1399         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
1400     }
1401     /*-------------------------------------*/
1402 
1403     src = (const char *)source;
1404     targ = junkout;
1405     offs = junokout;
1406 
1407     realBufferSize = UPRV_LENGTHOF(junkout);
1408     realBufferEnd = junkout + realBufferSize;
1409     realSourceEnd = src + sourcelen;
1410 
1411     if ( gOutBufferSize != realBufferSize )
1412       checkOffsets = false;
1413 
1414     if( gInBufferSize != MAX_LENGTH )
1415       checkOffsets = false;
1416 
1417     do
1418       {
1419         end = nct_min( targ + gOutBufferSize, realBufferEnd);
1420         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
1421 
1422         if(targ == realBufferEnd)
1423         {
1424             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
1425             return false;
1426         }
1427         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
1428 
1429         /* oldTarg = targ; */
1430 
1431         status = U_ZERO_ERROR;
1432         doFlush=(UBool)((gInBufferSize ==999 && gOutBufferSize==999)?(srcLimit == realSourceEnd) : false);
1433 
1434         ucnv_toUnicode (conv,
1435                 &targ,
1436                 end,
1437                 (const char **)&src,
1438                 (const char *)srcLimit,
1439                 offs,
1440                 doFlush, /* flush if we're at the end of the source data */
1441                 &status);
1442         if(testReset)
1443             ucnv_resetFromUnicode(conv);
1444         if(gInBufferSize ==999 && gOutBufferSize==999)
1445             ucnv_resetToUnicode(conv);
1446         /*        offs += (targ-oldTarg); */
1447 
1448       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
1449 
1450     if(U_FAILURE(status))
1451     {
1452         log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
1453         return false;
1454     }
1455 
1456     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
1457         sourcelen, targ-junkout);
1458     if(getTestOption(VERBOSITY_OPTION))
1459     {
1460         char junk[999];
1461         char offset_str[999];
1462 
1463         UChar *ptr;
1464 
1465         junk[0] = 0;
1466         offset_str[0] = 0;
1467 
1468         for(ptr = junkout;ptr<targ;ptr++)
1469         {
1470             sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
1471             sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
1472         }
1473 
1474         log_verbose(junk);
1475 
1476         if ( checkOffsets )
1477           {
1478             log_verbose("\nOffsets:");
1479             log_verbose(offset_str);
1480           }
1481         log_verbose("\n");
1482     }
1483     ucnv_close(conv);
1484 
1485     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
1486 
1487     if (checkOffsets && (expectOffsets != 0))
1488     {
1489         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
1490 
1491             log_err("did not get the expected offsets. %s",gNuConvTestName);
1492             for(p=junkout;p<targ;p++)
1493                 log_err("%d, ", junokout[p-junkout]);
1494             log_err("\nExpected: ");
1495             for(i=0; i<(targ-junkout); i++)
1496                 log_err("%d,", expectOffsets[i]);
1497             log_err("");
1498             for(i=0; i<(targ-junkout); i++)
1499                 log_err("%X,", junkout[i]);
1500             log_err("");
1501             for(i=0; i<(src-(const char *)source); i++)
1502                 log_err("%X,", (unsigned char)source[i]);
1503         }
1504     }
1505 
1506     if(!memcmp(junkout, expect, expectlen*2))
1507     {
1508         log_verbose("Matches!\n");
1509         return true;
1510     }
1511     else
1512     {
1513         log_err("String does not match. %s\n", gNuConvTestName);
1514         log_verbose("String does not match. %s\n", gNuConvTestName);
1515         log_info("\nGot:");
1516         printUSeq(junkout, expectlen);
1517         log_info("\nExpected:");
1518         printUSeq(expect, expectlen);
1519         return false;
1520     }
1521 }
1522 
1523 
TestResetBehaviour(void)1524 static void TestResetBehaviour(void){
1525 #if !UCONFIG_NO_LEGACY_CONVERSION
1526     log_verbose("Testing Reset for DBCS and MBCS\n");
1527     {
1528         static const UChar sampleText[]       = {0x00a1, 0xd801, 0xdc01, 0x00a4};
1529         static const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
1530         static const int32_t offsets[]        = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 };
1531 
1532 
1533         static const UChar sampleText1[] = {0x00a1, 0x00a4, 0x00a7, 0x00a8};
1534         static const uint8_t expected1[] = {0xa2, 0xae,0xA2,0xB4,0xA1,0xD7,0xA1,0xA7};
1535         static const int32_t offsets1[] =  { 0,2,4,6};
1536 
1537         /*DBCS*/
1538         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1539                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, true))
1540             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
1541         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1542                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , true))
1543             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
1544 
1545         if(!testConvertToU(expected1, sizeof(expected1),
1546                 sampleText1, UPRV_LENGTHOF(sampleText1), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1547                 offsets1, true))
1548            log_err("ibm-1363 -> did not match.\n");
1549         /*MBCS*/
1550         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1551                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, true))
1552             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
1553         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1554                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , true))
1555             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
1556 
1557         if(!testConvertToU(expected1, sizeof(expected1),
1558                 sampleText1, UPRV_LENGTHOF(sampleText1), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1559                 offsets1, true))
1560            log_err("ibm-1363 -> did not match.\n");
1561 
1562     }
1563 
1564     log_verbose("Testing Reset for ISO-2022-jp\n");
1565     {
1566         static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1567 
1568         static const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
1569                                     0x31,0x1A, 0x32};
1570 
1571 
1572         static const int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 };
1573 
1574 
1575         static const UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032};
1576         static const uint8_t expected1[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
1577                                     0x31,0x1A, 0x32};
1578         static const int32_t offsets1[] =  { 3,5,10,11,12};
1579 
1580         /*iso-2022-jp*/
1581         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1582                 expected, sizeof(expected), "iso-2022-jp",  UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, true))
1583             log_err("u-> not match.\n");
1584         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1585                 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , true))
1586             log_err("u->  not match.\n");
1587 
1588         if(!testConvertToU(expected1, sizeof(expected1),
1589                 sampleText1, UPRV_LENGTHOF(sampleText1), "iso-2022-jp",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1590                 offsets1, true))
1591            log_err("iso-2022-jp -> did not match.\n");
1592 
1593     }
1594 
1595     log_verbose("Testing Reset for ISO-2022-cn\n");
1596     {
1597         static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1598 
1599         static const uint8_t expected[] = {
1600                                     0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
1601                                     0x36, 0x21,
1602                                     0x0f, 0x31,
1603                                     0x1A,
1604                                     0x32
1605                                     };
1606 
1607 
1608         static const int32_t offsets[] = {
1609                                     0,    0,    0,    0,    0,    0,    0,
1610                                     1,    1,
1611                                     2,    2,
1612                                     3,
1613                                     5,  };
1614 
1615         UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032};
1616         static const uint8_t expected1[] = {
1617                                     0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
1618                                     0x36, 0x21,
1619                                     0x1B, 0x24, 0x29, 0x47, 0x24, 0x22,
1620                                     0x0f, 0x1A,
1621                                     0x32
1622                                     };
1623         static const int32_t offsets1[] =  { 5,7,13,16,17};
1624 
1625         /*iso-2022-CN*/
1626         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1627                 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, true))
1628             log_err("u-> not match.\n");
1629         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1630                 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , true))
1631             log_err("u-> not match.\n");
1632 
1633         if(!testConvertToU(expected1, sizeof(expected1),
1634                 sampleText1, UPRV_LENGTHOF(sampleText1), "iso-2022-cn",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1635                 offsets1, true))
1636            log_err("iso-2022-cn -> did not match.\n");
1637     }
1638 
1639         log_verbose("Testing Reset for ISO-2022-kr\n");
1640     {
1641         UChar    sampleText[] =   { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1642 
1643         static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43,
1644                                     0x0E, 0x6C, 0x69,
1645                                     0x0f, 0x1A,
1646                                     0x0e, 0x6F, 0x4B,
1647                                     0x0F, 0x31,
1648                                     0x1A,
1649                                     0x32 };
1650 
1651         static const int32_t offsets[] = {-1, -1, -1, -1,
1652                               0, 0, 0,
1653                               1, 1,
1654                               3, 3, 3,
1655                               4, 4,
1656                               5,
1657                               7,
1658                             };
1659         static const UChar    sampleText1[] =   { 0x4e00,0x0041, 0x04e01, 0x0031, 0x0042, 0x0032};
1660 
1661         static const uint8_t expected1[] = {0x1B, 0x24, 0x29, 0x43,
1662                                     0x0E, 0x6C, 0x69,
1663                                     0x0f, 0x41,
1664                                     0x0e, 0x6F, 0x4B,
1665                                     0x0F, 0x31,
1666                                     0x42,
1667                                     0x32 };
1668 
1669         static const int32_t offsets1[] = {
1670                               5, 8, 10,
1671                               13, 14, 15
1672 
1673                             };
1674         /*iso-2022-kr*/
1675         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1676                 expected, sizeof(expected), "iso-2022-kr",  UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, true))
1677             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
1678         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1679                 expected, sizeof(expected), "iso-2022-kr",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , true))
1680             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
1681         if(!testConvertToU(expected1, sizeof(expected1),
1682                 sampleText1, UPRV_LENGTHOF(sampleText1), "iso-2022-kr",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1683                 offsets1, true))
1684            log_err("iso-2022-kr -> did not match.\n");
1685     }
1686 
1687         log_verbose("Testing Reset for HZ\n");
1688     {
1689         static const UChar    sampleText[] =   { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1690 
1691         static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B,
1692                                     0x7E, 0x7D, 0x1A,
1693                                     0x7E, 0x7B, 0x36, 0x21,
1694                                     0x7E, 0x7D, 0x31,
1695                                     0x1A,
1696                                     0x32 };
1697 
1698 
1699         static const int32_t offsets[] = {0,0,0,0,
1700                              1,1,1,
1701                              3,3,3,3,
1702                              4,4,4,
1703                              5,
1704                              7,};
1705         static const UChar    sampleText1[] =   { 0x4e00, 0x0035, 0x04e01, 0x0031, 0x0041, 0x0032};
1706 
1707         static const uint8_t expected1[] = {0x7E, 0x7B, 0x52, 0x3B,
1708                                     0x7E, 0x7D, 0x35,
1709                                     0x7E, 0x7B, 0x36, 0x21,
1710                                     0x7E, 0x7D, 0x31,
1711                                     0x41,
1712                                     0x32 };
1713 
1714 
1715         static const int32_t offsets1[] = {2,6,9,13,14,15
1716                             };
1717 
1718         /*hz*/
1719         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1720                 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , true))
1721             log_err("u->  not match.\n");
1722         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1723                 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , true))
1724             log_err("u->  not match.\n");
1725         if(!testConvertToU(expected1, sizeof(expected1),
1726                 sampleText1, UPRV_LENGTHOF(sampleText1), "hz",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1727                 offsets1, true))
1728            log_err("hz -> did not match.\n");
1729     }
1730 #endif
1731 
1732     /*UTF-8*/
1733      log_verbose("Testing for UTF8\n");
1734     {
1735         static const UChar    sampleText[] =   { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032};
1736         int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02,
1737                            0x03, 0x03, 0x03, 0x04, 0x04, 0x04,
1738                            0x04, 0x06 };
1739         static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31,
1740             0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32};
1741 
1742 
1743         static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D };
1744         /*UTF-8*/
1745         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1746             expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , true))
1747             log_err("u-> UTF8 with offsets and flush true did not match.\n");
1748         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1749             expected, sizeof(expected), "UTF8",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , true))
1750             log_err("u-> UTF8 with offsets and flush true did not match.\n");
1751         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1752             expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , true))
1753             log_err("u-> UTF8 with offsets and flush true did not match.\n");
1754         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1755             expected, sizeof(expected), "UTF8",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , true))
1756             log_err("u-> UTF8 with offsets and flush true did not match.\n");
1757         if(!testConvertToU(expected, sizeof(expected),
1758             sampleText, UPRV_LENGTHOF(sampleText), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, true))
1759             log_err("UTF8 -> did not match.\n");
1760         if(!testConvertToU(expected, sizeof(expected),
1761             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, true))
1762             log_err("UTF8 -> did not match.\n");
1763         if(!testConvertToU(expected, sizeof(expected),
1764             sampleText, UPRV_LENGTHOF(sampleText), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, true))
1765             log_err("UTF8 -> did not match.\n");
1766         if(!testConvertToU(expected, sizeof(expected),
1767             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, true))
1768             log_err("UTF8 -> did not match.\n");
1769 
1770     }
1771 
1772 }
1773 
1774 /* Test that U_TRUNCATED_CHAR_FOUND is set. */
1775 static void
doTestTruncated(const char * cnvName,const uint8_t * bytes,int32_t length)1776 doTestTruncated(const char *cnvName, const uint8_t *bytes, int32_t length) {
1777     UConverter *cnv;
1778 
1779     UChar buffer[2];
1780     UChar *target, *targetLimit;
1781     const char *source, *sourceLimit;
1782 
1783     UErrorCode errorCode;
1784 
1785     errorCode=U_ZERO_ERROR;
1786     cnv=ucnv_open(cnvName, &errorCode);
1787     if(U_FAILURE(errorCode)) {
1788         log_data_err("error TestTruncated: unable to open \"%s\" - %s\n", cnvName, u_errorName(errorCode));
1789         return;
1790     }
1791     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
1792     if(U_FAILURE(errorCode)) {
1793         log_data_err("error TestTruncated: unable to set the stop callback on \"%s\" - %s\n",
1794                     cnvName, u_errorName(errorCode));
1795         ucnv_close(cnv);
1796         return;
1797     }
1798 
1799     source=(const char *)bytes;
1800     sourceLimit=source+length;
1801     target=buffer;
1802     targetLimit=buffer+UPRV_LENGTHOF(buffer);
1803 
1804     /* 1. input bytes with flush=false, then input nothing with flush=true */
1805     ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, false, &errorCode);
1806     if(U_FAILURE(errorCode) || source!=sourceLimit || target!=buffer) {
1807         log_err("error TestTruncated(%s, 1a): input bytes[%d], flush=false: %s, input left %d, output %d\n",
1808                 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer));
1809     }
1810 
1811     errorCode=U_ZERO_ERROR;
1812     source=sourceLimit;
1813     target=buffer;
1814     ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, true, &errorCode);
1815     if(errorCode!=U_TRUNCATED_CHAR_FOUND || target!=buffer) {
1816         log_err("error TestTruncated(%s, 1b): no input (previously %d), flush=true: %s (should be U_TRUNCATED_CHAR_FOUND), output %d\n",
1817                 cnvName, (int)length, u_errorName(errorCode), (int)(target-buffer));
1818     }
1819 
1820     /* 2. input bytes with flush=true */
1821     ucnv_resetToUnicode(cnv);
1822 
1823     errorCode=U_ZERO_ERROR;
1824     source=(const char *)bytes;
1825     target=buffer;
1826     ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, true, &errorCode);
1827     if(errorCode!=U_TRUNCATED_CHAR_FOUND || source!=sourceLimit || target!=buffer) {
1828         log_err("error TestTruncated(%s, 2): input bytes[%d], flush=true: %s (should be U_TRUNCATED_CHAR_FOUND), input left %d, output %d\n",
1829                 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer));
1830     }
1831 
1832 
1833     ucnv_close(cnv);
1834 }
1835 
/*
 * Runs doTestTruncated() over a table of converter names, each paired with a
 * partial byte sequence that should produce no output.
 */
static void
TestTruncated() {
    static const struct {
        const char *cnvName;
        uint8_t bytes[8]; /* partial input bytes resulting in no output */
        int32_t length;   /* number of bytes[] entries that are used */
    } testCases[]={
        /* IMAP mailbox names */
        { "IMAP-mailbox-name",  { 0x26 }, 1 },              /* & */
        { "IMAP-mailbox-name",  { 0x26, 0x42 }, 2 },        /* &B */
        { "IMAP-mailbox-name",  { 0x26, 0x42, 0x42 }, 3 },  /* &BB */
        { "IMAP-mailbox-name",  { 0x26, 0x41, 0x41 }, 3 },  /* &AA */

        /* Unicode encoding forms and schemes */
        { "UTF-7",      { 0x2b, 0x42 }, 2 },                /* +B */
        { "UTF-8",      { 0xd1 }, 1 },

        { "UTF-16BE",   { 0x4e }, 1 },
        { "UTF-16LE",   { 0x4e }, 1 },
        { "UTF-16",     { 0x4e }, 1 },
        { "UTF-16",     { 0xff }, 1 },
        { "UTF-16",     { 0xfe, 0xff, 0x4e }, 3 },

        { "UTF-32BE",   { 0, 0, 0x4e }, 3 },
        { "UTF-32LE",   { 0x4e }, 1 },
        { "UTF-32",     { 0, 0, 0x4e }, 3 },
        { "UTF-32",     { 0xff }, 1 },
        { "UTF-32",     { 0, 0, 0xfe, 0xff, 0 }, 5 },
        { "SCSU",       { 0x0e, 0x4e }, 2 },                /* SQU 0x4e */

        { "BOCU-1",     { 0xd5 }, 1 },

#if !UCONFIG_NO_LEGACY_CONVERSION
        /* converters that need legacy conversion data */
        { "Shift-JIS",  { 0xe0 }, 1 },

        { "ibm-939",    { 0x0e, 0x41 }, 2 }                 /* SO 0x41 */
#endif
    };
    const int32_t caseCount = UPRV_LENGTHOF(testCases);
    int32_t idx = 0;

    while(idx < caseCount) {
        doTestTruncated(testCases[idx].cnvName, testCases[idx].bytes, testCases[idx].length);
        ++idx;
    }
}
1880 
1881 typedef struct NameRange {
1882     const char *name;
1883     UChar32 start, end, start2, end2, notStart, notEnd;
1884 } NameRange;
1885 
1886 static void
TestUnicodeSet()1887 TestUnicodeSet() {
1888     UErrorCode errorCode;
1889     UConverter *cnv;
1890     USet *set;
1891     const char *name;
1892     int32_t i, count;
1893 
1894     static const char *const completeSetNames[]={
1895         "UTF-7",
1896         "UTF-8",
1897         "UTF-16",
1898         "UTF-16BE",
1899         "UTF-16LE",
1900         "UTF-32",
1901         "UTF-32BE",
1902         "UTF-32LE",
1903         "SCSU",
1904         "BOCU-1",
1905         "CESU-8",
1906 #if !UCONFIG_NO_LEGACY_CONVERSION
1907         "gb18030",
1908 #endif
1909         "IMAP-mailbox-name"
1910     };
1911 #if !UCONFIG_NO_LEGACY_CONVERSION
1912     static const char *const lmbcsNames[]={
1913         "LMBCS-1",
1914         "LMBCS-2",
1915         "LMBCS-3",
1916         "LMBCS-4",
1917         "LMBCS-5",
1918         "LMBCS-6",
1919         "LMBCS-8",
1920         "LMBCS-11",
1921         "LMBCS-16",
1922         "LMBCS-17",
1923         "LMBCS-18",
1924         "LMBCS-19"
1925     };
1926 #endif
1927 
1928     static const NameRange nameRanges[]={
1929         { "US-ASCII", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
1930 #if !UCONFIG_NO_LEGACY_CONVERSION
1931         { "ibm-367", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
1932 #endif
1933         { "ISO-8859-1", 0, 0x7f, -1, -1, 0x100, 0x10ffff },
1934 #if !UCONFIG_NO_LEGACY_CONVERSION
1935         { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff },
1936         { "windows-1251", 0, 0x7f, 0x410, 0x44f, 0x3000, 0xd7ff },
1937         /* HZ test case fixed and moved to intltest's conversion.txt, ticket #6002 */
1938         { "shift-jis", 0x3041, 0x3093, 0x30a1, 0x30f3, 0x900, 0x1cff }
1939 #else
1940         { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff }
1941 #endif
1942     };
1943 
1944     /* open an empty set */
1945     set=uset_open(1, 0);
1946 
1947     count=ucnv_countAvailable();
1948     for(i=0; i<count; ++i) {
1949         errorCode=U_ZERO_ERROR;
1950         name=ucnv_getAvailableName(i);
1951         cnv=ucnv_open(name, &errorCode);
1952         if(U_FAILURE(errorCode)) {
1953             log_data_err("error: unable to open converter %s - %s\n",
1954                     name, u_errorName(errorCode));
1955             continue;
1956         }
1957 
1958         uset_clear(set);
1959         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
1960         if(U_FAILURE(errorCode)) {
1961             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
1962                     name, u_errorName(errorCode));
1963         } else if(uset_size(set)==0) {
1964             log_err("error: ucnv_getUnicodeSet(%s) returns an empty set\n", name);
1965         }
1966 
1967         ucnv_close(cnv);
1968     }
1969 
1970     /* test converters that are known to convert all of Unicode (except maybe for surrogates) */
1971     for(i=0; i<UPRV_LENGTHOF(completeSetNames); ++i) {
1972         errorCode=U_ZERO_ERROR;
1973         name=completeSetNames[i];
1974         cnv=ucnv_open(name, &errorCode);
1975         if(U_FAILURE(errorCode)) {
1976             log_data_err("error: unable to open converter %s - %s\n",
1977                     name, u_errorName(errorCode));
1978             continue;
1979         }
1980 
1981         uset_clear(set);
1982         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
1983         if(U_FAILURE(errorCode)) {
1984             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
1985                     name, u_errorName(errorCode));
1986         } else if(!uset_containsRange(set, 0, 0xd7ff) || !uset_containsRange(set, 0xe000, 0x10ffff)) {
1987             log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set\n", name);
1988         }
1989 
1990         ucnv_close(cnv);
1991     }
1992 
1993 #if !UCONFIG_NO_LEGACY_CONVERSION
1994     /* test LMBCS variants which convert all of Unicode except for U+F6xx */
1995     for(i=0; i<UPRV_LENGTHOF(lmbcsNames); ++i) {
1996         errorCode=U_ZERO_ERROR;
1997         name=lmbcsNames[i];
1998         cnv=ucnv_open(name, &errorCode);
1999         if(U_FAILURE(errorCode)) {
2000             log_data_err("error: unable to open converter %s - %s\n",
2001                     name, u_errorName(errorCode));
2002             continue;
2003         }
2004 
2005         uset_clear(set);
2006         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
2007         if(U_FAILURE(errorCode)) {
2008             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
2009                     name, u_errorName(errorCode));
2010         } else if(!uset_containsRange(set, 0, 0xf5ff) || !uset_containsRange(set, 0xf700, 0x10ffff)) {
2011             log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set (minus U+F6xx)\n", name);
2012         }
2013 
2014         ucnv_close(cnv);
2015     }
2016 #endif
2017 
2018     /* test specific sets */
2019     for(i=0; i<UPRV_LENGTHOF(nameRanges); ++i) {
2020         errorCode=U_ZERO_ERROR;
2021         name=nameRanges[i].name;
2022         cnv=ucnv_open(name, &errorCode);
2023         if(U_FAILURE(errorCode)) {
2024             log_data_err("error: unable to open converter %s - %s\n",
2025                          name, u_errorName(errorCode));
2026             continue;
2027         }
2028 
2029         uset_clear(set);
2030         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
2031         if(U_FAILURE(errorCode)) {
2032             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
2033                     name, u_errorName(errorCode));
2034         } else if(
2035             !uset_containsRange(set, nameRanges[i].start, nameRanges[i].end) ||
2036             (nameRanges[i].start2>=0 && !uset_containsRange(set, nameRanges[i].start2, nameRanges[i].end2))
2037         ) {
2038             log_err("error: ucnv_getUnicodeSet(%s) does not contain the expected ranges\n", name);
2039         } else if(nameRanges[i].notStart>=0) {
2040             /* simulate containsAny() with the C API */
2041             uset_complement(set);
2042             if(!uset_containsRange(set, nameRanges[i].notStart, nameRanges[i].notEnd)) {
2043                 log_err("error: ucnv_getUnicodeSet(%s) contains part of the unexpected range\n", name);
2044             }
2045         }
2046 
2047         ucnv_close(cnv);
2048     }
2049 
2050     errorCode = U_ZERO_ERROR;
2051     ucnv_getUnicodeSet(NULL, set, UCNV_ROUNDTRIP_SET, &errorCode);
2052     if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
2053         log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode));
2054     }
2055     errorCode = U_PARSE_ERROR;
2056     /* Make sure that it does nothing if an error is passed in. Difficult to proper test for. */
2057     ucnv_getUnicodeSet(NULL, NULL, UCNV_ROUNDTRIP_SET, &errorCode);
2058     if (errorCode != U_PARSE_ERROR) {
2059         log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode));
2060     }
2061 
2062     uset_close(set);
2063 }
2064