• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/********************************************************************
 * COPYRIGHT:
 * Copyright (c) 1997-2016, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************/
/*****************************************************************************
*
* File ncnvtst.c
*
* Modification History:
*        Name                     Description
*   Madhu Katragadda              7/7/2000        Converter Tests for extended code coverage
******************************************************************************
*/
17 #include <stdbool.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include "unicode/uloc.h"
22 #include "unicode/ucnv.h"
23 #include "unicode/utypes.h"
24 #include "unicode/ustring.h"
25 #include "unicode/uset.h"
26 #include "unicode/utf8.h"
27 #include "unicode/utf16.h"
28 #include "cintltst.h"
29 #include "cmemory.h"
30 
/* NOTE(review): the users of MAX_LENGTH are outside this part of the file;
 * presumably a scratch-buffer capacity - confirm before changing. */
#define MAX_LENGTH 999

/* NOTE(review): these look like the last Unicode code point and the bounds of
 * the UTF-16 surrogate range; their users are not visible here - confirm. */
#define UNICODE_LIMIT 0x10FFFF
#define SURROGATE_HIGH_START    0xD800
#define SURROGATE_LOW_END       0xDFFF

/* Buffer sizes and the test label; setNuConvTestName() formats both sizes
 * into gNuConvTestName. */
static int32_t  gInBufferSize = 0;
static int32_t  gOutBufferSize = 0;
static char     gNuConvTestName[1024];

/*
 * Yields the smaller of x and y (y when they compare equal).
 * Every argument and the whole expansion are parenthesized so that operator
 * expressions such as `a | b` can be passed without changing the result.
 * The selected argument is evaluated twice, so do not pass expressions with
 * side effects (for example i++).
 */
#define nct_min(x,y)  (((x) < (y)) ? (x) : (y))
42 
43 static void printSeq(const unsigned char* a, int len);
44 static void printSeqErr(const unsigned char* a, int len);
45 static void printUSeq(const UChar* a, int len);
46 static void printUSeqErr(const UChar* a, int len);
47 static UBool convertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
48                 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus);
49 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen,
50                const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus);
51 
52 static UBool testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
53                 const char *codepage, UConverterFromUCallback callback, const int32_t *expectOffsets, UBool testReset);
54 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
55                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset);
56 
setNuConvTestName(const char * codepage,const char * direction)57 static void setNuConvTestName(const char *codepage, const char *direction)
58 {
59     snprintf(gNuConvTestName, sizeof(gNuConvTestName),  "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
60         codepage,
61         direction,
62         (int)gInBufferSize,
63         (int)gOutBufferSize);
64 }
65 
66 
/* Test entry points; addExtraTests() registers the ones it references. */
static void TestSurrogateBehaviour(void);
static void TestErrorBehaviour(void);

/* These two are only declared (and registered) when legacy conversion is built in. */
#if !UCONFIG_NO_LEGACY_CONVERSION
static void TestToUnicodeErrorBehaviour(void);
static void TestGetNextErrorBehaviour(void);
#endif

static void TestRegressionUTF8(void);
static void TestRegressionUTF32(void);
static void TestAvailableConverters(void);
static void TestFlushInternalBuffer(void);  /*for improved code coverage in ucnv_cnv.c*/
static void TestResetBehaviour(void);
static void TestTruncated(void);
static void TestUnicodeSet(void);

/* Not registered by addExtraTests(); its callers are outside this part of the file. */
static void TestWithBufferSize(int32_t osize, int32_t isize);
84 
85 
/*
 * Logs the first len bytes of a through log_verbose() as "0xNN " tokens
 * enclosed in braces. A len of zero or less logs just the braces.
 */
static void printSeq(const unsigned char* a, int len)
{
    int i;

    log_verbose("\n{");
    for (i = 0; i < len; i++) {
        log_verbose("0x%02X ", a[i]);
    }
    log_verbose("}\n");
}
94 
/*
 * Logs the first len UTF-16 code units of a through log_verbose() as
 * "0xNNNN " tokens enclosed in braces, the same layout printUSeqErr() writes
 * to stderr. A len of zero or less logs just the braces.
 */
static void printUSeq(const UChar* a, int len)
{
    int i;

    log_verbose("\n{");
    for (i = 0; i < len; i++) {
        /* "0x" is literal text and "%04X" is the conversion; the previous
         * "%0x04X" printed lowercase hex followed by the characters "04X". */
        log_verbose("0x%04X ", a[i]);
    }
    log_verbose("}\n");
}
103 
/*
 * Writes the first len bytes of a to stderr as "0xNN " tokens enclosed in
 * braces. A len of zero or less writes just the braces.
 */
static void printSeqErr(const unsigned char* a, int len)
{
    int i;

    fprintf(stderr, "\n{");
    for (i = 0; i < len; i++) {
        fprintf(stderr, "0x%02X ", a[i]);
    }
    fprintf(stderr, "}\n");
}
111 
/*
 * Writes the first len UTF-16 code units of a to stderr as "0xNNNN " tokens
 * enclosed in braces. A len of zero or less writes just the braces.
 */
static void printUSeqErr(const UChar* a, int len)
{
    int i;

    fprintf(stderr, "\n{");
    for (i = 0; i < len; i++) {
        fprintf(stderr, "0x%04X ", a[i]);
    }
    fprintf(stderr, "}\n");
}
120 
121 void addExtraTests(TestNode** root);
122 
addExtraTests(TestNode ** root)123 void addExtraTests(TestNode** root)
124 {
125      addTest(root, &TestSurrogateBehaviour,         "tsconv/ncnvtst/TestSurrogateBehaviour");
126      addTest(root, &TestErrorBehaviour,             "tsconv/ncnvtst/TestErrorBehaviour");
127 
128 #if !UCONFIG_NO_LEGACY_CONVERSION
129      addTest(root, &TestToUnicodeErrorBehaviour,    "tsconv/ncnvtst/ToUnicodeErrorBehaviour");
130      addTest(root, &TestGetNextErrorBehaviour,      "tsconv/ncnvtst/TestGetNextErrorBehaviour");
131 #endif
132 
133      addTest(root, &TestAvailableConverters,        "tsconv/ncnvtst/TestAvailableConverters");
134      addTest(root, &TestFlushInternalBuffer,        "tsconv/ncnvtst/TestFlushInternalBuffer");
135      addTest(root, &TestResetBehaviour,             "tsconv/ncnvtst/TestResetBehaviour");
136      addTest(root, &TestRegressionUTF8,             "tsconv/ncnvtst/TestRegressionUTF8");
137      addTest(root, &TestRegressionUTF32,            "tsconv/ncnvtst/TestRegressionUTF32");
138      addTest(root, &TestTruncated,                  "tsconv/ncnvtst/TestTruncated");
139      addTest(root, &TestUnicodeSet,                 "tsconv/ncnvtst/TestUnicodeSet");
140 }
141 
142 /*test surrogate behaviour*/
TestSurrogateBehaviour(void)143 static void TestSurrogateBehaviour(void){
144     log_verbose("Testing for SBCS and LATIN_1\n");
145     {
146         UChar sampleText[] = {0x0031, 0xd801, 0xdc01, 0x0032};
147         const uint8_t expected[] = {0x31, 0x1a, 0x32};
148 
149 #if !UCONFIG_NO_LEGACY_CONVERSION
150         /*SBCS*/
151         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
152                 expected, sizeof(expected), "ibm-920", 0 , true, U_ZERO_ERROR))
153             log_err("u-> ibm-920 [UCNV_SBCS] not match.\n");
154 #endif
155 
156         /*LATIN_1*/
157         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
158                 expected, sizeof(expected), "LATIN_1", 0, true, U_ZERO_ERROR ))
159             log_err("u-> LATIN_1 not match.\n");
160 
161     }
162 
163 #if !UCONFIG_NO_LEGACY_CONVERSION
164     log_verbose("Testing for DBCS and MBCS\n");
165     {
166         UChar sampleText[]       = {0x00a1, 0xd801, 0xdc01, 0x00a4};
167         const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
168         int32_t offsets[]        = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 };
169 
170         /*DBCS*/
171         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
172                 expected, sizeof(expected), "ibm-1363", 0 , true, U_ZERO_ERROR))
173             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
174         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
175                 expected, sizeof(expected), "ibm-1363", offsets , true, U_ZERO_ERROR))
176             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
177         /*MBCS*/
178         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
179                 expected, sizeof(expected), "ibm-1363", 0 , true, U_ZERO_ERROR))
180             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
181         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
182                 expected, sizeof(expected), "ibm-1363", offsets, true, U_ZERO_ERROR))
183             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
184     }
185 
186     log_verbose("Testing for ISO-2022-jp\n");
187     {
188         UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
189 
190         const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
191                                     0x31,0x1A, 0x32};
192 
193 
194         int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 };
195 
196         /*iso-2022-jp*/
197         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
198                 expected, sizeof(expected), "iso-2022-jp", 0 , true, U_ZERO_ERROR))
199             log_err("u-> not match.\n");
200         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
201                 expected, sizeof(expected), "iso-2022-jp", offsets , true, U_ZERO_ERROR))
202             log_err("u->  not match.\n");
203     }
204 
205    /* BEGIN android-removed */
206    /* To save space, Android does not build full ISO-2022-CN tables.
207       We skip the tests for ISO-2022-CN. */
208    /*
209     log_verbose("Testing for ISO-2022-cn\n");
210     {
211         static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
212 
213         static const uint8_t expected[] = {
214                                     0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
215                                     0x36, 0x21,
216                                     0x0F, 0x31,
217                                     0x1A,
218                                     0x32
219                                     };
220 
221 
222 
223         static const int32_t offsets[] = {
224                                     0,    0,    0,    0,    0,    0,    0,
225                                     1,    1,
226                                     2,    2,
227                                     3,
228                                     5,  };
229 
230         // iso-2022-CN  android-change
231         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
232                 expected, sizeof(expected), "iso-2022-cn", 0 , true, U_ZERO_ERROR))
233             log_err("u-> not match.\n");
234         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
235                 expected, sizeof(expected), "iso-2022-cn", offsets , true, U_ZERO_ERROR))
236             log_err("u-> not match.\n");
237     }
238     */
239     /* END android-removed */
240 
241         log_verbose("Testing for ISO-2022-kr\n");
242     {
243         static const UChar    sampleText[] =   { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
244 
245         static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43,
246                                     0x0E, 0x6C, 0x69,
247                                     0x0f, 0x1A,
248                                     0x0e, 0x6F, 0x4B,
249                                     0x0F, 0x31,
250                                     0x1A,
251                                     0x32 };
252 
253         static const int32_t offsets[] = {-1, -1, -1, -1,
254                               0, 0, 0,
255                               1, 1,
256                               3, 3, 3,
257                               4, 4,
258                               5,
259                               7,
260                             };
261 
262         /*iso-2022-kr*/
263         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
264                 expected, sizeof(expected), "iso-2022-kr", 0 , true, U_ZERO_ERROR))
265             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
266         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
267                 expected, sizeof(expected), "iso-2022-kr", offsets , true, U_ZERO_ERROR))
268             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
269     }
270 
271         log_verbose("Testing for HZ\n");
272     {
273         static const UChar    sampleText[] =   { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
274 
275         static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B,
276                                     0x7E, 0x7D, 0x1A,
277                                     0x7E, 0x7B, 0x36, 0x21,
278                                     0x7E, 0x7D, 0x31,
279                                     0x1A,
280                                     0x32 };
281 
282 
283         static const int32_t offsets[] = {0,0,0,0,
284                              1,1,1,
285                              3,3,3,3,
286                              4,4,4,
287                              5,
288                              7,};
289 
290         /*hz*/
291         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
292                 expected, sizeof(expected), "HZ", 0 , true, U_ZERO_ERROR))
293             log_err("u-> HZ not match.\n");
294         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
295                 expected, sizeof(expected), "HZ", offsets , true, U_ZERO_ERROR))
296             log_err("u-> HZ not match.\n");
297     }
298 #endif
299 
300     /*UTF-8*/
301      log_verbose("Testing for UTF8\n");
302     {
303         static const UChar    sampleText[] =   { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032};
304         static const int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02,
305                            0x03, 0x03, 0x03, 0x04, 0x04, 0x04,
306                            0x04, 0x06 };
307         static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31,
308             0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32};
309 
310 
311         static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D };
312         /*UTF-8*/
313         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
314             expected, sizeof(expected), "UTF8", offsets, true, U_ZERO_ERROR ))
315             log_err("u-> UTF8 with offsets and flush true did not match.\n");
316         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
317             expected, sizeof(expected), "UTF8", 0, true, U_ZERO_ERROR ))
318             log_err("u-> UTF8 with offsets and flush true did not match.\n");
319         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
320             expected, sizeof(expected), "UTF8", offsets, false, U_ZERO_ERROR ))
321             log_err("u-> UTF8 with offsets and flush true did not match.\n");
322         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
323             expected, sizeof(expected), "UTF8", 0, false, U_ZERO_ERROR ))
324             log_err("u-> UTF8 with offsets and flush true did not match.\n");
325 
326         if(!convertToU(expected, sizeof(expected),
327             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", 0, true, U_ZERO_ERROR ))
328             log_err("UTF8 -> u did not match.\n");
329         if(!convertToU(expected, sizeof(expected),
330             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", 0, false, U_ZERO_ERROR ))
331             log_err("UTF8 -> u did not match.\n");
332         if(!convertToU(expected, sizeof(expected),
333             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", fromOffsets, true, U_ZERO_ERROR ))
334             log_err("UTF8 ->u  did not match.\n");
335         if(!convertToU(expected, sizeof(expected),
336             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", fromOffsets, false, U_ZERO_ERROR ))
337             log_err("UTF8 -> u did not match.\n");
338 
339     }
340 }
341 
342 /*test various error behaviours*/
TestErrorBehaviour(void)343 static void TestErrorBehaviour(void){
344     log_verbose("Testing for SBCS and LATIN_1\n");
345     {
346         static const UChar    sampleText[] =   { 0x0031, 0xd801};
347         static const UChar    sampleText2[] =   { 0x0031, 0xd801, 0x0032};
348         static const uint8_t expected0[] =          { 0x31};
349         static const uint8_t expected[] =          { 0x31, 0x1a};
350         static const uint8_t expected2[] =         { 0x31, 0x1a, 0x32};
351 
352 #if !UCONFIG_NO_LEGACY_CONVERSION
353         /*SBCS*/
354         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
355                 expected, sizeof(expected), "ibm-920", 0, true, U_ZERO_ERROR))
356             log_err("u-> ibm-920 [UCNV_SBCS] \n");
357         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
358                 expected0, sizeof(expected0), "ibm-920", 0, false, U_ZERO_ERROR))
359             log_err("u-> ibm-920 [UCNV_SBCS] \n");
360         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
361                 expected2, sizeof(expected2), "ibm-920", 0, true, U_ZERO_ERROR))
362             log_err("u-> ibm-920 [UCNV_SBCS] did not match\n");
363 #endif
364 
365         /*LATIN_1*/
366         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
367                 expected, sizeof(expected), "LATIN_1", 0, true, U_ZERO_ERROR))
368             log_err("u-> LATIN_1 is supposed to fail\n");
369         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
370                 expected0, sizeof(expected0), "LATIN_1", 0, false, U_ZERO_ERROR))
371             log_err("u-> LATIN_1 is supposed to fail\n");
372 
373         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
374                 expected2, sizeof(expected2), "LATIN_1", 0, true, U_ZERO_ERROR))
375             log_err("u-> LATIN_1 did not match\n");
376     }
377 
378 #if !UCONFIG_NO_LEGACY_CONVERSION
379     log_verbose("Testing for DBCS and MBCS\n");
380     {
381         static const UChar    sampleText[]    = { 0x00a1, 0xd801};
382         static const uint8_t expected[] = { 0xa2, 0xae};
383         static const int32_t offsets[]        = { 0x00, 0x00};
384         static const uint8_t expectedSUB[] = { 0xa2, 0xae, 0xa1, 0xe0};
385         static const int32_t offsetsSUB[]        = { 0x00, 0x00, 0x01, 0x01};
386 
387         static const UChar       sampleText2[] = { 0x00a1, 0xd801, 0x00a4};
388         static const uint8_t expected2[] = { 0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
389         static const int32_t offsets2[]        = { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02};
390 
391         static const UChar       sampleText3MBCS[] = { 0x0001, 0x00a4, 0xdc01};
392         static const uint8_t expected3MBCS[] = { 0x01, 0xa2, 0xb4, 0xa1, 0xe0};
393         static const int32_t offsets3MBCS[]        = { 0x00, 0x01, 0x01, 0x02, 0x02};
394 
395         static const UChar       sampleText4MBCS[] = { 0x0061, 0xFFE4, 0xdc01};
396         static const uint8_t expected4MBCS[] = { 0x61, 0x8f, 0xa2, 0xc3, 0xf4, 0xfe};
397         static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01, 0x01, 0x02, 0x02 };
398 
399         /*DBCS*/
400         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
401                 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, true, U_ZERO_ERROR))
402             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
403         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
404                 expected, sizeof(expected), "ibm-1363", 0, false, U_AMBIGUOUS_ALIAS_WARNING))
405             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
406 
407         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
408                 expectedSUB, sizeof(expectedSUB), "ibm-1363", offsetsSUB, true, U_ZERO_ERROR))
409             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
410         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
411                 expected, sizeof(expected), "ibm-1363", offsets, false, U_AMBIGUOUS_ALIAS_WARNING))
412             log_err("u-> ibm-1363 [UCNV_DBCS portion] is supposed to fail\n");
413 
414 
415         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
416                 expected2, sizeof(expected2), "ibm-1363", 0, true, U_ZERO_ERROR))
417             log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n");
418         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
419                 expected2, sizeof(expected2), "ibm-1363", offsets2, true, U_ZERO_ERROR))
420             log_err("u-> ibm-1363 [UCNV_DBCS portion] did not match \n");
421 
422         /*MBCS*/
423         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
424                 expectedSUB, sizeof(expectedSUB), "ibm-1363", 0, true, U_ZERO_ERROR))
425             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
426         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
427                 expected, sizeof(expected), "ibm-1363", 0, false, U_AMBIGUOUS_ALIAS_WARNING))
428             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
429 
430         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
431                 expected2, sizeof(expected2), "ibm-1363", 0, true, U_ZERO_ERROR))
432             log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
433         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
434                 expected2, sizeof(expected2), "ibm-1363", 0, false, U_ZERO_ERROR))
435             log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
436         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
437                 expected2, sizeof(expected2), "ibm-1363", offsets2, false, U_ZERO_ERROR))
438             log_err("u-> ibm-1363 [UCNV_DBCS] did not match\n");
439 
440         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
441                 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, true, U_ZERO_ERROR))
442             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
443         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
444                 expected3MBCS, sizeof(expected3MBCS), "ibm-1363", offsets3MBCS, false, U_ZERO_ERROR))
445             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
446 
447         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
448                 expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, true, U_ZERO_ERROR))
449             log_err("u-> euc-jp [UCNV_MBCS] \n");
450         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
451                 expected4MBCS, sizeof(expected4MBCS), "IBM-eucJP", offsets4MBCS, false, U_ZERO_ERROR))
452             log_err("u-> euc-jp [UCNV_MBCS] \n");
453     }
454 
455     /*iso-2022-jp*/
456     log_verbose("Testing for iso-2022-jp\n");
457     {
458         static const UChar    sampleText[]    = { 0x0031, 0xd801};
459         static const uint8_t expected[] = {  0x31};
460         static const uint8_t expectedSUB[] = {  0x31, 0x1a};
461         static const int32_t offsets[]        = { 0x00, 1};
462 
463         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
464         static const uint8_t expected2[] = {  0x31,0x1A,0x32};
465         static const int32_t offsets2[]        = { 0x00,0x01,0x02};
466 
467         static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
468         static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x42, 0x30, 0x6c,0x1b,0x28,0x42,0x1a};
469         static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01,0x02,0x02,0x02,0x02 };
470         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
471                 expectedSUB, sizeof(expectedSUB), "iso-2022-jp", offsets, true, U_ZERO_ERROR))
472             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
473         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
474                 expected, sizeof(expected), "iso-2022-jp", offsets, false, U_AMBIGUOUS_ALIAS_WARNING))
475             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
476 
477         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
478                 expected2, sizeof(expected2), "iso-2022-jp", offsets2, true, U_ZERO_ERROR))
479             log_err("u->iso-2022-jp[UCNV_DBCS] did not match\n");
480         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
481                 expected2, sizeof(expected2), "iso-2022-jp", offsets2, false, U_ZERO_ERROR))
482             log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n");
483         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
484                 expected2, sizeof(expected2), "iso-2022-jp", offsets2, false, U_ZERO_ERROR))
485             log_err("u-> iso-2022-jp [UCNV_DBCS] did not match\n");
486 
487         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
488                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, true, U_ZERO_ERROR))
489             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
490         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
491                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-jp", offsets4MBCS, false, U_ZERO_ERROR))
492             log_err("u-> iso-2022-jp [UCNV_MBCS] \n");
493     }
494 
495     /* BEGIN android-removed */
496     /* To save space, Android does not build full ISO-2022-CN tables.
497        We skip the tests for ISO-2022-CN. */
498     /*iso-2022-cn*/
499     /*
500     log_verbose("Testing for iso-2022-cn\n");
501     {
502         static const UChar    sampleText[]    = { 0x0031, 0xd801};
503         static const uint8_t expected[] = { 0x31};
504         static const uint8_t expectedSUB[] = { 0x31, 0x1A};
505         static const int32_t offsets[]        = { 0x00, 1};
506 
507         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
508         static const uint8_t expected2[] = { 0x31, 0x1A,0x32};
509         static const int32_t offsets2[]        = { 0x00, 0x01,0x02};
510 
511         static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
512         static const uint8_t expected3MBCS[] = {0x51, 0x50, 0x1A};
513         static const int32_t offsets3MBCS[]        = { 0x00, 0x01, 0x02 };
514 
515         static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
516         static const uint8_t expected4MBCS[] = { 0x61, 0x1b, 0x24, 0x29, 0x41, 0x0e, 0x52, 0x3b, 0x0f, 0x1a };
517         static const int32_t offsets4MBCS[]        = { 0x00, 0x01, 0x01 ,0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02 };
518         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
519                 expectedSUB, sizeof(expectedSUB), "iso-2022-cn", offsets, true, U_ZERO_ERROR))
520             log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
521         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
522                 expected, sizeof(expected), "iso-2022-cn", offsets, false, U_ZERO_ERROR))
523             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
524 
525         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
526                 expected2, sizeof(expected2), "iso-2022-cn", offsets2, true, U_ZERO_ERROR))
527             log_err("u->iso-2022-cn[UCNV_DBCS] did not match\n");
528         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
529                 expected2, sizeof(expected2), "iso-2022-cn", offsets2, false, U_ZERO_ERROR))
530             log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n");
531         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
532                 expected2, sizeof(expected2), "iso-2022-cn", offsets2, false, U_ZERO_ERROR))
533             log_err("u-> iso-2022-cn [UCNV_DBCS] did not match\n");
534 
535         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
536                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, true, U_ZERO_ERROR))
537             log_err("u->iso-2022-cn [UCNV_MBCS] \n");
538         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
539                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-cn", offsets3MBCS, false, U_ZERO_ERROR))
540             log_err("u-> iso-2022-cn[UCNV_MBCS] \n");
541 
542         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
543                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, true, U_ZERO_ERROR))
544             log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
545         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
546                 expected4MBCS, sizeof(expected4MBCS), "iso-2022-cn", offsets4MBCS, false, U_ZERO_ERROR))
547             log_err("u-> iso-2022-cn [UCNV_MBCS] \n");
548     }
549     */
550     /* END android-removed */
551 
552     /*iso-2022-kr*/
553     log_verbose("Testing for iso-2022-kr\n");
554     {
555         static const UChar    sampleText[]    = { 0x0031, 0xd801};
556         static const uint8_t expected[] = { 0x1b, 0x24, 0x29, 0x43, 0x31};
557         static const uint8_t expectedSUB[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A};
558         static const int32_t offsets[]        = { -1,   -1,   -1,   -1,   0x00, 1};
559 
560         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
561         static const uint8_t expected2[] = { 0x1b, 0x24, 0x29, 0x43, 0x31, 0x1A, 0x32};
562         static const int32_t offsets2[]        = { -1,   -1,   -1,   -1,   0x00, 0x01, 0x02};
563 
564         static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
565         static const uint8_t expected3MBCS[] = { 0x1b, 0x24, 0x29, 0x43,  0x51, 0x50, 0x1A };
566         static const int32_t offsets3MBCS[]        = { -1,   -1,   -1,   -1,    0x00, 0x01, 0x02, 0x02 };
567 
568         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
569                 expectedSUB, sizeof(expectedSUB), "iso-2022-kr", offsets, true, U_ZERO_ERROR))
570             log_err("u-> iso-2022-kr [UCNV_MBCS] \n");
571         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
572                 expected, sizeof(expected), "iso-2022-kr", offsets, false, U_ZERO_ERROR))
573             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
574 
575         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
576                 expected2, sizeof(expected2), "iso-2022-kr", offsets2, true, U_ZERO_ERROR))
577             log_err("u->iso-2022-kr[UCNV_DBCS] did not match\n");
578         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
579                 expected2, sizeof(expected2), "iso-2022-kr", offsets2, false, U_ZERO_ERROR))
580             log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n");
581         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
582                 expected2, sizeof(expected2), "iso-2022-kr", offsets2, false, U_ZERO_ERROR))
583             log_err("u-> iso-2022-kr [UCNV_DBCS] did not match\n");
584 
585         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
586                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, true, U_ZERO_ERROR))
587             log_err("u->iso-2022-kr [UCNV_MBCS] \n");
588         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
589                 expected3MBCS, sizeof(expected3MBCS), "iso-2022-kr", offsets3MBCS, false, U_ZERO_ERROR))
590             log_err("u-> iso-2022-kr[UCNV_MBCS] \n");
591     }
592 
593     /*HZ*/
594     log_verbose("Testing for HZ\n");
595     {
596         static const UChar    sampleText[]    = { 0x0031, 0xd801};
597         static const uint8_t expected[] = { 0x7e, 0x7d, 0x31};
598         static const uint8_t expectedSUB[] = { 0x7e, 0x7d, 0x31, 0x1A};
599         static const int32_t offsets[]        = { 0x00, 0x00, 0x00, 1};
600 
601         static const UChar       sampleText2[] = { 0x0031, 0xd801, 0x0032};
602         static const uint8_t expected2[] = { 0x7e, 0x7d, 0x31,  0x1A,  0x32 };
603         static const int32_t offsets2[]        = { 0x00, 0x00, 0x00, 0x01,  0x02 };
604 
605         static const UChar       sampleText3MBCS[] = { 0x0051, 0x0050, 0xdc01};
606         static const uint8_t expected3MBCS[] = { 0x7e, 0x7d, 0x51, 0x50,  0x1A };
607         static const int32_t offsets3MBCS[]        = { 0x00, 0x00, 0x00, 0x01, 0x02};
608 
609         static const UChar       sampleText4MBCS[] = { 0x0061, 0x4e00, 0xdc01};
610         static const uint8_t expected4MBCS[] = { 0x7e, 0x7d, 0x61, 0x7e, 0x7b, 0x52, 0x3b, 0x7e, 0x7d, 0x1a };
611         static const int32_t offsets4MBCS[]        = { 0x00, 0x00, 0x00, 0x01, 0x01, 0x01 ,0x01, 0x02, 0x02, 0x02 };
612         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
613                 expectedSUB, sizeof(expectedSUB), "HZ", offsets, true, U_ZERO_ERROR))
614             log_err("u-> HZ [UCNV_MBCS] \n");
615         if(!convertFromU(sampleText, UPRV_LENGTHOF(sampleText),
616                 expected, sizeof(expected), "HZ", offsets, false, U_ZERO_ERROR))
617             log_err("u-> ibm-1363 [UCNV_MBCS] \n");
618 
619         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
620                 expected2, sizeof(expected2), "HZ", offsets2, true, U_ZERO_ERROR))
621             log_err("u->HZ[UCNV_DBCS] did not match\n");
622         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
623                 expected2, sizeof(expected2), "HZ", offsets2, false, U_ZERO_ERROR))
624             log_err("u-> HZ [UCNV_DBCS] did not match\n");
625         if(!convertFromU(sampleText2, UPRV_LENGTHOF(sampleText2),
626                 expected2, sizeof(expected2), "HZ", offsets2, false, U_ZERO_ERROR))
627             log_err("u-> HZ [UCNV_DBCS] did not match\n");
628 
629         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
630                 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, true, U_ZERO_ERROR))
631             log_err("u->HZ [UCNV_MBCS] \n");
632         if(!convertFromU(sampleText3MBCS, UPRV_LENGTHOF(sampleText3MBCS),
633                 expected3MBCS, sizeof(expected3MBCS), "HZ", offsets3MBCS, false, U_ZERO_ERROR))
634             log_err("u-> HZ[UCNV_MBCS] \n");
635 
636         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
637                 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, true, U_ZERO_ERROR))
638             log_err("u-> HZ [UCNV_MBCS] \n");
639         if(!convertFromU(sampleText4MBCS, UPRV_LENGTHOF(sampleText4MBCS),
640                 expected4MBCS, sizeof(expected4MBCS), "HZ", offsets4MBCS, false, U_ZERO_ERROR))
641             log_err("u-> HZ [UCNV_MBCS] \n");
642     }
643 #endif
644 }
645 
646 #if !UCONFIG_NO_LEGACY_CONVERSION
647 /*test different convertToUnicode error behaviours*/
TestToUnicodeErrorBehaviour(void)648 static void TestToUnicodeErrorBehaviour(void)
649 {
650     log_verbose("Testing error conditions for DBCS\n");
651     {
652         uint8_t sampleText[] = { 0xa2, 0xae, 0x03, 0x04};
653         const UChar expected[] = { 0x00a1 };
654 
655         if(!convertToU(sampleText, sizeof(sampleText),
656                 expected, UPRV_LENGTHOF(expected), "ibm-1363", 0, true, U_AMBIGUOUS_ALIAS_WARNING ))
657             log_err("DBCS (ibm-1363)->Unicode  did not match.\n");
658         if(!convertToU(sampleText, sizeof(sampleText),
659                 expected, UPRV_LENGTHOF(expected), "ibm-1363", 0, false, U_AMBIGUOUS_ALIAS_WARNING ))
660             log_err("DBCS (ibm-1363)->Unicode  with flush = false did not match.\n");
661     }
662     log_verbose("Testing error conditions for SBCS\n");
663     {
664         uint8_t sampleText[] = { 0xa2, 0xFF};
665         const UChar expected[] = { 0x00c2 };
666 
667       /*  uint8_t sampleText2[] = { 0xa2, 0x70 };
668         const UChar expected2[] = { 0x0073 };*/
669 
670         if(!convertToU(sampleText, sizeof(sampleText),
671                 expected, UPRV_LENGTHOF(expected), "ibm-1051", 0, true, U_ZERO_ERROR ))
672             log_err("SBCS (ibm-1051)->Unicode  did not match.\n");
673         if(!convertToU(sampleText, sizeof(sampleText),
674                 expected, UPRV_LENGTHOF(expected), "ibm-1051", 0, false, U_ZERO_ERROR ))
675             log_err("SBCS (ibm-1051)->Unicode  with flush = false did not match.\n");
676 
677     }
678 }
679 
TestGetNextErrorBehaviour(void)680 static void TestGetNextErrorBehaviour(void){
681    /*Test for unassigned character*/
682 #define INPUT_SIZE 1
683     static const char input1[INPUT_SIZE]={ 0x70 };
684     const char* source=(const char*)input1;
685     UErrorCode err=U_ZERO_ERROR;
686     UChar32 c=0;
687     UConverter *cnv=ucnv_open("ibm-424", &err);
688     if(U_FAILURE(err)) {
689         log_data_err("Unable to open a SBCS(ibm-424) converter: %s\n", u_errorName(err));
690         return;
691     }
692     c=ucnv_getNextUChar(cnv, &source, source + INPUT_SIZE, &err);
693     if(err != U_INVALID_CHAR_FOUND && c!=0xfffd){
694         log_err("FAIL in TestGetNextErrorBehaviour(unassigned): Expected: U_INVALID_CHAR_ERROR or 0xfffd ----Got:%s and 0x%lx\n",  myErrorName(err), c);
695     }
696     ucnv_close(cnv);
697 }
698 #endif
699 
700 #define MAX_UTF16_LEN 2
701 #define MAX_UTF8_LEN 4
702 
703 /*Regression test for utf8 converter*/
TestRegressionUTF8(void)704 static void TestRegressionUTF8(void){
705     UChar32 currCh = 0;
706     int32_t offset8;
707     int32_t offset16;
708     UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar));
709     uint8_t *utf8 = (uint8_t*)malloc(MAX_LENGTH);
710 
711     while (currCh <= UNICODE_LIMIT) {
712         offset16 = 0;
713         offset8 = 0;
714         while(currCh <= UNICODE_LIMIT
715             && offset16 < ((int32_t)(MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN))
716             && offset8 < (MAX_LENGTH - MAX_UTF8_LEN))
717         {
718             if (currCh == SURROGATE_HIGH_START) {
719                 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */
720             }
721             U16_APPEND_UNSAFE(standardForm, offset16, currCh);
722             U8_APPEND_UNSAFE(utf8, offset8, currCh);
723             currCh++;
724         }
725         if(!convertFromU(standardForm, offset16,
726             utf8, offset8, "UTF8", 0, true, U_ZERO_ERROR )) {
727             log_err("Unicode->UTF8 did not match.\n");
728         }
729         if(!convertToU(utf8, offset8,
730             standardForm, offset16, "UTF8", 0, true, U_ZERO_ERROR )) {
731             log_err("UTF8->Unicode did not match.\n");
732         }
733     }
734 
735     free(standardForm);
736     free(utf8);
737 
738     {
739         static const char src8[] = { (char)0xCC, (char)0x81, (char)0xCC, (char)0x80 };
740         static const UChar expected[] = { 0x0301, 0x0300 };
741         UConverter *conv8;
742         UErrorCode err = U_ZERO_ERROR;
743         UChar pivotBuffer[100];
744         const UChar* const pivEnd = pivotBuffer + 100;
745         const char* srcBeg;
746         const char* srcEnd;
747         UChar* pivBeg;
748 
749         conv8 = ucnv_open("UTF-8", &err);
750 
751         srcBeg = src8;
752         pivBeg = pivotBuffer;
753         srcEnd = src8 + 3;
754         ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, false, &err);
755         if (srcBeg != srcEnd) {
756             log_err("Did not consume whole buffer on first call.\n");
757         }
758 
759         srcEnd = src8 + 4;
760         ucnv_toUnicode(conv8, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, true, &err);
761         if (srcBeg != srcEnd) {
762             log_err("Did not consume whole buffer on second call.\n");
763         }
764 
765         if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
766             log_err("Did not get expected results for UTF-8.\n");
767         }
768         ucnv_close(conv8);
769     }
770 }
771 
772 #define MAX_UTF32_LEN 1
773 
TestRegressionUTF32(void)774 static void TestRegressionUTF32(void){
775 #if !UCONFIG_ONLY_HTML_CONVERSION
776     UChar32 currCh = 0;
777     int32_t offset32;
778     int32_t offset16;
779     UChar *standardForm = (UChar*)malloc(MAX_LENGTH*sizeof(UChar));
780     UChar32 *utf32 = (UChar32*)malloc(MAX_LENGTH*sizeof(UChar32));
781 
782     while (currCh <= UNICODE_LIMIT) {
783         offset16 = 0;
784         offset32 = 0;
785         while(currCh <= UNICODE_LIMIT
786             && offset16 < ((int32_t)(MAX_LENGTH/sizeof(UChar) - MAX_UTF16_LEN))
787             && offset32 < ((int32_t)(MAX_LENGTH/sizeof(UChar32) - MAX_UTF32_LEN)))
788         {
789             if (currCh == SURROGATE_HIGH_START) {
790                 currCh = SURROGATE_LOW_END + 1; /* Skip surrogate range */
791             }
792             U16_APPEND_UNSAFE(standardForm, offset16, currCh);
793             utf32[offset32++] = currCh;
794             currCh++;
795         }
796         if(!convertFromU(standardForm, offset16,
797             (const uint8_t *)utf32, offset32*sizeof(UChar32), "UTF32_PlatformEndian", 0, true, U_ZERO_ERROR )) {
798             log_err("Unicode->UTF32 did not match.\n");
799         }
800         if(!convertToU((const uint8_t *)utf32, offset32*sizeof(UChar32),
801             standardForm, offset16, "UTF32_PlatformEndian", 0, true, U_ZERO_ERROR )) {
802             log_err("UTF32->Unicode did not match.\n");
803         }
804     }
805     free(standardForm);
806     free(utf32);
807 
808     {
809         /* Check for lone surrogate error handling. */
810         static const UChar   sampleBadStartSurrogate[] = { 0x0031, 0xD800, 0x0032 };
811         static const UChar   sampleBadEndSurrogate[] = { 0x0031, 0xDC00, 0x0032 };
812         static const uint8_t expectedUTF32BE[] = {
813             0x00, 0x00, 0x00, 0x31,
814             0x00, 0x00, 0xff, 0xfd,
815             0x00, 0x00, 0x00, 0x32
816         };
817         static const uint8_t expectedUTF32LE[] = {
818             0x31, 0x00, 0x00, 0x00,
819             0xfd, 0xff, 0x00, 0x00,
820             0x32, 0x00, 0x00, 0x00
821         };
822         static const int32_t offsetsUTF32[] = {
823             0x00, 0x00, 0x00, 0x00,
824             0x01, 0x01, 0x01, 0x01,
825             0x02, 0x02, 0x02, 0x02
826         };
827 
828         if(!convertFromU(sampleBadStartSurrogate, UPRV_LENGTHOF(sampleBadStartSurrogate),
829                 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, true, U_ZERO_ERROR))
830             log_err("u->UTF-32BE\n");
831         if(!convertFromU(sampleBadEndSurrogate, UPRV_LENGTHOF(sampleBadEndSurrogate),
832                 expectedUTF32BE, sizeof(expectedUTF32BE), "UTF-32BE", offsetsUTF32, true, U_ZERO_ERROR))
833             log_err("u->UTF-32BE\n");
834 
835         if(!convertFromU(sampleBadStartSurrogate, UPRV_LENGTHOF(sampleBadStartSurrogate),
836                 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, true, U_ZERO_ERROR))
837             log_err("u->UTF-32LE\n");
838         if(!convertFromU(sampleBadEndSurrogate, UPRV_LENGTHOF(sampleBadEndSurrogate),
839                 expectedUTF32LE, sizeof(expectedUTF32LE), "UTF-32LE", offsetsUTF32, true, U_ZERO_ERROR))
840             log_err("u->UTF-32LE\n");
841     }
842 
843     {
844         static const char srcBE[] = { 0, 0, 0, 0x31, 0, 0, 0, 0x30 };
845         static const UChar expected[] = { 0x0031, 0x0030 };
846         UConverter *convBE;
847         UErrorCode err = U_ZERO_ERROR;
848         UChar pivotBuffer[100];
849         const UChar* const pivEnd = pivotBuffer + 100;
850         const char* srcBeg;
851         const char* srcEnd;
852         UChar* pivBeg;
853 
854         convBE = ucnv_open("UTF-32BE", &err);
855 
856         srcBeg = srcBE;
857         pivBeg = pivotBuffer;
858         srcEnd = srcBE + 5;
859         ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, false, &err);
860         if (srcBeg != srcEnd) {
861             log_err("Did not consume whole buffer on first call.\n");
862         }
863 
864         srcEnd = srcBE + 8;
865         ucnv_toUnicode(convBE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, true, &err);
866         if (srcBeg != srcEnd) {
867             log_err("Did not consume whole buffer on second call.\n");
868         }
869 
870         if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
871             log_err("Did not get expected results for UTF-32BE.\n");
872         }
873         ucnv_close(convBE);
874     }
875     {
876         static const char srcLE[] = { 0x31, 0, 0, 0, 0x30, 0, 0, 0 };
877         static const UChar expected[] = { 0x0031, 0x0030 };
878         UConverter *convLE;
879         UErrorCode err = U_ZERO_ERROR;
880         UChar pivotBuffer[100];
881         const UChar* const pivEnd = pivotBuffer + 100;
882         const char* srcBeg;
883         const char* srcEnd;
884         UChar* pivBeg;
885 
886         convLE = ucnv_open("UTF-32LE", &err);
887 
888         srcBeg = srcLE;
889         pivBeg = pivotBuffer;
890         srcEnd = srcLE + 5;
891         ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, false, &err);
892         if (srcBeg != srcEnd) {
893             log_err("Did not consume whole buffer on first call.\n");
894         }
895 
896         srcEnd = srcLE + 8;
897         ucnv_toUnicode(convLE, &pivBeg, pivEnd, &srcBeg, srcEnd, 0, true, &err);
898         if (srcBeg != srcEnd) {
899             log_err("Did not consume whole buffer on second call.\n");
900         }
901 
902         if (U_FAILURE(err) || (int32_t)(pivBeg - pivotBuffer) != 2 || u_strncmp(pivotBuffer, expected, 2) != 0) {
903             log_err("Did not get expected results for UTF-32LE.\n");
904         }
905         ucnv_close(convLE);
906     }
907 #endif
908 }
909 
910 /*Walk through the available converters*/
TestAvailableConverters(void)911 static void TestAvailableConverters(void){
912     UErrorCode status=U_ZERO_ERROR;
913     UConverter *conv=NULL;
914     int32_t i=0;
915     for(i=0; i < ucnv_countAvailable(); i++){
916         status=U_ZERO_ERROR;
917         conv=ucnv_open(ucnv_getAvailableName(i), &status);
918         if(U_FAILURE(status)){
919             log_err("ERROR: converter creation failed. Failure in alias table or the data table for \n converter=%s. Error=%s\n",
920                         ucnv_getAvailableName(i), myErrorName(status));
921             continue;
922         }
923         ucnv_close(conv);
924     }
925 
926 }
927 
TestFlushInternalBuffer(void)928 static void TestFlushInternalBuffer(void){
929     TestWithBufferSize(MAX_LENGTH, 1);
930     TestWithBufferSize(1, 1);
931     TestWithBufferSize(1, MAX_LENGTH);
932     TestWithBufferSize(MAX_LENGTH, MAX_LENGTH);
933 }
934 
TestWithBufferSize(int32_t insize,int32_t outsize)935 static void TestWithBufferSize(int32_t insize, int32_t outsize){
936 
937     gInBufferSize =insize;
938     gOutBufferSize = outsize;
939 
940      log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
941     {
942         UChar    sampleText[] =
943             { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09,  0x002E  };
944         const uint8_t expectedUTF8[] =
945             { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
946         int32_t  toUTF8Offs[] =
947             { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07};
948        /* int32_t fmUTF8Offs[] =
949             { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d };*/
950 
951         /*UTF-8*/
952         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
953             expectedUTF8, sizeof(expectedUTF8), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE, toUTF8Offs ,false))
954              log_err("u-> UTF8 did not match.\n");
955     }
956 
957 #if !UCONFIG_NO_LEGACY_CONVERSION
958      log_verbose("Testing fromUnicode with UCNV_FROM_U_CALLBACK_ESCAPE  \n");
959     {
960         UChar inputTest[] = { 0x0061, 0xd801, 0xdc01, 0xd801, 0x0061 };
961         const uint8_t toIBM943[]= { 0x61,
962             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
963             0x25, 0x55, 0x44, 0x43, 0x30, 0x31,
964             0x25, 0x55, 0x44, 0x38, 0x30, 0x31,
965             0x61 };
966         int32_t offset[]= {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 4};
967 
968         if(!testConvertFromU(inputTest, UPRV_LENGTHOF(inputTest),
969                 toIBM943, sizeof(toIBM943), "ibm-943",
970                 (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_ESCAPE, offset,false))
971             log_err("u-> ibm-943 with subst with value did not match.\n");
972     }
973 #endif
974 
975      log_verbose("Testing fromUnicode for UTF-8 with UCNV_TO_U_CALLBACK_SUBSTITUTE \n");
976     {
977         const uint8_t sampleText1[] = { 0x31, 0xe4, 0xba, 0x8c,
978             0xe0, 0x80,  0x61};
979         UChar    expected1[] = {  0x0031, 0x4e8c, 0xfffd, 0xfffd, 0x0061};
980         int32_t offsets1[] = {   0x0000, 0x0001, 0x0004, 0x0005, 0x0006};
981 
982         if(!testConvertToU(sampleText1, sizeof(sampleText1),
983                  expected1, UPRV_LENGTHOF(expected1),"utf8", UCNV_TO_U_CALLBACK_SUBSTITUTE, offsets1,false))
984             log_err("utf8->u with substitute did not match.\n");
985     }
986 
987 #if !UCONFIG_NO_LEGACY_CONVERSION
988     log_verbose("Testing toUnicode with UCNV_TO_U_CALLBACK_ESCAPE \n");
989     /*to Unicode*/
990     {
991         const uint8_t sampleTxtToU[]= { 0x00, 0x9f, 0xaf,
992             0x81, 0xad, /*unassigned*/
993             0x89, 0xd3 };
994         UChar IBM_943toUnicode[] = { 0x0000, 0x6D63,
995             0x25, 0x58, 0x38, 0x31, 0x25, 0x58, 0x41, 0x44,
996             0x7B87};
997         int32_t  fromIBM943Offs [] =    { 0, 1, 3, 3, 3, 3, 3, 3, 3, 3, 5};
998 
999         if(!testConvertToU(sampleTxtToU, sizeof(sampleTxtToU),
1000                  IBM_943toUnicode, UPRV_LENGTHOF(IBM_943toUnicode),"ibm-943",
1001                 (UConverterToUCallback)UCNV_TO_U_CALLBACK_ESCAPE, fromIBM943Offs,false))
1002             log_err("ibm-943->u with substitute with value did not match.\n");
1003 
1004     }
1005 #endif
1006 }
1007 
convertFromU(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,const int32_t * expectOffsets,UBool doFlush,UErrorCode expectedStatus)1008 static UBool convertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
1009                 const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus)
1010 {
1011 
1012     int32_t i=0;
1013     char *p=0;
1014     const UChar *src;
1015     char buffer[MAX_LENGTH];
1016     int32_t offsetBuffer[MAX_LENGTH];
1017     int32_t *offs=0;
1018     char *targ;
1019     char *targetLimit;
1020     UChar *sourceLimit=0;
1021     UErrorCode status = U_ZERO_ERROR;
1022     UConverter *conv = 0;
1023     conv = ucnv_open(codepage, &status);
1024     if(U_FAILURE(status))
1025     {
1026         log_data_err("Couldn't open converter %s\n",codepage);
1027         return true;
1028     }
1029     log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status));
1030 
1031     for(i=0; i<MAX_LENGTH; i++){
1032         buffer[i]=(char)0xF0;
1033         offsetBuffer[i]=0xFF;
1034     }
1035 
1036     src=source;
1037     sourceLimit=(UChar*)src+(sourceLen);
1038     targ=buffer;
1039     targetLimit=targ+MAX_LENGTH;
1040     offs=offsetBuffer;
1041     ucnv_fromUnicode (conv,
1042                   &targ,
1043                   targetLimit,
1044                   &src,
1045                   sourceLimit,
1046                   expectOffsets ? offs : NULL,
1047                   doFlush,
1048                   &status);
1049     ucnv_close(conv);
1050     if(status != expectedStatus){
1051           log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus));
1052           return false;
1053     }
1054 
1055     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
1056         sourceLen, targ-buffer);
1057 
1058     if(expectLen != targ-buffer)
1059     {
1060         log_err("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage);
1061         log_verbose("Expected %d chars out, got %d FROM Unicode to %s\n", expectLen, targ-buffer, codepage);
1062         printSeqErr((const unsigned char *)buffer, (int32_t)(targ-buffer));
1063         printSeqErr((const unsigned char*)expect, expectLen);
1064         return false;
1065     }
1066 
1067     if(memcmp(buffer, expect, expectLen)){
1068         log_err("String does not match. FROM Unicode to codePage%s\n", codepage);
1069         log_info("\nGot:");
1070         printSeqErr((const unsigned char *)buffer, expectLen);
1071         log_info("\nExpected:");
1072         printSeqErr((const unsigned char *)expect, expectLen);
1073         return false;
1074     }
1075     else {
1076         log_verbose("Matches!\n");
1077     }
1078 
1079     if (expectOffsets != 0){
1080         log_verbose("comparing %d offsets..\n", targ-buffer);
1081         if(memcmp(offsetBuffer,expectOffsets,(targ-buffer) * sizeof(int32_t) )){
1082             log_err("did not get the expected offsets. for FROM Unicode to %s\n", codepage);
1083             log_info("\nGot  : ");
1084             printSeqErr((const unsigned char*)buffer, (int32_t)(targ-buffer));
1085             for(p=buffer;p<targ;p++)
1086                 log_info("%d, ", offsetBuffer[p-buffer]);
1087             log_info("\nExpected: ");
1088             for(i=0; i< (targ-buffer); i++)
1089                 log_info("%d,", expectOffsets[i]);
1090         }
1091     }
1092 
1093     return true;
1094 }
1095 
1096 
convertToU(const uint8_t * source,int sourceLen,const UChar * expect,int expectLen,const char * codepage,const int32_t * expectOffsets,UBool doFlush,UErrorCode expectedStatus)1097 static UBool convertToU( const uint8_t *source, int sourceLen, const UChar *expect, int expectLen,
1098                const char *codepage, const int32_t *expectOffsets, UBool doFlush, UErrorCode expectedStatus)
1099 {
1100     UErrorCode status = U_ZERO_ERROR;
1101     UConverter *conv = 0;
1102     int32_t i=0;
1103     UChar *p=0;
1104     const char* src;
1105     UChar buffer[MAX_LENGTH];
1106     int32_t offsetBuffer[MAX_LENGTH];
1107     int32_t *offs=0;
1108     UChar *targ;
1109     UChar *targetLimit;
1110     uint8_t *sourceLimit=0;
1111 
1112 
1113 
1114     conv = ucnv_open(codepage, &status);
1115     if(U_FAILURE(status))
1116     {
1117         log_data_err("Couldn't open converter %s\n",codepage);
1118         return true;
1119     }
1120     log_verbose("Converter %s opened..\n", ucnv_getName(conv, &status));
1121 
1122 
1123 
1124     for(i=0; i<MAX_LENGTH; i++){
1125         buffer[i]=0xFFFE;
1126         offsetBuffer[i]=-1;
1127     }
1128 
1129     src=(const char *)source;
1130     sourceLimit=(uint8_t*)(src+(sourceLen));
1131     targ=buffer;
1132     targetLimit=targ+MAX_LENGTH;
1133     offs=offsetBuffer;
1134 
1135 
1136 
1137     ucnv_toUnicode (conv,
1138                 &targ,
1139                 targetLimit,
1140                 &src,
1141                 (const char *)sourceLimit,
1142                 expectOffsets ? offs : NULL,
1143                 doFlush,
1144                 &status);
1145 
1146     ucnv_close(conv);
1147     if(status != expectedStatus){
1148           log_err("ucnv_fromUnicode() failed for codepage=%s. Error =%s Expected=%s\n", codepage, myErrorName(status), myErrorName(expectedStatus));
1149           return false;
1150     }
1151     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
1152         sourceLen, targ-buffer);
1153 
1154 
1155 
1156 
1157     log_verbose("comparing %d uchars (%d bytes)..\n",expectLen,expectLen*2);
1158 
1159     if (expectOffsets != 0) {
1160         if(memcmp(offsetBuffer, expectOffsets, (targ-buffer) * sizeof(int32_t))){
1161 
1162             log_err("did not get the expected offsets from %s To UNICODE\n", codepage);
1163             log_info("\nGot : ");
1164             for(p=buffer;p<targ;p++)
1165                 log_info("%d, ", offsetBuffer[p-buffer]);
1166             log_info("\nExpected: ");
1167             for(i=0; i<(targ-buffer); i++)
1168                 log_info("%d, ", expectOffsets[i]);
1169             log_info("\nGot result:");
1170             for(i=0; i<(targ-buffer); i++)
1171                 log_info("0x%04X,", buffer[i]);
1172             log_info("\nFrom Input:");
1173             for(i=0; i<(src-(const char *)source); i++)
1174                 log_info("0x%02X,", (unsigned char)source[i]);
1175             log_info("\n");
1176         }
1177     }
1178     if(memcmp(buffer, expect, expectLen*2)){
1179         log_err("String does not match. from codePage %s TO Unicode\n", codepage);
1180         log_info("\nGot:");
1181         printUSeqErr(buffer, expectLen);
1182         log_info("\nExpected:");
1183         printUSeqErr(expect, expectLen);
1184         return false;
1185     }
1186     else {
1187         log_verbose("Matches!\n");
1188     }
1189 
1190     return true;
1191 }
1192 
1193 
testConvertFromU(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UConverterFromUCallback callback,const int32_t * expectOffsets,UBool testReset)1194 static UBool testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
1195                 const char *codepage, UConverterFromUCallback callback , const int32_t *expectOffsets, UBool testReset)
1196 {
1197     UErrorCode status = U_ZERO_ERROR;
1198     UConverter *conv = 0;
1199     char    junkout[MAX_LENGTH]; /* FIX */
1200     int32_t    junokout[MAX_LENGTH]; /* FIX */
1201     char *p;
1202     const UChar *src;
1203     char *end;
1204     char *targ;
1205     int32_t *offs;
1206     int i;
1207     int32_t   realBufferSize;
1208     char *realBufferEnd;
1209     const UChar *realSourceEnd;
1210     const UChar *sourceLimit;
1211     UBool checkOffsets = true;
1212     UBool doFlush;
1213 
1214     UConverterFromUCallback oldAction = NULL;
1215     const void* oldContext = NULL;
1216 
1217     for(i=0;i<MAX_LENGTH;i++)
1218         junkout[i] = (char)0xF0;
1219     for(i=0;i<MAX_LENGTH;i++)
1220         junokout[i] = 0xFF;
1221 
1222     setNuConvTestName(codepage, "FROM");
1223 
1224     log_verbose("\n=========  %s\n", gNuConvTestName);
1225 
1226     conv = ucnv_open(codepage, &status);
1227     if(U_FAILURE(status))
1228     {
1229         log_data_err("Couldn't open converter %s\n",codepage);
1230         return true;
1231     }
1232 
1233     log_verbose("Converter opened..\n");
1234     /*----setting the callback routine----*/
1235     ucnv_setFromUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
1236     if (U_FAILURE(status)) {
1237         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
1238     }
1239     /*------------------------*/
1240 
1241     src = source;
1242     targ = junkout;
1243     offs = junokout;
1244 
1245     realBufferSize = UPRV_LENGTHOF(junkout);
1246     realBufferEnd = junkout + realBufferSize;
1247     realSourceEnd = source + sourceLen;
1248 
1249     if ( gOutBufferSize != realBufferSize )
1250       checkOffsets = false;
1251 
1252     if( gInBufferSize != MAX_LENGTH )
1253       checkOffsets = false;
1254 
1255     do
1256     {
1257         end = nct_min(targ + gOutBufferSize, realBufferEnd);
1258         sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
1259 
1260         doFlush = (UBool)(sourceLimit == realSourceEnd);
1261 
1262         if(targ == realBufferEnd)
1263           {
1264         log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
1265         return false;
1266           }
1267         log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"true":"false");
1268 
1269 
1270         status = U_ZERO_ERROR;
1271         if(gInBufferSize ==999 && gOutBufferSize==999)
1272             doFlush = false;
1273         ucnv_fromUnicode (conv,
1274                   &targ,
1275                   end,
1276                   &src,
1277                   sourceLimit,
1278                   offs,
1279                   doFlush, /* flush if we're at the end of the input data */
1280                   &status);
1281         if(testReset)
1282             ucnv_resetToUnicode(conv);
1283         if(gInBufferSize ==999 && gOutBufferSize==999)
1284             ucnv_resetToUnicode(conv);
1285 
1286       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
1287 
1288     if(U_FAILURE(status)) {
1289         log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
1290         return false;
1291       }
1292 
1293     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
1294         sourceLen, targ-junkout);
1295     if(getTestOption(VERBOSITY_OPTION))
1296     {
1297         char junk[999];
1298         char offset_str[999];
1299         char *ptr;
1300 
1301         junk[0] = 0;
1302         offset_str[0] = 0;
1303         for(ptr = junkout;ptr<targ;ptr++)
1304         {
1305             snprintf(junk + strlen(junk), sizeof(junk)-strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*ptr);
1306             snprintf(offset_str + strlen(offset_str), sizeof(offset_str)-strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[ptr-junkout]);
1307         }
1308 
1309         log_verbose(junk);
1310         printSeq((const unsigned char *)expect, expectLen);
1311         if ( checkOffsets )
1312           {
1313             log_verbose("\nOffsets:");
1314             log_verbose(offset_str);
1315           }
1316         log_verbose("\n");
1317     }
1318     ucnv_close(conv);
1319 
1320 
1321     if(expectLen != targ-junkout)
1322     {
1323         log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
1324         log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
1325         log_info("\nGot:");
1326         printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
1327         log_info("\nExpected:");
1328         printSeqErr((const unsigned char*)expect, expectLen);
1329         return false;
1330     }
1331 
1332     if (checkOffsets && (expectOffsets != 0) )
1333     {
1334         log_verbose("comparing %d offsets..\n", targ-junkout);
1335         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
1336             log_err("did not get the expected offsets. %s", gNuConvTestName);
1337             log_err("Got  : ");
1338             printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
1339             for(p=junkout;p<targ;p++)
1340                 log_err("%d, ", junokout[p-junkout]);
1341             log_err("\nExpected: ");
1342             for(i=0; i<(targ-junkout); i++)
1343                 log_err("%d,", expectOffsets[i]);
1344         }
1345     }
1346 
1347     log_verbose("comparing..\n");
1348     if(!memcmp(junkout, expect, expectLen))
1349     {
1350         log_verbose("Matches!\n");
1351         return true;
1352     }
1353     else
1354     {
1355         log_err("String does not match. %s\n", gNuConvTestName);
1356         printUSeqErr(source, sourceLen);
1357         log_info("\nGot:");
1358         printSeqErr((const unsigned char *)junkout, expectLen);
1359         log_info("\nExpected:");
1360         printSeqErr((const unsigned char *)expect, expectLen);
1361 
1362         return false;
1363     }
1364 }
1365 
testConvertToU(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UConverterToUCallback callback,const int32_t * expectOffsets,UBool testReset)1366 static UBool testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
1367                const char *codepage, UConverterToUCallback callback, const int32_t *expectOffsets, UBool testReset)
1368 {
1369     UErrorCode status = U_ZERO_ERROR;
1370     UConverter *conv = 0;
1371     UChar    junkout[MAX_LENGTH]; /* FIX */
1372     int32_t    junokout[MAX_LENGTH]; /* FIX */
1373     const char *src;
1374     const char *realSourceEnd;
1375     const char *srcLimit;
1376     UChar *p;
1377     UChar *targ;
1378     UChar *end;
1379     int32_t *offs;
1380     int i;
1381     UBool   checkOffsets = true;
1382     int32_t   realBufferSize;
1383     UChar *realBufferEnd;
1384     UBool doFlush;
1385 
1386     UConverterToUCallback oldAction = NULL;
1387     const void* oldContext = NULL;
1388 
1389 
1390     for(i=0;i<MAX_LENGTH;i++)
1391         junkout[i] = 0xFFFE;
1392 
1393     for(i=0;i<MAX_LENGTH;i++)
1394         junokout[i] = -1;
1395 
1396     setNuConvTestName(codepage, "TO");
1397 
1398     log_verbose("\n=========  %s\n", gNuConvTestName);
1399 
1400     conv = ucnv_open(codepage, &status);
1401     if(U_FAILURE(status))
1402     {
1403         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
1404         return true;
1405     }
1406 
1407     log_verbose("Converter opened..\n");
1408      /*----setting the callback routine----*/
1409     ucnv_setToUCallBack (conv, callback, NULL, &oldAction, &oldContext, &status);
1410     if (U_FAILURE(status)) {
1411         log_err("FAILURE in setting the callback Function! %s\n", myErrorName(status));
1412     }
1413     /*-------------------------------------*/
1414 
1415     src = (const char *)source;
1416     targ = junkout;
1417     offs = junokout;
1418 
1419     realBufferSize = UPRV_LENGTHOF(junkout);
1420     realBufferEnd = junkout + realBufferSize;
1421     realSourceEnd = src + sourcelen;
1422 
1423     if ( gOutBufferSize != realBufferSize )
1424       checkOffsets = false;
1425 
1426     if( gInBufferSize != MAX_LENGTH )
1427       checkOffsets = false;
1428 
1429     do
1430       {
1431         end = nct_min( targ + gOutBufferSize, realBufferEnd);
1432         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
1433 
1434         if(targ == realBufferEnd)
1435         {
1436             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
1437             return false;
1438         }
1439         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
1440 
1441         /* oldTarg = targ; */
1442 
1443         status = U_ZERO_ERROR;
1444         doFlush=(UBool)((gInBufferSize ==999 && gOutBufferSize==999)?(srcLimit == realSourceEnd) : false);
1445 
1446         ucnv_toUnicode (conv,
1447                 &targ,
1448                 end,
1449                 &src,
1450                 srcLimit,
1451                 offs,
1452                 doFlush, /* flush if we're at the end of the source data */
1453                 &status);
1454         if(testReset)
1455             ucnv_resetFromUnicode(conv);
1456         if(gInBufferSize ==999 && gOutBufferSize==999)
1457             ucnv_resetToUnicode(conv);
1458         /*        offs += (targ-oldTarg); */
1459 
1460       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
1461 
1462     if(U_FAILURE(status))
1463     {
1464         log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
1465         return false;
1466     }
1467 
1468     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
1469         sourcelen, targ-junkout);
1470     if(getTestOption(VERBOSITY_OPTION))
1471     {
1472         char junk[999];
1473         char offset_str[999];
1474 
1475         UChar *ptr;
1476 
1477         junk[0] = 0;
1478         offset_str[0] = 0;
1479 
1480         for(ptr = junkout;ptr<targ;ptr++)
1481         {
1482             snprintf(junk + strlen(junk), sizeof(junk)-strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
1483             snprintf(offset_str + strlen(offset_str), sizeof(offset_str)-strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
1484         }
1485 
1486         log_verbose(junk);
1487 
1488         if ( checkOffsets )
1489           {
1490             log_verbose("\nOffsets:");
1491             log_verbose(offset_str);
1492           }
1493         log_verbose("\n");
1494     }
1495     ucnv_close(conv);
1496 
1497     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
1498 
1499     if (checkOffsets && (expectOffsets != 0))
1500     {
1501         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
1502 
1503             log_err("did not get the expected offsets. %s",gNuConvTestName);
1504             for(p=junkout;p<targ;p++)
1505                 log_err("%d, ", junokout[p-junkout]);
1506             log_err("\nExpected: ");
1507             for(i=0; i<(targ-junkout); i++)
1508                 log_err("%d,", expectOffsets[i]);
1509             log_err("");
1510             for(i=0; i<(targ-junkout); i++)
1511                 log_err("%X,", junkout[i]);
1512             log_err("");
1513             for(i=0; i<(src-(const char *)source); i++)
1514                 log_err("%X,", (unsigned char)source[i]);
1515         }
1516     }
1517 
1518     if(!memcmp(junkout, expect, expectlen*2))
1519     {
1520         log_verbose("Matches!\n");
1521         return true;
1522     }
1523     else
1524     {
1525         log_err("String does not match. %s\n", gNuConvTestName);
1526         log_verbose("String does not match. %s\n", gNuConvTestName);
1527         log_info("\nGot:");
1528         printUSeq(junkout, expectlen);
1529         log_info("\nExpected:");
1530         printUSeq(expect, expectlen);
1531         return false;
1532     }
1533 }
1534 
1535 
TestResetBehaviour(void)1536 static void TestResetBehaviour(void){
1537 #if !UCONFIG_NO_LEGACY_CONVERSION
1538     log_verbose("Testing Reset for DBCS and MBCS\n");
1539     {
1540         static const UChar sampleText[]       = {0x00a1, 0xd801, 0xdc01, 0x00a4};
1541         static const uint8_t expected[] = {0xa2, 0xae, 0xa1, 0xe0, 0xa2, 0xb4};
1542         static const int32_t offsets[]        = {0x00, 0x00, 0x01, 0x01, 0x03, 0x03 };
1543 
1544 
1545         static const UChar sampleText1[] = {0x00a1, 0x00a4, 0x00a7, 0x00a8};
1546         static const uint8_t expected1[] = {0xa2, 0xae,0xA2,0xB4,0xA1,0xD7,0xA1,0xA7};
1547         static const int32_t offsets1[] =  { 0,2,4,6};
1548 
1549         /*DBCS*/
1550         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1551                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, true))
1552             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
1553         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1554                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , true))
1555             log_err("u-> ibm-1363 [UCNV_DBCS portion] not match.\n");
1556 
1557         if(!testConvertToU(expected1, sizeof(expected1),
1558                 sampleText1, UPRV_LENGTHOF(sampleText1), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1559                 offsets1, true))
1560            log_err("ibm-1363 -> did not match.\n");
1561         /*MBCS*/
1562         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1563                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, true))
1564             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
1565         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1566                 expected, sizeof(expected), "ibm-1363", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , true))
1567             log_err("u-> ibm-1363 [UCNV_MBCS] not match.\n");
1568 
1569         if(!testConvertToU(expected1, sizeof(expected1),
1570                 sampleText1, UPRV_LENGTHOF(sampleText1), "ibm-1363",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1571                 offsets1, true))
1572            log_err("ibm-1363 -> did not match.\n");
1573 
1574     }
1575 
1576     log_verbose("Testing Reset for ISO-2022-jp\n");
1577     {
1578         static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1579 
1580         static const uint8_t expected[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
1581                                     0x31,0x1A, 0x32};
1582 
1583 
1584         static const int32_t offsets[] = {0,0,0,0,0,1,1,2,2,2,2,3,5 };
1585 
1586 
1587         static const UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032};
1588         static const uint8_t expected1[] = {0x1b, 0x24, 0x42,0x30,0x6c,0x43,0x7a,0x1b,0x28,0x42,
1589                                     0x31,0x1A, 0x32};
1590         static const int32_t offsets1[] =  { 3,5,10,11,12};
1591 
1592         /*iso-2022-jp*/
1593         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1594                 expected, sizeof(expected), "iso-2022-jp",  UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, true))
1595             log_err("u-> not match.\n");
1596         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1597                 expected, sizeof(expected), "iso-2022-jp", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , true))
1598             log_err("u->  not match.\n");
1599 
1600         if(!testConvertToU(expected1, sizeof(expected1),
1601                 sampleText1, UPRV_LENGTHOF(sampleText1), "iso-2022-jp",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1602                 offsets1, true))
1603            log_err("iso-2022-jp -> did not match.\n");
1604 
1605     }
1606 
1607     /* BEGIN android-removed */
1608     /* To save space, Android does not build full ISO-2022-CN tables.
1609        We skip the tests for ISO-2022-CN. */
1610     /*
1611     log_verbose("Testing Reset for ISO-2022-cn\n");
1612     {
1613         static const UChar    sampleText[] =   { 0x4e00, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1614 
1615         static const uint8_t expected[] = {
1616                                     0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
1617                                     0x36, 0x21,
1618                                     0x0f, 0x31,
1619                                     0x1A,
1620                                     0x32
1621                                     };
1622 
1623 
1624         static const int32_t offsets[] = {
1625                                     0,    0,    0,    0,    0,    0,    0,
1626                                     1,    1,
1627                                     2,    2,
1628                                     3,
1629                                     5,  };
1630 
1631         UChar sampleText1[] = {0x4e00, 0x04e01, 0x0031,0x001A, 0x0032};
1632         static const uint8_t expected1[] = {
1633                                     0x1B, 0x24, 0x29, 0x41, 0x0E, 0x52, 0x3B,
1634                                     0x36, 0x21,
1635                                     0x1B, 0x24, 0x29, 0x47, 0x24, 0x22,
1636                                     0x0f, 0x1A,
1637                                     0x32
1638                                     };
1639         static const int32_t offsets1[] =  { 5,7,13,16,17};
1640 
1641         // iso-2022-CN  android-change
1642         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1643                 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, true))
1644             log_err("u-> not match.\n");
1645         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1646                 expected, sizeof(expected), "iso-2022-cn", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , true))
1647             log_err("u-> not match.\n");
1648 
1649         if(!testConvertToU(expected1, sizeof(expected1),
1650                 sampleText1, UPRV_LENGTHOF(sampleText1), "iso-2022-cn",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1651                 offsets1, true))
1652            log_err("iso-2022-cn -> did not match.\n");
1653     }
1654     */
1655     /* END android-removed */
1656 
1657         log_verbose("Testing Reset for ISO-2022-kr\n");
1658     {
1659         UChar    sampleText[] =   { 0x4e00,0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1660 
1661         static const uint8_t expected[] = {0x1B, 0x24, 0x29, 0x43,
1662                                     0x0E, 0x6C, 0x69,
1663                                     0x0f, 0x1A,
1664                                     0x0e, 0x6F, 0x4B,
1665                                     0x0F, 0x31,
1666                                     0x1A,
1667                                     0x32 };
1668 
1669         static const int32_t offsets[] = {-1, -1, -1, -1,
1670                               0, 0, 0,
1671                               1, 1,
1672                               3, 3, 3,
1673                               4, 4,
1674                               5,
1675                               7,
1676                             };
1677         static const UChar    sampleText1[] =   { 0x4e00,0x0041, 0x04e01, 0x0031, 0x0042, 0x0032};
1678 
1679         static const uint8_t expected1[] = {0x1B, 0x24, 0x29, 0x43,
1680                                     0x0E, 0x6C, 0x69,
1681                                     0x0f, 0x41,
1682                                     0x0e, 0x6F, 0x4B,
1683                                     0x0F, 0x31,
1684                                     0x42,
1685                                     0x32 };
1686 
1687         static const int32_t offsets1[] = {
1688                               5, 8, 10,
1689                               13, 14, 15
1690 
1691                             };
1692         /*iso-2022-kr*/
1693         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1694                 expected, sizeof(expected), "iso-2022-kr",  UCNV_FROM_U_CALLBACK_SUBSTITUTE , NULL, true))
1695             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
1696         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1697                 expected, sizeof(expected), "iso-2022-kr",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , true))
1698             log_err("u-> iso-2022-kr [UCNV_DBCS] not match.\n");
1699         if(!testConvertToU(expected1, sizeof(expected1),
1700                 sampleText1, UPRV_LENGTHOF(sampleText1), "iso-2022-kr",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1701                 offsets1, true))
1702            log_err("iso-2022-kr -> did not match.\n");
1703     }
1704 
1705         log_verbose("Testing Reset for HZ\n");
1706     {
1707         static const UChar    sampleText[] =   { 0x4e00, 0xd801, 0xdc01, 0x04e01, 0x0031, 0xd801, 0xdc01, 0x0032};
1708 
1709         static const uint8_t expected[] = {0x7E, 0x7B, 0x52, 0x3B,
1710                                     0x7E, 0x7D, 0x1A,
1711                                     0x7E, 0x7B, 0x36, 0x21,
1712                                     0x7E, 0x7D, 0x31,
1713                                     0x1A,
1714                                     0x32 };
1715 
1716 
1717         static const int32_t offsets[] = {0,0,0,0,
1718                              1,1,1,
1719                              3,3,3,3,
1720                              4,4,4,
1721                              5,
1722                              7,};
1723         static const UChar    sampleText1[] =   { 0x4e00, 0x0035, 0x04e01, 0x0031, 0x0041, 0x0032};
1724 
1725         static const uint8_t expected1[] = {0x7E, 0x7B, 0x52, 0x3B,
1726                                     0x7E, 0x7D, 0x35,
1727                                     0x7E, 0x7B, 0x36, 0x21,
1728                                     0x7E, 0x7D, 0x31,
1729                                     0x41,
1730                                     0x32 };
1731 
1732 
1733         static const int32_t offsets1[] = {2,6,9,13,14,15
1734                             };
1735 
1736         /*hz*/
1737         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1738                 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , true))
1739             log_err("u->  not match.\n");
1740         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1741                 expected, sizeof(expected), "HZ", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , true))
1742             log_err("u->  not match.\n");
1743         if(!testConvertToU(expected1, sizeof(expected1),
1744                 sampleText1, UPRV_LENGTHOF(sampleText1), "hz",UCNV_TO_U_CALLBACK_SUBSTITUTE ,
1745                 offsets1, true))
1746            log_err("hz -> did not match.\n");
1747     }
1748 #endif
1749 
1750     /*UTF-8*/
1751      log_verbose("Testing for UTF8\n");
1752     {
1753         static const UChar    sampleText[] =   { 0x4e00, 0x0701, 0x0031, 0xbfc1, 0xd801, 0xdc01, 0x0032};
1754         int32_t offsets[]={0x00, 0x00, 0x00, 0x01, 0x01, 0x02,
1755                            0x03, 0x03, 0x03, 0x04, 0x04, 0x04,
1756                            0x04, 0x06 };
1757         static const uint8_t expected[] = {0xe4, 0xb8, 0x80, 0xdc, 0x81, 0x31,
1758             0xeb, 0xbf, 0x81, 0xF0, 0x90, 0x90, 0x81, 0x32};
1759 
1760 
1761         static const int32_t fromOffsets[] = { 0x0000, 0x0003, 0x0005, 0x0006, 0x0009, 0x0009, 0x000D };
1762         /*UTF-8*/
1763         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1764             expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , true))
1765             log_err("u-> UTF8 with offsets and flush true did not match.\n");
1766         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1767             expected, sizeof(expected), "UTF8",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , true))
1768             log_err("u-> UTF8 with offsets and flush true did not match.\n");
1769         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1770             expected, sizeof(expected), "UTF8", UCNV_FROM_U_CALLBACK_SUBSTITUTE,offsets , true))
1771             log_err("u-> UTF8 with offsets and flush true did not match.\n");
1772         if(!testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
1773             expected, sizeof(expected), "UTF8",  UCNV_FROM_U_CALLBACK_SUBSTITUTE,NULL , true))
1774             log_err("u-> UTF8 with offsets and flush true did not match.\n");
1775         if(!testConvertToU(expected, sizeof(expected),
1776             sampleText, UPRV_LENGTHOF(sampleText), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, true))
1777             log_err("UTF8 -> did not match.\n");
1778         if(!testConvertToU(expected, sizeof(expected),
1779             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , NULL, true))
1780             log_err("UTF8 -> did not match.\n");
1781         if(!testConvertToU(expected, sizeof(expected),
1782             sampleText, UPRV_LENGTHOF(sampleText), "UTF8",UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, true))
1783             log_err("UTF8 -> did not match.\n");
1784         if(!testConvertToU(expected, sizeof(expected),
1785             sampleText, UPRV_LENGTHOF(sampleText), "UTF8", UCNV_TO_U_CALLBACK_SUBSTITUTE , fromOffsets, true))
1786             log_err("UTF8 -> did not match.\n");
1787 
1788     }
1789 
1790 }
1791 
1792 /* Test that U_TRUNCATED_CHAR_FOUND is set. */
1793 static void
doTestTruncated(const char * cnvName,const uint8_t * bytes,int32_t length)1794 doTestTruncated(const char *cnvName, const uint8_t *bytes, int32_t length) {
1795     UConverter *cnv;
1796 
1797     UChar buffer[2];
1798     UChar *target, *targetLimit;
1799     const char *source, *sourceLimit;
1800 
1801     UErrorCode errorCode;
1802 
1803     errorCode=U_ZERO_ERROR;
1804     cnv=ucnv_open(cnvName, &errorCode);
1805     if(U_FAILURE(errorCode)) {
1806         log_data_err("error TestTruncated: unable to open \"%s\" - %s\n", cnvName, u_errorName(errorCode));
1807         return;
1808     }
1809     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
1810     if(U_FAILURE(errorCode)) {
1811         log_data_err("error TestTruncated: unable to set the stop callback on \"%s\" - %s\n",
1812                     cnvName, u_errorName(errorCode));
1813         ucnv_close(cnv);
1814         return;
1815     }
1816 
1817     source=(const char *)bytes;
1818     sourceLimit=source+length;
1819     target=buffer;
1820     targetLimit=buffer+UPRV_LENGTHOF(buffer);
1821 
1822     /* 1. input bytes with flush=false, then input nothing with flush=true */
1823     ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, false, &errorCode);
1824     if(U_FAILURE(errorCode) || source!=sourceLimit || target!=buffer) {
1825         log_err("error TestTruncated(%s, 1a): input bytes[%d], flush=false: %s, input left %d, output %d\n",
1826                 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer));
1827     }
1828 
1829     errorCode=U_ZERO_ERROR;
1830     source=sourceLimit;
1831     target=buffer;
1832     ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, true, &errorCode);
1833     if(errorCode!=U_TRUNCATED_CHAR_FOUND || target!=buffer) {
1834         log_err("error TestTruncated(%s, 1b): no input (previously %d), flush=true: %s (should be U_TRUNCATED_CHAR_FOUND), output %d\n",
1835                 cnvName, (int)length, u_errorName(errorCode), (int)(target-buffer));
1836     }
1837 
1838     /* 2. input bytes with flush=true */
1839     ucnv_resetToUnicode(cnv);
1840 
1841     errorCode=U_ZERO_ERROR;
1842     source=(const char *)bytes;
1843     target=buffer;
1844     ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, true, &errorCode);
1845     if(errorCode!=U_TRUNCATED_CHAR_FOUND || source!=sourceLimit || target!=buffer) {
1846         log_err("error TestTruncated(%s, 2): input bytes[%d], flush=true: %s (should be U_TRUNCATED_CHAR_FOUND), input left %d, output %d\n",
1847                 cnvName, length, u_errorName(errorCode), (int)(sourceLimit-source), (int)(target-buffer));
1848     }
1849 
1850 
1851     ucnv_close(cnv);
1852 }
1853 
/*
 * Table-driven driver for doTestTruncated(): every row names a converter and a
 * partial input byte sequence that is passed on together with its length.
 */
static void
TestTruncated(void) {
    static const struct TruncatedCase {
        const char *cnvName;
        uint8_t bytes[8]; /* partial input bytes resulting in no output */
        int32_t length;
    } testCases[] = {
        { "IMAP-mailbox-name",  { 0x26 }, 1 },             /* & */
        { "IMAP-mailbox-name",  { 0x26, 0x42 }, 2 },       /* &B */
        { "IMAP-mailbox-name",  { 0x26, 0x42, 0x42 }, 3 }, /* &BB */
        { "IMAP-mailbox-name",  { 0x26, 0x41, 0x41 }, 3 }, /* &AA */

        { "UTF-7",      { 0x2b, 0x42 }, 2 }, /* +B */
        { "UTF-8",      { 0xd1 }, 1 },

        { "UTF-16BE",   { 0x4e }, 1 },
        { "UTF-16LE",   { 0x4e }, 1 },
        { "UTF-16",     { 0x4e }, 1 },
        { "UTF-16",     { 0xff }, 1 },
        { "UTF-16",     { 0xfe, 0xff, 0x4e }, 3 },

        { "UTF-32BE",   { 0, 0, 0x4e }, 3 },
        { "UTF-32LE",   { 0x4e }, 1 },
        { "UTF-32",     { 0, 0, 0x4e }, 3 },
        { "UTF-32",     { 0xff }, 1 },
        { "UTF-32",     { 0, 0, 0xfe, 0xff, 0 }, 5 },
        { "SCSU",       { 0x0e, 0x4e }, 2 }, /* SQU 0x4e */

        { "BOCU-1",     { 0xd5 }, 1 },

#if !UCONFIG_NO_LEGACY_CONVERSION
        { "Shift-JIS",  { 0xe0 }, 1 },

        { "ibm-939",    { 0x0e, 0x41 }, 2 }, /* SO 0x41 */
#endif
    };
    const struct TruncatedCase *const lastCase = testCases + UPRV_LENGTHOF(testCases);
    const struct TruncatedCase *tc;

    for (tc = testCases; tc < lastCase; ++tc) {
        doTestTruncated(tc->cnvName, tc->bytes, tc->length);
    }
}
1898 
1899 typedef struct NameRange {
1900     const char *name;
1901     UChar32 start, end, start2, end2, notStart, notEnd;
1902 } NameRange;
1903 
1904 static void
TestUnicodeSet(void)1905 TestUnicodeSet(void) {
1906     UErrorCode errorCode;
1907     UConverter *cnv;
1908     USet *set;
1909     const char *name;
1910     int32_t i, count;
1911 
1912     static const char *const completeSetNames[]={
1913         "UTF-7",
1914         "UTF-8",
1915         "UTF-16",
1916         "UTF-16BE",
1917         "UTF-16LE",
1918         "UTF-32",
1919         "UTF-32BE",
1920         "UTF-32LE",
1921         "SCSU",
1922         "BOCU-1",
1923         "CESU-8",
1924 #if !UCONFIG_NO_LEGACY_CONVERSION
1925         "gb18030",
1926 #endif
1927         "IMAP-mailbox-name"
1928     };
1929 #if !UCONFIG_NO_LEGACY_CONVERSION
1930     static const char *const lmbcsNames[]={
1931         "LMBCS-1",
1932         "LMBCS-2",
1933         "LMBCS-3",
1934         "LMBCS-4",
1935         "LMBCS-5",
1936         "LMBCS-6",
1937         "LMBCS-8",
1938         "LMBCS-11",
1939         "LMBCS-16",
1940         "LMBCS-17",
1941         "LMBCS-18",
1942         "LMBCS-19"
1943     };
1944 #endif
1945 
1946     static const NameRange nameRanges[]={
1947         { "US-ASCII", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
1948 #if !UCONFIG_NO_LEGACY_CONVERSION
1949         { "ibm-367", 0, 0x7f, -1, -1, 0x80, 0x10ffff },
1950 #endif
1951         { "ISO-8859-1", 0, 0x7f, -1, -1, 0x100, 0x10ffff },
1952 #if !UCONFIG_NO_LEGACY_CONVERSION
1953         { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff },
1954         { "windows-1251", 0, 0x7f, 0x410, 0x44f, 0x3000, 0xd7ff },
1955         /* HZ test case fixed and moved to intltest's conversion.txt, ticket #6002 */
1956         { "shift-jis", 0x3041, 0x3093, 0x30a1, 0x30f3, 0x900, 0x1cff }
1957 #else
1958         { "UTF-8", 0, 0xd7ff, 0xe000, 0x10ffff, 0xd800, 0xdfff }
1959 #endif
1960     };
1961 
1962     /* open an empty set */
1963     set=uset_open(1, 0);
1964 
1965     count=ucnv_countAvailable();
1966     for(i=0; i<count; ++i) {
1967         errorCode=U_ZERO_ERROR;
1968         name=ucnv_getAvailableName(i);
1969         cnv=ucnv_open(name, &errorCode);
1970         if(U_FAILURE(errorCode)) {
1971             log_data_err("error: unable to open converter %s - %s\n",
1972                     name, u_errorName(errorCode));
1973             continue;
1974         }
1975 
1976         uset_clear(set);
1977         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
1978         if(U_FAILURE(errorCode)) {
1979             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
1980                     name, u_errorName(errorCode));
1981         } else if(uset_size(set)==0) {
1982             log_err("error: ucnv_getUnicodeSet(%s) returns an empty set\n", name);
1983         }
1984 
1985         ucnv_close(cnv);
1986     }
1987 
1988     /* test converters that are known to convert all of Unicode (except maybe for surrogates) */
1989     for(i=0; i<UPRV_LENGTHOF(completeSetNames); ++i) {
1990         errorCode=U_ZERO_ERROR;
1991         name=completeSetNames[i];
1992         cnv=ucnv_open(name, &errorCode);
1993         if(U_FAILURE(errorCode)) {
1994             log_data_err("error: unable to open converter %s - %s\n",
1995                     name, u_errorName(errorCode));
1996             continue;
1997         }
1998 
1999         uset_clear(set);
2000         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
2001         if(U_FAILURE(errorCode)) {
2002             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
2003                     name, u_errorName(errorCode));
2004         } else if(!uset_containsRange(set, 0, 0xd7ff) || !uset_containsRange(set, 0xe000, 0x10ffff)) {
2005             log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set\n", name);
2006         }
2007 
2008         ucnv_close(cnv);
2009     }
2010 
2011 #if !UCONFIG_NO_LEGACY_CONVERSION
2012     /* test LMBCS variants which convert all of Unicode except for U+F6xx */
2013     for(i=0; i<UPRV_LENGTHOF(lmbcsNames); ++i) {
2014         errorCode=U_ZERO_ERROR;
2015         name=lmbcsNames[i];
2016         cnv=ucnv_open(name, &errorCode);
2017         if(U_FAILURE(errorCode)) {
2018             log_data_err("error: unable to open converter %s - %s\n",
2019                     name, u_errorName(errorCode));
2020             continue;
2021         }
2022 
2023         uset_clear(set);
2024         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
2025         if(U_FAILURE(errorCode)) {
2026             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
2027                     name, u_errorName(errorCode));
2028         } else if(!uset_containsRange(set, 0, 0xf5ff) || !uset_containsRange(set, 0xf700, 0x10ffff)) {
2029             log_err("error: ucnv_getUnicodeSet(%s) does not return an all-Unicode set (minus U+F6xx)\n", name);
2030         }
2031 
2032         ucnv_close(cnv);
2033     }
2034 #endif
2035 
2036     /* test specific sets */
2037     for(i=0; i<UPRV_LENGTHOF(nameRanges); ++i) {
2038         errorCode=U_ZERO_ERROR;
2039         name=nameRanges[i].name;
2040         cnv=ucnv_open(name, &errorCode);
2041         if(U_FAILURE(errorCode)) {
2042             log_data_err("error: unable to open converter %s - %s\n",
2043                          name, u_errorName(errorCode));
2044             continue;
2045         }
2046 
2047         uset_clear(set);
2048         ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
2049         if(U_FAILURE(errorCode)) {
2050             log_err("error: ucnv_getUnicodeSet(%s) failed - %s\n",
2051                     name, u_errorName(errorCode));
2052         } else if(
2053             !uset_containsRange(set, nameRanges[i].start, nameRanges[i].end) ||
2054             (nameRanges[i].start2>=0 && !uset_containsRange(set, nameRanges[i].start2, nameRanges[i].end2))
2055         ) {
2056             log_err("error: ucnv_getUnicodeSet(%s) does not contain the expected ranges\n", name);
2057         } else if(nameRanges[i].notStart>=0) {
2058             /* simulate containsAny() with the C API */
2059             uset_complement(set);
2060             if(!uset_containsRange(set, nameRanges[i].notStart, nameRanges[i].notEnd)) {
2061                 log_err("error: ucnv_getUnicodeSet(%s) contains part of the unexpected range\n", name);
2062             }
2063         }
2064 
2065         ucnv_close(cnv);
2066     }
2067 
2068     errorCode = U_ZERO_ERROR;
2069     ucnv_getUnicodeSet(NULL, set, UCNV_ROUNDTRIP_SET, &errorCode);
2070     if (errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
2071         log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode));
2072     }
2073     errorCode = U_PARSE_ERROR;
2074     /* Make sure that it does nothing if an error is passed in. Difficult to proper test for. */
2075     ucnv_getUnicodeSet(NULL, NULL, UCNV_ROUNDTRIP_SET, &errorCode);
2076     if (errorCode != U_PARSE_ERROR) {
2077         log_err("error: ucnv_getUnicodeSet(NULL) returned wrong status code %s\n", u_errorName(errorCode));
2078     }
2079 
2080     uset_close(set);
2081 }
2082