• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 2001-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /********************************************************************************
9 *
10 * File custrtrn.C
11 *
12 * Modification History:
13 *        Name                     Description
14 *        Ram                      String transformations test
15 *********************************************************************************
16 */
17 /****************************************************************************/
18 
19 
20 #include <stdbool.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include "unicode/utypes.h"
25 #include "unicode/ustring.h"
26 #include "unicode/ures.h"
27 #include "ustr_imp.h"
28 #include "cintltst.h"
29 #include "cmemory.h"
30 #include "cstring.h"
31 #include "cwchar.h"
32 
33 void addUCharTransformTest(TestNode** root);
34 
35 static void Test_strToUTF32(void);
36 static void Test_strToUTF32_surrogates(void);
37 static void Test_strFromUTF32(void);
38 static void Test_strFromUTF32_surrogates(void);
39 static void Test_UChar_UTF8_API(void);
40 static void Test_FromUTF8(void);
41 static void Test_FromUTF8Lenient(void);
42 static void Test_UChar_WCHART_API(void);
43 static void Test_widestrs(void);
44 static void Test_WCHART_LongString(void);
45 static void Test_strToJavaModifiedUTF8(void);
46 static void Test_strFromJavaModifiedUTF8(void);
47 static void TestNullEmptySource(void);
48 
49 void
addUCharTransformTest(TestNode ** root)50 addUCharTransformTest(TestNode** root)
51 {
52    addTest(root, &Test_strToUTF32, "custrtrn/Test_strToUTF32");
53    addTest(root, &Test_strToUTF32_surrogates, "custrtrn/Test_strToUTF32_surrogates");
54    addTest(root, &Test_strFromUTF32, "custrtrn/Test_strFromUTF32");
55    addTest(root, &Test_strFromUTF32_surrogates, "custrtrn/Test_strFromUTF32_surrogates");
56    addTest(root, &Test_UChar_UTF8_API, "custrtrn/Test_UChar_UTF8_API");
57    addTest(root, &Test_FromUTF8, "custrtrn/Test_FromUTF8");
58    addTest(root, &Test_FromUTF8Lenient, "custrtrn/Test_FromUTF8Lenient");
59    addTest(root, &Test_UChar_WCHART_API,  "custrtrn/Test_UChar_WCHART_API");
60    addTest(root, &Test_widestrs,  "custrtrn/Test_widestrs");
61 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
62    addTest(root, &Test_WCHART_LongString, "custrtrn/Test_WCHART_LongString");
63 #endif
64    addTest(root, &Test_strToJavaModifiedUTF8,  "custrtrn/Test_strToJavaModifiedUTF8");
65    addTest(root, &Test_strFromJavaModifiedUTF8,  "custrtrn/Test_strFromJavaModifiedUTF8");
66    addTest(root, &TestNullEmptySource,  "custrtrn/TestNullEmptySource");
67 }
68 
/*
 * Reference text as UTF-32 code points, ending with an explicit 0x0000.
 * It mixes BMP code points with a run of supplementary (non-BMP) code points.
 * src16 below holds the same text as UTF-16; the UTF-32 tests in this file
 * use the two tables as source and expected output of each other.
 */
69 static const UChar32 src32[]={
70     0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
71     0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
72     0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
73     0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
74     0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
75     0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
76     0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
77     0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
78     0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
79     0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
80     0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
81     0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
82     0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
83     0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
84     0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
85     0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
86     0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
87     0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
88     0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
89     /* test non-BMP code points */
90     0x0002A699,
91     0x0002A69C, 0x0002A69D, 0x0002A69E, 0x0002A69F, 0x0002A6A0, 0x0002A6A5, 0x0002A6A6, 0x0002A6A7, 0x0002A6A8, 0x0002A6AB,
92     0x0002A6AC, 0x0002A6AD, 0x0002A6AE, 0x0002A6AF, 0x0002A6B0, 0x0002A6B1, 0x0002A6B3, 0x0002A6B5, 0x0002A6B6, 0x0002A6B7,
93     0x0002A6B8, 0x0002A6B9, 0x0002A6BA, 0x0002A6BB, 0x0002A6BC, 0x0002A6BD, 0x0002A6BE, 0x0002A6BF, 0x0002A6C0, 0x0002A6C1,
94     0x0002A6C2, 0x0002A6C3, 0x0002A6C4, 0x0002A6C8, 0x0002A6CA, 0x0002A6CB, 0x0002A6CD, 0x0002A6CE, 0x0002A6CF, 0x0002A6D0,
95     0x0002A6D1, 0x0002A6D2, 0x0002A6D3, 0x0002A6D4, 0x0002A6D5,
96 
97     0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
98     0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
99     0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
100     0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
101     0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,0x0000
102 };
103 
/*
 * The same reference text as src32, expressed as UTF-16 code units and ending
 * with an explicit 0x0000. Each supplementary code point of src32 appears here
 * as a lead/trail surrogate pair (for example U+2A699 is 0xD869, 0xDE99).
 */
104 static const UChar src16[] = {
105     0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
106     0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
107     0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
108     0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
109     0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
110     0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
111     0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
112     0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
113     0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
114     0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
115     0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
116     0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
117     0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
118     0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
119     0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
120     0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
121     0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
122     0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
123     0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
124 
125     /* test non-BMP code points */
126     0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
127     0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
128     0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
129     0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
130     0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
131     0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
132     0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
133     0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
134     0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
135     0xD869, 0xDED5,
136 
137     0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
138     0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
139     0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
140     0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
141     0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,0x0000
142 };
143 
144 
Test_strToUTF32(void)145 static void Test_strToUTF32(void){
146     UErrorCode err = U_ZERO_ERROR;
147     UChar32 u32Target[400];
148     int32_t u32DestLen;
149     int i= 0;
150 
151     /* first with length */
152     u32DestLen = -2;
153     u_strToUTF32(u32Target, 0, &u32DestLen, src16, UPRV_LENGTHOF(src16),&err);
154     if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != UPRV_LENGTHOF(src32)) {
155         log_err("u_strToUTF32(preflight with length): "
156                 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
157                 (long)u32DestLen, (long)UPRV_LENGTHOF(src32), u_errorName(err));
158         return;
159     }
160     err = U_ZERO_ERROR;
161     u32DestLen = -2;
162     u_strToUTF32(u32Target, UPRV_LENGTHOF(src32)+1, &u32DestLen, src16, UPRV_LENGTHOF(src16),&err);
163     if(err != U_ZERO_ERROR || u32DestLen != UPRV_LENGTHOF(src32)) {
164         log_err("u_strToUTF32(with length): "
165                 "length %ld != %ld and %s != U_ZERO_ERROR\n",
166                 (long)u32DestLen, (long)UPRV_LENGTHOF(src32), u_errorName(err));
167         return;
168     }
169     /*for(i=0; i< u32DestLen; i++){
170         printf("0x%08X, ",uTarget[i]);
171         if(i%10==0){
172             printf("\n");
173         }
174     }*/
175     for(i=0; i< UPRV_LENGTHOF(src32); i++){
176         if(u32Target[i] != src32[i]){
177             log_verbose("u_strToUTF32(with length) failed expected: %04X got: %04X at index: %i \n", src32[i], u32Target[i],i);
178         }
179     }
180     if(u32Target[i] != 0){
181         log_verbose("u_strToUTF32(with length) failed expected: %04X got: %04X at index: %i \n", 0, u32Target[i],i);
182     }
183 
184     /* now NUL-terminated */
185     u32DestLen = -2;
186     u_strToUTF32(NULL,0, &u32DestLen, src16, -1,&err);
187     if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != UPRV_LENGTHOF(src32)-1) {
188         log_err("u_strToUTF32(preflight with NUL-termination): "
189                 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
190                 (long)u32DestLen, (long)UPRV_LENGTHOF(src32)-1, u_errorName(err));
191         return;
192     }
193     err = U_ZERO_ERROR;
194     u32DestLen = -2;
195     u_strToUTF32(u32Target, UPRV_LENGTHOF(src32), &u32DestLen, src16, -1,&err);
196     if(err != U_ZERO_ERROR || u32DestLen != UPRV_LENGTHOF(src32)-1) {
197         log_err("u_strToUTF32(with NUL-termination): "
198                 "length %ld != %ld and %s != U_ZERO_ERROR\n",
199                 (long)u32DestLen, (long)UPRV_LENGTHOF(src32)-1, u_errorName(err));
200         return;
201     }
202 
203     for(i=0; i< UPRV_LENGTHOF(src32); i++){
204         if(u32Target[i] != src32[i]){
205             log_verbose("u_strToUTF32(NUL-termination) failed expected: %04X got: %04X \n", src32[i], u32Target[i]);
206         }
207     }
208 }
209 
210 /* test unpaired surrogates */
Test_strToUTF32_surrogates()211 static void Test_strToUTF32_surrogates() {
212     UErrorCode err = U_ZERO_ERROR;
213     UChar32 u32Target[400];
214     int32_t len16, u32DestLen;
215     int32_t numSubstitutions;
216     int i;
217 
218     static const UChar surr16[] = { 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
219     static const UChar32 expected[] = { 0x5a, 0x50000, 0x7a, 0 };
220     static const UChar32 expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0 };
221     static const UChar32 expected_12345[] = { 0x41, 0x12345, 0x61, 0x12345, 0x5a, 0x50000, 0x7a, 0 };
222     len16 = UPRV_LENGTHOF(surr16);
223     for(i = 0; i < 4; ++i) {
224         err = U_ZERO_ERROR;
225         u_strToUTF32(u32Target, 0, &u32DestLen, surr16+i, len16-i, &err);
226         if(err != U_INVALID_CHAR_FOUND) {
227             log_err("u_strToUTF32(preflight surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
228                     (long)i, u_errorName(err));
229             return;
230         }
231 
232         err = U_ZERO_ERROR;
233         u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+i, len16-i, &err);
234         if(err != U_INVALID_CHAR_FOUND) {
235             log_err("u_strToUTF32(surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
236                     (long)i, u_errorName(err));
237             return;
238         }
239 
240         err = U_ZERO_ERROR;
241         u_strToUTF32(NULL, 0, &u32DestLen, surr16+i, -1, &err);
242         if(err != U_INVALID_CHAR_FOUND) {
243             log_err("u_strToUTF32(preflight surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
244                     (long)i, u_errorName(err));
245             return;
246         }
247 
248         err = U_ZERO_ERROR;
249         u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+i, -1, &err);
250         if(err != U_INVALID_CHAR_FOUND) {
251             log_err("u_strToUTF32(surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
252                     (long)i, u_errorName(err));
253             return;
254         }
255     }
256 
257     err = U_ZERO_ERROR;
258     u_strToUTF32(u32Target, 0, &u32DestLen, surr16+4, len16-4-1, &err);
259     if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) {
260         log_err("u_strToUTF32(preflight surr16+4) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
261                 u_errorName(err));
262         return;
263     }
264 
265     err = U_ZERO_ERROR;
266     u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+4, len16-4-1, &err);
267     if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) {
268         log_err("u_strToUTF32(surr16+4) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
269                 u_errorName(err));
270         return;
271     }
272 
273     err = U_ZERO_ERROR;
274     u_strToUTF32(NULL, 0, &u32DestLen, surr16+4, -1, &err);
275     if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) {
276         log_err("u_strToUTF32(preflight surr16+4/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
277                 u_errorName(err));
278         return;
279     }
280 
281     err = U_ZERO_ERROR;
282     u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+4, -1, &err);
283     if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) {
284         log_err("u_strToUTF32(surr16+4/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
285                 u_errorName(err));
286         return;
287     }
288 
289     /* with substitution character */
290     numSubstitutions = -1;
291     err = U_ZERO_ERROR;
292     u_strToUTF32WithSub(u32Target, 0, &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err);
293     if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) {
294         log_err("u_strToUTF32WithSub(preflight surr16) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
295                 u_errorName(err));
296         return;
297     }
298 
299     err = U_ZERO_ERROR;
300     u_strToUTF32WithSub(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err);
301     if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_FFFD, 8*4)) {
302         log_err("u_strToUTF32WithSub(surr16) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
303                 u_errorName(err));
304         return;
305     }
306 
307     err = U_ZERO_ERROR;
308     u_strToUTF32WithSub(NULL, 0, &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err);
309     if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) {
310         log_err("u_strToUTF32WithSub(preflight surr16/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
311                 u_errorName(err));
312         return;
313     }
314 
315     err = U_ZERO_ERROR;
316     u_strToUTF32WithSub(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err);
317     if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_12345, 8*4)) {
318         log_err("u_strToUTF32WithSub(surr16/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
319                 u_errorName(err));
320         return;
321     }
322 }
323 
Test_strFromUTF32(void)324 static void Test_strFromUTF32(void){
325     UErrorCode err = U_ZERO_ERROR;
326     UChar uTarget[400];
327     int32_t uDestLen;
328     int i= 0;
329 
330     /* first with length */
331     uDestLen = -2;
332     u_strFromUTF32(uTarget,0,&uDestLen,src32,UPRV_LENGTHOF(src32),&err);
333     if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != UPRV_LENGTHOF(src16)) {
334         log_err("u_strFromUTF32(preflight with length): "
335                 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
336                 (long)uDestLen, (long)UPRV_LENGTHOF(src16), u_errorName(err));
337         return;
338     }
339     err = U_ZERO_ERROR;
340     uDestLen = -2;
341     u_strFromUTF32(uTarget, UPRV_LENGTHOF(src16)+1,&uDestLen,src32,UPRV_LENGTHOF(src32),&err);
342     if(err != U_ZERO_ERROR || uDestLen != UPRV_LENGTHOF(src16)) {
343         log_err("u_strFromUTF32(with length): "
344                 "length %ld != %ld and %s != U_ZERO_ERROR\n",
345                 (long)uDestLen, (long)UPRV_LENGTHOF(src16), u_errorName(err));
346         return;
347     }
348     /*for(i=0; i< uDestLen; i++){
349         printf("0x%04X, ",uTarget[i]);
350         if(i%10==0){
351             printf("\n");
352         }
353     }*/
354 
355     for(i=0; i< uDestLen; i++){
356         if(uTarget[i] != src16[i]){
357             log_verbose("u_strFromUTF32(with length) failed expected: %04X got: %04X at index: %i \n", src16[i] ,uTarget[i],i);
358         }
359     }
360     if(uTarget[i] != 0){
361         log_verbose("u_strFromUTF32(with length) failed expected: %04X got: %04X at index: %i \n", 0,uTarget[i],i);
362     }
363 
364     /* now NUL-terminated */
365     uDestLen = -2;
366     u_strFromUTF32(NULL,0,&uDestLen,src32,-1,&err);
367     if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != UPRV_LENGTHOF(src16)-1) {
368         log_err("u_strFromUTF32(preflight with NUL-termination): "
369                 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
370                 (long)uDestLen, (long)UPRV_LENGTHOF(src16)-1, u_errorName(err));
371         return;
372     }
373     err = U_ZERO_ERROR;
374     uDestLen = -2;
375     u_strFromUTF32(uTarget, UPRV_LENGTHOF(src16),&uDestLen,src32,-1,&err);
376     if(err != U_ZERO_ERROR || uDestLen != UPRV_LENGTHOF(src16)-1) {
377         log_err("u_strFromUTF32(with NUL-termination): "
378                 "length %ld != %ld and %s != U_ZERO_ERROR\n",
379                 (long)uDestLen, (long)UPRV_LENGTHOF(src16)-1, u_errorName(err));
380         return;
381     }
382 
383     for(i=0; i< uDestLen; i++){
384         if(uTarget[i] != src16[i]){
385             log_verbose("u_strFromUTF32(with NUL-termination) failed expected: %04X got: %04X \n", src16[i] ,uTarget[i]);
386         }
387     }
388 }
389 
390 /* test surrogate code points */
Test_strFromUTF32_surrogates()391 static void Test_strFromUTF32_surrogates() {
392     UErrorCode err = U_ZERO_ERROR;
393     UChar uTarget[400];
394     int32_t len32, uDestLen;
395     int32_t numSubstitutions;
396     int i;
397 
398     static const UChar32 surr32[] = { 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a, 0 };
399     static const UChar expected[] = { 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
400     static const UChar expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
401     static const UChar expected_12345[] = { 0x41, 0xd808, 0xdf45, 0x61, 0xd808, 0xdf45, 0xd808, 0xdf45, 0xd808, 0xdf45,
402                                             0x5a, 0xd900, 0xdc00, 0x7a, 0 };
403     len32 = UPRV_LENGTHOF(surr32);
404     for(i = 0; i < 6; ++i) {
405         err = U_ZERO_ERROR;
406         u_strFromUTF32(uTarget, 0, &uDestLen, surr32+i, len32-i, &err);
407         if(err != U_INVALID_CHAR_FOUND) {
408             log_err("u_strFromUTF32(preflight surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
409                     (long)i, u_errorName(err));
410             return;
411         }
412 
413         err = U_ZERO_ERROR;
414         u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+i, len32-i, &err);
415         if(err != U_INVALID_CHAR_FOUND) {
416             log_err("u_strFromUTF32(surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
417                     (long)i, u_errorName(err));
418             return;
419         }
420 
421         err = U_ZERO_ERROR;
422         u_strFromUTF32(NULL, 0, &uDestLen, surr32+i, -1, &err);
423         if(err != U_INVALID_CHAR_FOUND) {
424             log_err("u_strFromUTF32(preflight surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
425                     (long)i, u_errorName(err));
426             return;
427         }
428 
429         err = U_ZERO_ERROR;
430         u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+i, -1, &err);
431         if(err != U_INVALID_CHAR_FOUND) {
432             log_err("u_strFromUTF32(surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
433                     (long)i, u_errorName(err));
434             return;
435         }
436     }
437 
438     err = U_ZERO_ERROR;
439     u_strFromUTF32(uTarget, 0, &uDestLen, surr32+6, len32-6-1, &err);
440     if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) {
441         log_err("u_strFromUTF32(preflight surr32+6) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
442                 u_errorName(err));
443         return;
444     }
445 
446     err = U_ZERO_ERROR;
447     u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+6, len32-6-1, &err);
448     if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) {
449         log_err("u_strFromUTF32(surr32+6) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
450                 u_errorName(err));
451         return;
452     }
453 
454     err = U_ZERO_ERROR;
455     u_strFromUTF32(NULL, 0, &uDestLen, surr32+6, -1, &err);
456     if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) {
457         log_err("u_strFromUTF32(preflight surr32+6/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
458                 u_errorName(err));
459         return;
460     }
461 
462     err = U_ZERO_ERROR;
463     u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+6, -1, &err);
464     if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) {
465         log_err("u_strFromUTF32(surr32+6/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
466                 u_errorName(err));
467         return;
468     }
469 
470     /* with substitution character */
471     numSubstitutions = -1;
472     err = U_ZERO_ERROR;
473     u_strFromUTF32WithSub(uTarget, 0, &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err);
474     if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 10 || numSubstitutions != 4) {
475         log_err("u_strFromUTF32WithSub(preflight surr32) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
476                 u_errorName(err));
477         return;
478     }
479 
480     err = U_ZERO_ERROR;
481     u_strFromUTF32WithSub(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err);
482     if(err != U_ZERO_ERROR || uDestLen != 10 || numSubstitutions != 4 || u_memcmp(uTarget, expected_FFFD, 11)) {
483         log_err("u_strFromUTF32WithSub(surr32) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
484                 u_errorName(err));
485         return;
486     }
487 
488     err = U_ZERO_ERROR;
489     u_strFromUTF32WithSub(NULL, 0, &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err);
490     if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 14 || numSubstitutions != 4) {
491         log_err("u_strFromUTF32WithSub(preflight surr32/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
492                 u_errorName(err));
493         return;
494     }
495 
496     err = U_ZERO_ERROR;
497     u_strFromUTF32WithSub(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err);
498     if(err != U_ZERO_ERROR || uDestLen != 14 || numSubstitutions != 4 || u_memcmp(uTarget, expected_12345, 15)) {
499         log_err("u_strFromUTF32WithSub(surr32/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
500                 u_errorName(err));
501         return;
502     }
503 }
504 
Test_UChar_UTF8_API(void)505 static void Test_UChar_UTF8_API(void){
506 
507     UErrorCode err = U_ZERO_ERROR;
508     UChar uTemp[1];
509     char u8Temp[1];
510     UChar* uTarget=uTemp;
511     const char* u8Src;
512     int32_t u8SrcLen = 0;
513     int32_t uTargetLength = 0;
514     int32_t uDestLen=0;
515     const UChar* uSrc = src16;
516     int32_t uSrcLen   = sizeof(src16)/2;
517     char* u8Target = u8Temp;
518     int32_t u8TargetLength =0;
519     int32_t u8DestLen =0;
520     UBool failed = false;
521     int i= 0;
522     int32_t numSubstitutions;
523 
524     {
525         /* preflight */
526         u8Temp[0] = 0x12;
527         u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
528         if(err == U_BUFFER_OVERFLOW_ERROR && u8Temp[0] == 0x12){
529             err = U_ZERO_ERROR;
530             u8Target = (char*) malloc (sizeof(uint8_t) * (u8DestLen+1));
531             u8TargetLength = u8DestLen;
532 
533             u8Target[u8TargetLength] = (char)0xfe;
534             u8DestLen = -1;
535             u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
536             if(U_FAILURE(err) || u8DestLen != u8TargetLength || u8Target[u8TargetLength] != (char)0xfe){
537                 log_err("u_strToUTF8 failed after preflight. Error: %s\n", u_errorName(err));
538                 return;
539             }
540 
541         }
542         else {
543             log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR");
544         }
545         failed = false;
546         /*for(i=0; i< u8DestLen; i++){
547             printf("0x%04X, ",u8Target[i]);
548             if(i%10==0){
549                 printf("\n");
550             }
551         }*/
552         /*for(i=0; i< u8DestLen; i++){
553             if(u8Target[i] != src8[i]){
554                 log_verbose("u_strToUTF8() failed expected: %04X got: %04X \n", src8[i], u8Target[i]);
555                 failed =true;
556             }
557         }
558         if(failed){
559             log_err("u_strToUTF8() failed \n");
560         }*/
561         u8Src = u8Target;
562         u8SrcLen = u8DestLen;
563 
564         /* preflight */
565         uTemp[0] = 0x1234;
566         u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
567         if(err == U_BUFFER_OVERFLOW_ERROR && uTemp[0] == 0x1234){
568             err = U_ZERO_ERROR;
569             uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1));
570             uTargetLength =  uDestLen;
571 
572             uTarget[uTargetLength] = 0xfff0;
573             uDestLen = -1;
574             u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
575         }
576         else {
577             log_err("error: u_strFromUTF8(preflight) should have gotten U_BUFFER_OVERFLOW_ERROR\n");
578         }
579         /*for(i=0; i< uDestLen; i++){
580             printf("0x%04X, ",uTarget[i]);
581             if(i%10==0){
582                 printf("\n");
583             }
584         }*/
585 
586         if(U_FAILURE(err) || uDestLen != uTargetLength || uTarget[uTargetLength] != 0xfff0) {
587             failed = true;
588         }
589         for(i=0; i< uSrcLen; i++){
590             if(uTarget[i] != src16[i]){
591                 log_verbose("u_strFromUTF8() failed expected: \\u%04X got: \\u%04X at index: %i \n", src16[i] ,uTarget[i],i);
592                 failed =true;
593             }
594         }
595         if(failed){
596             log_err("error: u_strFromUTF8(after preflighting) failed\n");
597         }
598 
599         free(u8Target);
600         free(uTarget);
601     }
602     {
603         u8SrcLen = -1;
604         uTargetLength = 0;
605         uSrcLen =-1;
606         u8TargetLength=0;
607         failed = false;
608         /* preflight */
609         u_strToUTF8(NULL,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
610         if(err == U_BUFFER_OVERFLOW_ERROR){
611             err = U_ZERO_ERROR;
612             u8Target = (char*) malloc (sizeof(uint8_t) * (u8DestLen+1));
613             u8TargetLength = u8DestLen;
614 
615             u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
616 
617         }
618         else {
619             log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR");
620         }
621         failed = false;
622         /*for(i=0; i< u8DestLen; i++){
623             printf("0x%04X, ",u8Target[i]);
624             if(i%10==0){
625                 printf("\n");
626             }
627         }*/
628         /*for(i=0; i< u8DestLen; i++){
629             if(u8Target[i] != src8[i]){
630                 log_verbose("u_strToUTF8() failed expected: %04X got: %04X \n", src8[i], u8Target[i]);
631                 failed =true;
632             }
633         }
634         if(failed){
635             log_err("u_strToUTF8() failed \n");
636         }*/
637         u8Src = u8Target;
638         u8SrcLen = u8DestLen;
639 
640         /* preflight */
641         u_strFromUTF8(NULL,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
642         if(err == U_BUFFER_OVERFLOW_ERROR){
643             err = U_ZERO_ERROR;
644             uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1));
645             uTargetLength =  uDestLen;
646 
647             u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
648         }
649         else {
650             log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR");
651         }
652         /*for(i=0; i< uDestLen; i++){
653             printf("0x%04X, ",uTarget[i]);
654             if(i%10==0){
655                 printf("\n");
656             }
657         }*/
658 
659         for(i=0; i< uSrcLen; i++){
660             if(uTarget[i] != src16[i]){
661                 log_verbose("u_strFromUTF8() failed expected: \\u%04X got: \\u%04X at index: %i \n", src16[i] ,uTarget[i],i);
662                 failed =true;
663             }
664         }
665         if(failed){
666             log_err("u_strToUTF8() failed \n");
667         }
668 
669         free(u8Target);
670         free(uTarget);
671     }
672 
673     /* test UTF-8 with single surrogates - illegal in Unicode 3.2 */
674     // Since ICU 60, each surrogate byte sequence is treated as 3 single-byte errors.
675     {
676         static const UChar
677             withLead16[]={ 0x1800, 0xd89a, 0x0061 },
678             withTrail16[]={ 0x1800, 0xdcba, 0x0061, 0 },
679             withTrail16SubFFFD[]={ 0x1800, 0xfffd, 0xfffd, 0xfffd, 0x0061, 0 }, /* sub==U+FFFD */
680             withTrail16Sub50005[]={ 0x1800, 0xd900, 0xdc05, 0xd900, 0xdc05, 0xd900, 0xdc05, 0x0061, 0 }; /* sub==U+50005 */
681         static const uint8_t
682             withLead8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xa2, 0x9a, 0x61 },
683             withTrail8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xb2, 0xba, 0x61, 0 },
684             withTrail8Sub1A[]={ 0xe1, 0xa0, 0x80, 0x1a, 0x61, 0 }, /* sub==U+001A */
685             withTrail8SubFFFD[]={ 0xe1, 0xa0, 0x80, 0xef, 0xbf, 0xbd, 0x61, 0 }; /* sub==U+FFFD */
686         UChar out16[10];
687         char out8[10];
688 
689         if(
690             (err=U_ZERO_ERROR, u_strToUTF8(out8, UPRV_LENGTHOF(out8), NULL, withLead16, UPRV_LENGTHOF(withLead16), &err), err!=U_INVALID_CHAR_FOUND) ||
691             (err=U_ZERO_ERROR, u_strToUTF8(out8, UPRV_LENGTHOF(out8), NULL, withTrail16, -1, &err), err!=U_INVALID_CHAR_FOUND) ||
692             (err=U_ZERO_ERROR, u_strFromUTF8(out16, UPRV_LENGTHOF(out16), NULL, (const char *)withLead8, UPRV_LENGTHOF(withLead8), &err), err!=U_INVALID_CHAR_FOUND) ||
693             (err=U_ZERO_ERROR, u_strFromUTF8(out16, UPRV_LENGTHOF(out16), NULL, (const char *)withTrail8, -1, &err), err!=U_INVALID_CHAR_FOUND)
694         ) {
695             log_err("error: u_strTo/FromUTF8(string with single surrogate) fails to report error\n");
696         }
697 
698         /* test error handling with substitution characters */
699 
700         /* from UTF-8 with length */
701         err=U_ZERO_ERROR;
702         numSubstitutions=-1;
703         out16[0]=0x55aa;
704         uDestLen=0;
705         u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
706                              (const char *)withTrail8, (int32_t)uprv_strlen((const char *)withTrail8),
707                              0x50005, &numSubstitutions,
708                              &err);
709         if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16Sub50005) ||
710                              0!=u_memcmp(withTrail16Sub50005, out16, uDestLen+1) ||
711                              numSubstitutions!=3) {
712             log_err("error: u_strFromUTF8WithSub(length) failed\n");
713         }
714 
715         /* from UTF-8 with NUL termination */
716         err=U_ZERO_ERROR;
717         numSubstitutions=-1;
718         out16[0]=0x55aa;
719         uDestLen=0;
720         u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
721                              (const char *)withTrail8, -1,
722                              0xfffd, &numSubstitutions,
723                              &err);
724         if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16SubFFFD) ||
725                              0!=u_memcmp(withTrail16SubFFFD, out16, uDestLen+1) ||
726                              numSubstitutions!=3) {
727             log_err("error: u_strFromUTF8WithSub(NUL termination) failed\n");
728         }
729 
730         /* preflight from UTF-8 with NUL termination */
731         err=U_ZERO_ERROR;
732         numSubstitutions=-1;
733         out16[0]=0x55aa;
734         uDestLen=0;
735         u_strFromUTF8WithSub(out16, 1, &uDestLen,
736                              (const char *)withTrail8, -1,
737                              0x50005, &numSubstitutions,
738                              &err);
739         if(err!=U_BUFFER_OVERFLOW_ERROR || uDestLen!=u_strlen(withTrail16Sub50005) || numSubstitutions!=3) {
740             log_err("error: u_strFromUTF8WithSub(preflight/NUL termination) failed\n");
741         }
742 
743         /* to UTF-8 with length */
744         err=U_ZERO_ERROR;
745         numSubstitutions=-1;
746         out8[0]=(char)0xf5;
747         u8DestLen=0;
748         u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
749                            withTrail16, u_strlen(withTrail16),
750                            0xfffd, &numSubstitutions,
751                            &err);
752         if(U_FAILURE(err) || u8DestLen!=(int32_t)uprv_strlen((const char *)withTrail8SubFFFD) ||
753                              0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen+1) ||
754                              numSubstitutions!=1) {
755             log_err("error: u_strToUTF8WithSub(length) failed\n");
756         }
757 
758         /* to UTF-8 with NUL termination */
759         err=U_ZERO_ERROR;
760         numSubstitutions=-1;
761         out8[0]=(char)0xf5;
762         u8DestLen=0;
763         u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
764                            withTrail16, -1,
765                            0x1a, &numSubstitutions,
766                            &err);
767         if(U_FAILURE(err) || u8DestLen!=(int32_t)uprv_strlen((const char *)withTrail8Sub1A) ||
768                              0!=uprv_memcmp((const char *)withTrail8Sub1A, out8, u8DestLen+1) ||
769                              numSubstitutions!=1) {
770             log_err("error: u_strToUTF8WithSub(NUL termination) failed\n");
771         }
772 
773         /* preflight to UTF-8 with NUL termination */
774         err=U_ZERO_ERROR;
775         numSubstitutions=-1;
776         out8[0]=(char)0xf5;
777         u8DestLen=0;
778         u_strToUTF8WithSub(out8, 1, &u8DestLen,
779                            withTrail16, -1,
780                            0xfffd, &numSubstitutions,
781                            &err);
782         if(err!=U_BUFFER_OVERFLOW_ERROR || u8DestLen!=(int32_t)uprv_strlen((const char *)withTrail8SubFFFD) ||
783                                            numSubstitutions!=1) {
784             log_err("error: u_strToUTF8WithSub(preflight/NUL termination) failed\n");
785         }
786 
787         /* test that numSubstitutions==0 if there are no substitutions */
788 
789         /* from UTF-8 with length (just first 3 bytes which are valid) */
790         err=U_ZERO_ERROR;
791         numSubstitutions=-1;
792         out16[0]=0x55aa;
793         uDestLen=0;
794         u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
795                              (const char *)withTrail8, 3,
796                              0x50005, &numSubstitutions,
797                              &err);
798         if(U_FAILURE(err) || uDestLen!=1 ||
799                              0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) ||
800                              numSubstitutions!=0) {
801             log_err("error: u_strFromUTF8WithSub(no subs) failed\n");
802         }
803 
804         /* to UTF-8 with length (just first UChar which is valid) */
805         err=U_ZERO_ERROR;
806         numSubstitutions=-1;
807         out8[0]=(char)0xf5;
808         u8DestLen=0;
809         u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
810                            withTrail16, 1,
811                            0xfffd, &numSubstitutions,
812                            &err);
813         if(U_FAILURE(err) || u8DestLen!=3 ||
814                              0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) ||
815                              numSubstitutions!=0) {
816             log_err("error: u_strToUTF8WithSub(no subs) failed\n");
817         }
818 
819         /* test that numSubstitutions==0 if subchar==U_SENTINEL (no subchar) */
820 
821         /* from UTF-8 with length (just first 3 bytes which are valid) */
822         err=U_ZERO_ERROR;
823         numSubstitutions=-1;
824         out16[0]=0x55aa;
825         uDestLen=0;
826         u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
827                              (const char *)withTrail8, 3,
828                              U_SENTINEL, &numSubstitutions,
829                              &err);
830         if(U_FAILURE(err) || uDestLen!=1 ||
831                              0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) ||
832                              numSubstitutions!=0) {
833             log_err("error: u_strFromUTF8WithSub(no subchar) failed\n");
834         }
835 
836         /* to UTF-8 with length (just first UChar which is valid) */
837         err=U_ZERO_ERROR;
838         numSubstitutions=-1;
839         out8[0]=(char)0xf5;
840         u8DestLen=0;
841         u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
842                            withTrail16, 1,
843                            U_SENTINEL, &numSubstitutions,
844                            &err);
845         if(U_FAILURE(err) || u8DestLen!=3 ||
846                              0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) ||
847                              numSubstitutions!=0) {
848             log_err("error: u_strToUTF8WithSub(no subchar) failed\n");
849         }
850     }
851     {
852         /*
853          * Test with an illegal lead byte that would be followed by more than 3 trail bytes.
854          * See ticket #10371.
855          */
856         static const char src[1]={ (char)0xf8 };
857         UChar out16[10];
858         err=U_ZERO_ERROR;
859         u_strFromUTF8(out16, UPRV_LENGTHOF(out16), NULL, src, 1, &err);
860         if(err!=U_INVALID_CHAR_FOUND) {
861             log_err("error: u_strFromUTF8(5-byte lead byte) failed\n");
862         }
863     }
864 }
865 
866 /* compare if two strings are equal, but match 0xfffd in the second string with anything in the first */
867 static UBool
equalAnyFFFD(const UChar * s,const UChar * t,int32_t length)868 equalAnyFFFD(const UChar *s, const UChar *t, int32_t length) {
869     UChar c1, c2;
870 
871     while(length>0) {
872         c1=*s++;
873         c2=*t++;
874         if(c1!=c2 && c2!=0xfffd) {
875             return false;
876         }
877         --length;
878     }
879     return true;
880 }
881 
882 /* test u_strFromUTF8Lenient() */
883 static void
Test_FromUTF8(void)884 Test_FromUTF8(void) {
885     /*
886      * Test case from icu-support list 20071130 "u_strFromUTF8() returns U_INVALID_CHAR_FOUND(10)"
887      */
888     static const uint8_t bytes[]={ 0xe0, 0xa5, 0x9c, 0 };
889     UChar dest[64];
890     UChar *destPointer;
891     int32_t destLength;
892     UErrorCode errorCode;
893 
894     /* 3 bytes input, one UChar output (U+095C) */
895     errorCode=U_ZERO_ERROR;
896     destLength=-99;
897     destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, 3, &errorCode);
898     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=1) {
899         log_err("error: u_strFromUTF8(preflight srcLength=3) fails: destLength=%ld - %s\n",
900                 (long)destLength, u_errorName(errorCode));
901     }
902 
903     /* 4 bytes input, two UChars output (U+095C U+0000) */
904     errorCode=U_ZERO_ERROR;
905     destLength=-99;
906     destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, 4, &errorCode);
907     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=2) {
908         log_err("error: u_strFromUTF8(preflight srcLength=4) fails: destLength=%ld - %s\n",
909                 (long)destLength, u_errorName(errorCode));
910     }
911 
912     /* NUL-terminated 3 bytes input, one UChar output (U+095C) */
913     errorCode=U_ZERO_ERROR;
914     destLength=-99;
915     destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, -1, &errorCode);
916     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=1) {
917         log_err("error: u_strFromUTF8(preflight srcLength=-1) fails: destLength=%ld - %s\n",
918                 (long)destLength, u_errorName(errorCode));
919     }
920 
921     /* 3 bytes input, one UChar output (U+095C), transform not just preflight */
922     errorCode=U_ZERO_ERROR;
923     dest[0]=dest[1]=99;
924     destLength=-99;
925     destPointer=u_strFromUTF8(dest, UPRV_LENGTHOF(dest), &destLength, (const char *)bytes, 3, &errorCode);
926     if(U_FAILURE(errorCode) || destPointer!=dest || destLength!=1 || dest[0]!=0x95c || dest[1]!=0) {
927         log_err("error: u_strFromUTF8(transform srcLength=3) fails: destLength=%ld - %s\n",
928                 (long)destLength, u_errorName(errorCode));
929     }
930 }
931 
932 /* test u_strFromUTF8Lenient() */
933 static void
Test_FromUTF8Lenient(void)934 Test_FromUTF8Lenient(void) {
935     /*
936      * Multiple input strings, each NUL-terminated.
937      * Terminate with a string starting with 0xff.
938      */
939     static const uint8_t bytes[]={
940         /* well-formed UTF-8 */
941         0x61,  0xc3, 0x9f,  0xe0, 0xa0, 0x80,  0xf0, 0xa0, 0x80, 0x80,
942         0x62,  0xc3, 0xa0,  0xe0, 0xa0, 0x81,  0xf0, 0xa0, 0x80, 0x81, 0,
943 
944         /* various malformed sequences */
945         0xc3, 0xc3, 0x9f,  0xc3, 0xa0,  0xe0, 0x80, 0x8a,  0xf0, 0x41, 0x42, 0x43, 0,
946 
947         /* truncated input */
948         0xc3, 0,
949         0xe0, 0,
950         0xe0, 0xa0, 0,
951         0xf0, 0,
952         0xf0, 0x90, 0,
953         0xf0, 0x90, 0x80, 0,
954 
955         /* non-ASCII characters in the last few bytes */
956         0x61,  0xc3, 0x9f,  0xe0, 0xa0, 0x80, 0,
957         0x61,  0xe0, 0xa0, 0x80,  0xc3, 0x9f, 0,
958 
959         /* empty string */
960         0,
961 
962         /* finish */
963         0xff, 0
964     };
965 
966     /* Multiple output strings, each NUL-terminated. 0xfffd matches anything. */
967     static const UChar uchars[]={
968         0x61, 0xdf, 0x800,  0xd840, 0xdc00,
969         0x62, 0xe0, 0x801,  0xd840, 0xdc01,  0,
970 
971         0xfffd, 0x9f, 0xe0, 0xa,  0xfffd, 0xfffd,  0,
972 
973         0xfffd, 0,
974         0xfffd, 0,
975         0xfffd, 0,
976         0xfffd, 0,
977         0xfffd, 0,
978         0xfffd, 0,
979 
980         0x61, 0xdf, 0x800,  0,
981         0x61, 0x800, 0xdf,  0,
982 
983         0,
984 
985         0
986     };
987 
988     UChar dest[64];
989     const char *pb;
990     const UChar *pu, *pDest;
991     int32_t srcLength, destLength0, destLength;
992     int number;
993     UErrorCode errorCode;
994 
995     /* verify checking for some illegal arguments */
996     dest[0]=0x1234;
997     destLength=-1;
998     errorCode=U_ZERO_ERROR;
999     pDest=u_strFromUTF8Lenient(dest, 1, &destLength, NULL, -1, &errorCode);
1000     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0x1234) {
1001         log_err("u_strFromUTF8Lenient(src=NULL) failed\n");
1002     }
1003 
1004     dest[0]=0x1234;
1005     destLength=-1;
1006     errorCode=U_ZERO_ERROR;
1007     pDest=u_strFromUTF8Lenient(NULL, 1, &destLength, (const char *)bytes, -1, &errorCode);
1008     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1009         log_err("u_strFromUTF8Lenient(dest=NULL[1]) failed\n");
1010     }
1011 
1012     dest[0]=0x1234;
1013     destLength=-1;
1014     errorCode=U_MEMORY_ALLOCATION_ERROR;
1015     pDest=u_strFromUTF8Lenient(dest, 1, &destLength, (const char *)bytes, -1, &errorCode);
1016     if(errorCode!=U_MEMORY_ALLOCATION_ERROR || dest[0]!=0x1234) {
1017         log_err("u_strFromUTF8Lenient(U_MEMORY_ALLOCATION_ERROR) failed\n");
1018     }
1019 
1020     /* test normal behavior */
1021     number=0; /* string number for log_err() */
1022 
1023     for(pb=(const char *)bytes, pu=uchars;
1024         *pb!=(char)0xff;
1025         pb+=srcLength+1, pu+=destLength0+1, ++number
1026     ) {
1027         srcLength=(int32_t)uprv_strlen(pb);
1028         destLength0=u_strlen(pu);
1029 
1030         /* preflighting with NUL-termination */
1031         dest[0]=0x1234;
1032         destLength=-1;
1033         errorCode=U_ZERO_ERROR;
1034         pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, -1, &errorCode);
1035         if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) ||
1036             pDest!=NULL || dest[0]!=0x1234 || destLength!=destLength0
1037         ) {
1038             log_err("u_strFromUTF8Lenient(%d preflighting with NUL-termination) failed\n", number);
1039         }
1040 
1041         /* preflighting/some capacity with NUL-termination */
1042         if(srcLength>0) {
1043             dest[destLength0-1]=0x1234;
1044             destLength=-1;
1045             errorCode=U_ZERO_ERROR;
1046             pDest=u_strFromUTF8Lenient(dest, destLength0-1, &destLength, pb, -1, &errorCode);
1047             if (errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1048                 dest[destLength0-1]!=0x1234 || destLength!=destLength0
1049             ) {
1050                 log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with NUL-termination) failed\n", number);
1051             }
1052         }
1053 
1054         /* conversion with NUL-termination, much capacity */
1055         dest[0]=dest[destLength0]=0x1234;
1056         destLength=-1;
1057         errorCode=U_ZERO_ERROR;
1058         pDest=u_strFromUTF8Lenient(dest, UPRV_LENGTHOF(dest), &destLength, pb, -1, &errorCode);
1059         if (errorCode!=U_ZERO_ERROR ||
1060             pDest!=dest || dest[destLength0]!=0 ||
1061             destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1062         ) {
1063             log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, much capacity) failed\n", number);
1064         }
1065 
1066         /* conversion with NUL-termination, exact capacity */
1067         dest[0]=dest[destLength0]=0x1234;
1068         destLength=-1;
1069         errorCode=U_ZERO_ERROR;
1070         pDest=u_strFromUTF8Lenient(dest, destLength0, &destLength, pb, -1, &errorCode);
1071         if (errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
1072             pDest!=dest || dest[destLength0]!=0x1234 ||
1073             destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1074         ) {
1075             log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, exact capacity) failed\n", number);
1076         }
1077 
1078         /* preflighting with length */
1079         dest[0]=0x1234;
1080         destLength=-1;
1081         errorCode=U_ZERO_ERROR;
1082         pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, srcLength, &errorCode);
1083         if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) ||
1084             pDest!=NULL || dest[0]!=0x1234 || destLength!=srcLength
1085         ) {
1086             log_err("u_strFromUTF8Lenient(%d preflighting with length) failed\n", number);
1087         }
1088 
1089         /* preflighting/some capacity with length */
1090         if(srcLength>0) {
1091             dest[srcLength-1]=0x1234;
1092             destLength=-1;
1093             errorCode=U_ZERO_ERROR;
1094             pDest=u_strFromUTF8Lenient(dest, srcLength-1, &destLength, pb, srcLength, &errorCode);
1095             if (errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1096                 dest[srcLength-1]!=0x1234 || destLength!=srcLength
1097             ) {
1098                 log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with length) failed\n", number);
1099             }
1100         }
1101 
1102         /* conversion with length, much capacity */
1103         dest[0]=dest[destLength0]=0x1234;
1104         destLength=-1;
1105         errorCode=U_ZERO_ERROR;
1106         pDest=u_strFromUTF8Lenient(dest, UPRV_LENGTHOF(dest), &destLength, pb, srcLength, &errorCode);
1107         if (errorCode!=U_ZERO_ERROR ||
1108             pDest!=dest || dest[destLength0]!=0 ||
1109             destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1110         ) {
1111             log_err("u_strFromUTF8Lenient(%d conversion with length, much capacity) failed\n", number);
1112         }
1113 
1114         /* conversion with length, srcLength capacity */
1115         dest[0]=dest[srcLength]=dest[destLength0]=0x1234;
1116         destLength=-1;
1117         errorCode=U_ZERO_ERROR;
1118         pDest=u_strFromUTF8Lenient(dest, srcLength, &destLength, pb, srcLength, &errorCode);
1119         if(srcLength==destLength0) {
1120             if (errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
1121                 pDest!=dest || dest[destLength0]!=0x1234 ||
1122                 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1123             ) {
1124                 log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/not terminated) failed\n", number);
1125             }
1126         } else {
1127             if (errorCode!=U_ZERO_ERROR ||
1128                 pDest!=dest || dest[destLength0]!=0 ||
1129                 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1130             ) {
1131                 log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/terminated) failed\n", number);
1132             }
1133         }
1134     }
1135 }
1136 
/*
 * ASCII-only source text for the wchar_t round-trip tests:
 * four runs of eight letters, each followed by CR LF, then one terminating NUL
 * (41 code units in total).
 */
static const uint16_t src16j[] = {
    /* 'C'..'J', CR, LF */
    0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
    0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
    /* 'K'..'R', CR, LF */
    0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
    0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
    /* 'S'..'Z', CR, LF */
    0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
    0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
    /* 'S'..'Z', CR, LF (repeated) */
    0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
    0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
    /* terminator */
    0x0000
};
/*
 * ASCII-only source text with embedded NULs for the wchar_t round-trip tests:
 * eight groups of five code units, each followed by a NUL (48 code units in total).
 * Only ASCII is tested; a disabled extension covering 0x00A8..0x00E9 used to be
 * sketched at the end of this table.
 */
static const uint16_t src16WithNulls[] = {
    0x0043, 0x0044, 0x0045, 0x0046, 0x0047,   0x0000,   /* 'C'..'G' */
    0x0048, 0x0049, 0x004A, 0x000D, 0x000A,   0x0000,   /* 'H'..'J', CR, LF */
    0x004B, 0x004C, 0x004D, 0x004E, 0x004F,   0x0000,   /* 'K'..'O' */
    0x0050, 0x0051, 0x0052, 0x000D, 0x000A,   0x0000,   /* 'P'..'R', CR, LF */
    0x0053, 0x0054, 0x0055, 0x0056, 0x0057,   0x0000,   /* 'S'..'W' */
    0x0058, 0x0059, 0x005A, 0x000D, 0x000A,   0x0000,   /* 'X'..'Z', CR, LF */
    0x0053, 0x0054, 0x0055, 0x0056, 0x0057,   0x0000,   /* 'S'..'W' (repeated) */
    0x0058, 0x0059, 0x005A, 0x000D, 0x000A,   0x0000    /* 'X'..'Z', CR, LF (repeated) */
};
Test_UChar_WCHART_API(void)1167 static void Test_UChar_WCHART_API(void){
1168 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1169     UErrorCode err = U_ZERO_ERROR;
1170     const UChar* uSrc = src16j;
1171     int32_t uSrcLen = sizeof(src16j)/2;
1172     wchar_t* wDest = NULL;
1173     int32_t wDestLen = 0;
1174     int32_t reqLen= 0 ;
1175     UBool failed = false;
1176     UChar* uDest = NULL;
1177     int32_t uDestLen = 0;
1178     int i =0;
1179     {
1180         /* Bad UErrorCode arguments. Make sure that the API doesn't crash, and that Purify doesn't complain. */
1181         if (u_strFromWCS(NULL,0,NULL,NULL,0,NULL) != NULL) {
1182             log_err("u_strFromWCS() should return NULL with a bad argument\n");
1183         }
1184         if (u_strToWCS(NULL,0,NULL,NULL,0,NULL) != NULL) {
1185             log_err("u_strToWCS() should return NULL with a bad argument\n");
1186         }
1187 
1188         /* NULL source & destination. */
1189         err = U_ZERO_ERROR;
1190         u_strFromWCS(NULL,0,NULL,NULL,0,&err);
1191         if (err != U_STRING_NOT_TERMINATED_WARNING) {
1192             log_err("u_strFromWCS(NULL, NULL) failed. Error: %s \n", u_errorName(err));
1193         }
1194         err = U_ZERO_ERROR;
1195         u_strToWCS(NULL,0,NULL,NULL,0,&err);
1196         if (err != U_STRING_NOT_TERMINATED_WARNING) {
1197             log_err("u_strToWCS(NULL, NULL) failed. Error: %s \n", u_errorName(err));
1198         }
1199         err = U_ZERO_ERROR;
1200 
1201         /* pre-flight*/
1202         u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1203 
1204         if(err == U_BUFFER_OVERFLOW_ERROR){
1205             err=U_ZERO_ERROR;
1206             wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1207             wDestLen = reqLen+1;
1208             u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1209         }
1210 
1211         /* pre-flight */
1212         u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1213 
1214 
1215         if(err == U_BUFFER_OVERFLOW_ERROR){
1216             err =U_ZERO_ERROR;
1217             uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1218             uDestLen = reqLen + 1;
1219             u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1220         }else if(U_FAILURE(err)){
1221 
1222             log_err("u_strFromWCS() failed. Error: %s \n", u_errorName(err));
1223             return;
1224         }
1225 
1226         for(i=0; i< uSrcLen; i++){
1227             if(uDest[i] != src16j[i]){
1228                 log_verbose("u_str*WCS() failed for unterminated string expected: \\u%04X got: \\u%04X at index: %i \n", src16j[i] ,uDest[i],i);
1229                 failed =true;
1230             }
1231         }
1232 
1233         if(U_FAILURE(err)){
1234             failed = true;
1235         }
1236         if(failed){
1237             log_err("u_strToWCS() failed \n");
1238         }
1239         free(wDest);
1240         free(uDest);
1241 
1242 
1243         /* test with embedded nulls */
1244         uSrc = src16WithNulls;
1245         uSrcLen = sizeof(src16WithNulls)/2;
1246         wDestLen =0;
1247         uDestLen =0;
1248         wDest = NULL;
1249         uDest = NULL;
1250         /* pre-flight*/
1251         u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1252 
1253         if(err == U_BUFFER_OVERFLOW_ERROR){
1254             err=U_ZERO_ERROR;
1255             wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1256             wDestLen = reqLen+1;
1257             u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1258         }
1259 
1260         /* pre-flight */
1261         u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1262 
1263         if(err == U_BUFFER_OVERFLOW_ERROR){
1264             err =U_ZERO_ERROR;
1265             uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1266             uDestLen = reqLen + 1;
1267             u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1268         }
1269 
1270         if(!U_FAILURE(err)) {
1271          for(i=0; i< uSrcLen; i++){
1272             if(uDest[i] != src16WithNulls[i]){
1273                 log_verbose("u_str*WCS() failed for string with nulls expected: \\u%04X got: \\u%04X at index: %i \n", src16WithNulls[i] ,uDest[i],i);
1274                 failed =true;
1275             }
1276          }
1277         }
1278 
1279         if(U_FAILURE(err)){
1280             failed = true;
1281         }
1282         if(failed){
1283             log_err("u_strToWCS() failed \n");
1284         }
1285         free(wDest);
1286         free(uDest);
1287 
1288     }
1289 
1290     {
1291 
1292         uSrc = src16j;
1293         uSrcLen = sizeof(src16j)/2;
1294         wDestLen =0;
1295         uDestLen =0;
1296         wDest = NULL;
1297         uDest = NULL;
1298         wDestLen = 0;
1299         /* pre-flight*/
1300         u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&err);
1301 
1302         if(err == U_BUFFER_OVERFLOW_ERROR){
1303             err=U_ZERO_ERROR;
1304             wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1305             wDestLen = reqLen+1;
1306             u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&err);
1307         }
1308         uDestLen = 0;
1309         /* pre-flight */
1310         u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&err);
1311 
1312         if(err == U_BUFFER_OVERFLOW_ERROR){
1313             err =U_ZERO_ERROR;
1314             uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1315             uDestLen = reqLen + 1;
1316             u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&err);
1317         }
1318 
1319 
1320         if(!U_FAILURE(err)) {
1321          for(i=0; i< uSrcLen; i++){
1322             if(uDest[i] != src16j[i]){
1323                 log_verbose("u_str*WCS() failed for null terminated string expected: \\u%04X got: \\u%04X at index: %i \n", src16j[i] ,uDest[i],i);
1324                 failed =true;
1325             }
1326          }
1327         }
1328 
1329         if(U_FAILURE(err)){
1330             failed = true;
1331         }
1332         if(failed){
1333             log_err("u_strToWCS() failed \n");
1334         }
1335         free(wDest);
1336         free(uDest);
1337     }
1338 
1339     /*
1340      * Test u_terminateWChars().
1341      * All u_terminateXYZ() use the same implementation macro;
1342      * we test this function to improve API coverage.
1343      */
1344     {
1345         wchar_t buffer[10];
1346 
1347         err=U_ZERO_ERROR;
1348         buffer[3]=0x20ac;
1349         wDestLen=u_terminateWChars(buffer, UPRV_LENGTHOF(buffer), 3, &err);
1350         if(err!=U_ZERO_ERROR || wDestLen!=3 || buffer[3]!=0) {
1351             log_err("u_terminateWChars(buffer, all, 3, zero) failed: %s length %d [3]==U+%04x\n",
1352                     u_errorName(err), wDestLen, buffer[3]);
1353         }
1354 
1355         err=U_ZERO_ERROR;
1356         buffer[3]=0x20ac;
1357         wDestLen=u_terminateWChars(buffer, 3, 3, &err);
1358         if(err!=U_STRING_NOT_TERMINATED_WARNING || wDestLen!=3 || buffer[3]!=0x20ac) {
1359             log_err("u_terminateWChars(buffer, 3, 3, zero) failed: %s length %d [3]==U+%04x\n",
1360                     u_errorName(err), wDestLen, buffer[3]);
1361         }
1362 
1363         err=U_STRING_NOT_TERMINATED_WARNING;
1364         buffer[3]=0x20ac;
1365         wDestLen=u_terminateWChars(buffer, UPRV_LENGTHOF(buffer), 3, &err);
1366         if(err!=U_ZERO_ERROR || wDestLen!=3 || buffer[3]!=0) {
1367             log_err("u_terminateWChars(buffer, all, 3, not-terminated) failed: %s length %d [3]==U+%04x\n",
1368                     u_errorName(err), wDestLen, buffer[3]);
1369         }
1370 
1371         err=U_ZERO_ERROR;
1372         buffer[3]=0x20ac;
1373         wDestLen=u_terminateWChars(buffer, 2, 3, &err);
1374         if(err!=U_BUFFER_OVERFLOW_ERROR || wDestLen!=3 || buffer[3]!=0x20ac) {
1375             log_err("u_terminateWChars(buffer, 2, 3, zero) failed: %s length %d [3]==U+%04x\n",
1376                     u_errorName(err), wDestLen, buffer[3]);
1377         }
1378     }
1379 #else
1380     log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32");
1381 #endif
1382 }
1383 
Test_widestrs()1384 static void Test_widestrs()
1385 {
1386 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1387         wchar_t ws[100];
1388         UChar rts[100];
1389         int32_t wcap = UPRV_LENGTHOF(ws);
1390         int32_t wl;
1391         int32_t rtcap = UPRV_LENGTHOF(rts);
1392         int32_t rtl;
1393         wchar_t *wcs;
1394         UChar *cp;
1395         const char *errname;
1396         UChar ustr[] = {'h', 'e', 'l', 'l', 'o', 0};
1397         int32_t ul = UPRV_LENGTHOF(ustr) -1;
1398         char astr[100];
1399 
1400         UErrorCode err;
1401 
1402         err = U_ZERO_ERROR;
1403         wcs = u_strToWCS(ws, wcap, &wl, ustr, ul, &err);
1404         if (U_FAILURE(err)) {
1405                 errname = u_errorName(err);
1406                 log_err("test_widestrs: u_strToWCS error: %s!\n",errname);
1407         }
1408         if(ul!=wl){
1409             log_err("u_strToWCS: ustr = %s, ul = %d, ws = %S, wl = %d!\n", u_austrcpy(astr, ustr), ul, ws, wl);
1410         }
1411         err = U_ZERO_ERROR;
1412         wl = (int32_t)uprv_wcslen(wcs);
1413         cp = u_strFromWCS(rts, rtcap, &rtl, wcs, wl, &err);
1414         (void)cp;    /* Suppress set but not used warning. */
1415         if (U_FAILURE(err)) {
1416                 errname = u_errorName(err);
1417                 fprintf(stderr, "test_widestrs: ucnv_wcstombs error: %s!\n",errname);
1418         }
1419         if(wl != rtl){
1420             log_err("u_strFromWCS: wcs = %S, wl = %d,rts = %s, rtl = %d!\n", wcs, wl, u_austrcpy(astr, rts), rtl);
1421         }
1422 #else
1423     log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32");
1424 #endif
1425 }
1426 
1427 static void
Test_WCHART_LongString()1428 Test_WCHART_LongString(){
1429 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1430     UErrorCode status = U_ZERO_ERROR;
1431     const char* testdatapath=loadTestData(&status);
1432     UResourceBundle *theBundle = ures_open(testdatapath, "testtypes", &status);
1433     int32_t strLen =0;
1434     const UChar* str = ures_getStringByKey(theBundle, "testinclude",&strLen,&status);
1435     const UChar* uSrc = str;
1436     int32_t uSrcLen = strLen;
1437     int32_t wDestLen =0, reqLen=0, i=0;
1438     int32_t uDestLen =0;
1439     wchar_t* wDest = NULL;
1440     UChar* uDest = NULL;
1441     UBool failed = false;
1442 
1443     log_verbose("Loaded string of %d UChars\n", uSrcLen);
1444 
1445     if(U_FAILURE(status)){
1446         log_data_err("Could not get testinclude resource from testtypes bundle. Error: %s\n",u_errorName(status));
1447         return;
1448     }
1449 
1450     /* pre-flight*/
1451     u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&status);
1452 
1453     if(status == U_BUFFER_OVERFLOW_ERROR){
1454         status=U_ZERO_ERROR;
1455         wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1456         wDestLen = reqLen+1;
1457         u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&status);
1458         log_verbose("To %d*%d-byte wchar_ts\n", reqLen,sizeof(wchar_t));
1459     }
1460 
1461     {
1462       int j;
1463       for(j=0;j>=0&&j<reqLen;j++) {
1464         if(wDest[j]!=uSrc[j]) {
1465           log_verbose("Diff %04X vs %04X @ %d\n", wDest[j],uSrc[j],j);
1466           break;
1467         }
1468       }
1469     }
1470 
1471     uDestLen = 0;
1472     /* pre-flight */
1473     u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&status);
1474     if(status == U_BUFFER_OVERFLOW_ERROR){
1475         status =U_ZERO_ERROR;
1476         uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1477         u_memset(uDest,0xFFFF,reqLen+1);
1478         uDestLen = reqLen + 1;
1479         u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&status);
1480         log_verbose("Back to %d UChars\n", reqLen);
1481     }
1482 #if defined(U_WCHAR_IS_UTF16)
1483     log_verbose("U_WCHAR_IS_UTF16\n");
1484 #elif defined(U_WCHAR_IS_UTF32)
1485     log_verbose("U_WCHAR_IS_UTF32\n");
1486 #else
1487     log_verbose("U_WCHAR_IS_idunno (not UTF)\n");
1488 #endif
1489 
1490     if(reqLen!=uSrcLen) {
1491         log_err("Error: dest len is %d but expected src len %d\n", reqLen, uSrcLen);
1492     }
1493 
1494     for(i=0; i< uSrcLen; i++){
1495         if(uDest[i] != str[i]){
1496             log_verbose("u_str*WCS() failed for null terminated string expected: \\u%04X got: \\u%04X at index: %i \n", str[i], uDest[i],i);
1497             failed =true;
1498         }
1499     }
1500 
1501     if(U_FAILURE(status)){
1502         failed = true;
1503     }
1504     if(failed){
1505         log_err("u_strToWCS() failed \n");
1506     }
1507     free(wDest);
1508     free(uDest);
1509     /* close the bundle */
1510     ures_close(theBundle);
1511 #else
1512     log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32");
1513 #endif
1514 }
1515 
Test_strToJavaModifiedUTF8()1516 static void Test_strToJavaModifiedUTF8() {
1517     static const UChar src[]={
1518         0x61, 0x62, 0x63, 0xe1, 0xe2, 0xe3,
1519         0xe01, 0xe02, 0xe03, 0xe001, 0xe002, 0xe003,
1520         0xd800, 0xdc00, 0xdc00, 0xd800, 0,
1521         0xdbff, 0xdfff,
1522         0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xed, 0xe0e, 0x6f
1523     };
1524     static const uint8_t expected[]={
1525         0x61, 0x62, 0x63, 0xc3, 0xa1, 0xc3, 0xa2, 0xc3, 0xa3,
1526         0xe0, 0xb8, 0x81, 0xe0, 0xb8, 0x82, 0xe0, 0xb8, 0x83,
1527         0xee, 0x80, 0x81, 0xee, 0x80, 0x82, 0xee, 0x80, 0x83,
1528         0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0xc0, 0x80,
1529         0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1530         0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xc3, 0xad, 0xe0, 0xb8, 0x8e, 0x6f
1531     };
1532     static const UChar shortSrc[]={
1533         0xe01, 0xe1, 0x61
1534     };
1535     static const uint8_t shortExpected[]={
1536         0xe0, 0xb8, 0x81, 0xc3, 0xa1, 0x61
1537     };
1538     static const UChar asciiNul[]={
1539         0x61, 0x62, 0x63, 0
1540     };
1541     static const uint8_t asciiNulExpected[]={
1542         0x61, 0x62, 0x63
1543     };
1544     char dest[200];
1545     char *p;
1546     int32_t length, expectedTerminatedLength;
1547     UErrorCode errorCode;
1548 
1549     expectedTerminatedLength=(int32_t)(strstr((const char *)expected, "\xc0\x80")-
1550                                        (const char *)expected);
1551 
1552     errorCode=U_ZERO_ERROR;
1553     length=-5;
1554     p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1555                               src, UPRV_LENGTHOF(src), &errorCode);
1556     if( U_FAILURE(errorCode) || p!=dest ||
1557         length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1558         dest[length]!=0
1559     ) {
1560         log_err("u_strToJavaModifiedUTF8(normal) failed - %s\n", u_errorName(errorCode));
1561     }
1562     memset(dest, 0xff, sizeof(dest));
1563     errorCode=U_ZERO_ERROR;
1564     length=-5;
1565     p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), NULL,
1566                               src, UPRV_LENGTHOF(src), &errorCode);
1567     if( U_FAILURE(errorCode) || p!=dest ||
1568         0!=memcmp(dest, expected, UPRV_LENGTHOF(expected)) ||
1569         dest[UPRV_LENGTHOF(expected)]!=0
1570     ) {
1571         log_err("u_strToJavaModifiedUTF8(normal, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1572     }
1573     memset(dest, 0xff, sizeof(dest));
1574     errorCode=U_ZERO_ERROR;
1575     length=-5;
1576     p=u_strToJavaModifiedUTF8(dest, UPRV_LENGTHOF(expected), &length,
1577                               src, UPRV_LENGTHOF(src), &errorCode);
1578     if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || p!=dest ||
1579         length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1580         dest[length]!=(char)0xff
1581     ) {
1582         log_err("u_strToJavaModifiedUTF8(tight) failed - %s\n", u_errorName(errorCode));
1583     }
1584     memset(dest, 0xff, sizeof(dest));
1585     errorCode=U_ZERO_ERROR;
1586     length=-5;
1587     p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, src, -1, &errorCode);
1588     if( U_FAILURE(errorCode) || p!=dest ||
1589         length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
1590         dest[length]!=0
1591     ) {
1592         log_err("u_strToJavaModifiedUTF8(NUL-terminated) failed - %s\n", u_errorName(errorCode));
1593     }
1594     memset(dest, 0xff, sizeof(dest));
1595     errorCode=U_ZERO_ERROR;
1596     length=-5;
1597     p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), NULL, src, -1, &errorCode);
1598     if( U_FAILURE(errorCode) || p!=dest ||
1599         0!=memcmp(dest, expected, expectedTerminatedLength) ||
1600         dest[expectedTerminatedLength]!=0
1601     ) {
1602         log_err("u_strToJavaModifiedUTF8(NUL-terminated, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1603     }
1604     memset(dest, 0xff, sizeof(dest));
1605     errorCode=U_ZERO_ERROR;
1606     length=-5;
1607     p=u_strToJavaModifiedUTF8(dest, UPRV_LENGTHOF(expected)/2, &length,
1608                               src, UPRV_LENGTHOF(src), &errorCode);
1609     if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1610         length!=UPRV_LENGTHOF(expected) || dest[UPRV_LENGTHOF(expected)/2]!=(char)0xff
1611     ) {
1612         log_err("u_strToJavaModifiedUTF8(overflow) failed - %s\n", u_errorName(errorCode));
1613     }
1614     memset(dest, 0xff, sizeof(dest));
1615     errorCode=U_ZERO_ERROR;
1616     length=-5;
1617     p=u_strToJavaModifiedUTF8(NULL, 0, &length,
1618                               src, UPRV_LENGTHOF(src), &errorCode);
1619     if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1620         length!=UPRV_LENGTHOF(expected) || dest[0]!=(char)0xff
1621     ) {
1622         log_err("u_strToJavaModifiedUTF8(pure preflighting) failed - %s\n", u_errorName(errorCode));
1623     }
1624     memset(dest, 0xff, sizeof(dest));
1625     errorCode=U_ZERO_ERROR;
1626     length=-5;
1627     p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1628                               shortSrc, UPRV_LENGTHOF(shortSrc), &errorCode);
1629     if( U_FAILURE(errorCode) || p!=dest ||
1630         length!=UPRV_LENGTHOF(shortExpected) || 0!=memcmp(dest, shortExpected, length) ||
1631         dest[length]!=0
1632     ) {
1633         log_err("u_strToJavaModifiedUTF8(short) failed - %s\n", u_errorName(errorCode));
1634     }
1635     memset(dest, 0xff, sizeof(dest));
1636     errorCode=U_ZERO_ERROR;
1637     length=-5;
1638     p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1639                               asciiNul, -1, &errorCode);
1640     if( U_FAILURE(errorCode) || p!=dest ||
1641         length!=UPRV_LENGTHOF(asciiNulExpected) || 0!=memcmp(dest, asciiNulExpected, length) ||
1642         dest[length]!=0
1643     ) {
1644         log_err("u_strToJavaModifiedUTF8(asciiNul) failed - %s\n", u_errorName(errorCode));
1645     }
1646     memset(dest, 0xff, sizeof(dest));
1647     errorCode=U_ZERO_ERROR;
1648     length=-5;
1649     p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1650                               NULL, 0, &errorCode);
1651     if( U_FAILURE(errorCode) || p!=dest ||
1652         length!=0 || dest[0]!=0
1653     ) {
1654         log_err("u_strToJavaModifiedUTF8(empty) failed - %s\n", u_errorName(errorCode));
1655     }
1656 
1657     /* illegal arguments */
1658     memset(dest, 0xff, sizeof(dest));
1659     errorCode=U_ZERO_ERROR;
1660     length=-5;
1661     p=u_strToJavaModifiedUTF8(NULL, sizeof(dest), &length,
1662                               src, UPRV_LENGTHOF(src), &errorCode);
1663     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1664         log_err("u_strToJavaModifiedUTF8(dest=NULL) failed - %s\n", u_errorName(errorCode));
1665     }
1666     memset(dest, 0xff, sizeof(dest));
1667     errorCode=U_ZERO_ERROR;
1668     length=-5;
1669     p=u_strToJavaModifiedUTF8(dest, -1, &length,
1670                               src, UPRV_LENGTHOF(src), &errorCode);
1671     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1672         log_err("u_strToJavaModifiedUTF8(destCapacity<0) failed - %s\n", u_errorName(errorCode));
1673     }
1674     memset(dest, 0xff, sizeof(dest));
1675     errorCode=U_ZERO_ERROR;
1676     length=-5;
1677     p=u_strToJavaModifiedUTF8(dest, sizeof(dest), &length,
1678                               NULL, UPRV_LENGTHOF(src), &errorCode);
1679     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1680         log_err("u_strToJavaModifiedUTF8(src=NULL) failed - %s\n", u_errorName(errorCode));
1681     }
1682     memset(dest, 0xff, sizeof(dest));
1683     errorCode=U_ZERO_ERROR;
1684     length=-5;
1685     p=u_strToJavaModifiedUTF8(dest, sizeof(dest), &length,
1686                               NULL, -1, &errorCode);
1687     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1688         log_err("u_strToJavaModifiedUTF8(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode));
1689     }
1690 }
1691 
Test_strFromJavaModifiedUTF8()1692 static void Test_strFromJavaModifiedUTF8() {
1693     static const uint8_t src[]={
1694         0x61, 0x62, 0x63, 0xc3, 0xa1, 0xc3, 0xa2, 0xc3, 0xa3,
1695         0xe0, 0xb8, 0x81, 0xe0, 0xb8, 0x82, 0xe0, 0xb8, 0x83,
1696         0xee, 0x80, 0x81, 0xee, 0x80, 0x82, 0xee, 0x80, 0x83,
1697         0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0,
1698         0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1699         0x81, 0xc0, 0xe0, 0xb8, 0xf0, 0x90, 0x80, 0x80,  /* invalid sequences */
1700         0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
1701         0xe0, 0x81, 0xac, 0xe0, 0x83, 0xad,  /* non-shortest forms are allowed */
1702         0xe0, 0xb8, 0x8e, 0x6f
1703     };
1704     static const UChar expected[]={
1705         0x61, 0x62, 0x63, 0xe1, 0xe2, 0xe3,
1706         0xe01, 0xe02, 0xe03, 0xe001, 0xe002, 0xe003,
1707         0xd800, 0xdc00, 0xdc00, 0xd800, 0,
1708         0xdbff, 0xdfff,
1709         0xfffd, 0xfffd, 0xfffd, 0xfffd,
1710         0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
1711         0x6c, 0xed,
1712         0xe0e, 0x6f
1713     };
1714     static const uint8_t shortSrc[]={
1715         0xe0, 0xb8, 0x81, 0xc3, 0xa1, 0x61
1716     };
1717     static const UChar shortExpected[]={
1718         0xe01, 0xe1, 0x61
1719     };
1720     static const uint8_t asciiNul[]={
1721         0x61, 0x62, 0x63, 0
1722     };
1723     static const UChar asciiNulExpected[]={
1724         0x61, 0x62, 0x63
1725     };
1726     static const uint8_t invalid[]={
1727         0x81, 0xc0, 0xe0, 0xb8, 0xf0, 0x90, 0x80, 0x80
1728     };
1729     static const UChar invalidExpectedFFFD[]={
1730         0xfffd, 0xfffd, 0xfffd, 0xfffd
1731     };
1732     static const UChar invalidExpected50000[]={
1733         0xd900, 0xdc00, 0xd900, 0xdc00, 0xd900, 0xdc00, 0xd900, 0xdc00
1734     };
1735     UChar dest[200];
1736     UChar *p;
1737     int32_t length, expectedTerminatedLength;
1738     int32_t numSubstitutions;
1739     UErrorCode errorCode;
1740 
1741     expectedTerminatedLength=(int32_t)(u_strchr(expected, 0)-expected);
1742 
1743     errorCode=U_ZERO_ERROR;
1744     length=numSubstitutions=-5;
1745     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1746                                        (const char *)src, UPRV_LENGTHOF(src),
1747                                        0xfffd, &numSubstitutions, &errorCode);
1748     if( U_FAILURE(errorCode) || p!=dest ||
1749         length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1750         dest[length]!=0 ||
1751         numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1752     ) {
1753         log_err("u_strFromJavaModifiedUTF8WithSub(normal) failed - %s\n", u_errorName(errorCode));
1754     }
1755     memset(dest, 0xff, sizeof(dest));
1756     errorCode=U_ZERO_ERROR;
1757     length=numSubstitutions=-5;
1758     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), NULL,
1759                                        (const char *)src, UPRV_LENGTHOF(src),
1760                                        0xfffd, &numSubstitutions, &errorCode);
1761     if( U_FAILURE(errorCode) || p!=dest ||
1762         0!=memcmp(dest, expected, UPRV_LENGTHOF(expected)) ||
1763         dest[UPRV_LENGTHOF(expected)]!=0 ||
1764         numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1765     ) {
1766         log_err("u_strFromJavaModifiedUTF8WithSub(normal, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1767     }
1768     memset(dest, 0xff, sizeof(dest));
1769     errorCode=U_ZERO_ERROR;
1770     length=numSubstitutions=-5;
1771     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1772                                        (const char *)src, UPRV_LENGTHOF(src),
1773                                        0xfffd, NULL, &errorCode);
1774     if( U_FAILURE(errorCode) || p!=dest ||
1775         length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1776         dest[length]!=0
1777     ) {
1778         log_err("u_strFromJavaModifiedUTF8WithSub(normal, pNumSubstitutions=NULL) failed - %s\n", u_errorName(errorCode));
1779     }
1780     memset(dest, 0xff, sizeof(dest));
1781     errorCode=U_ZERO_ERROR;
1782     length=numSubstitutions=-5;
1783     p=u_strFromJavaModifiedUTF8WithSub(dest, UPRV_LENGTHOF(expected), &length,
1784                                        (const char *)src, UPRV_LENGTHOF(src),
1785                                        0xfffd, &numSubstitutions, &errorCode);
1786     if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || p!=dest ||
1787         length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1788         dest[length]!=0xffff ||
1789         numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1790     ) {
1791         log_err("u_strFromJavaModifiedUTF8WithSub(tight) failed - %s\n", u_errorName(errorCode));
1792     }
1793     memset(dest, 0xff, sizeof(dest));
1794     errorCode=U_ZERO_ERROR;
1795     length=numSubstitutions=-5;
1796     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1797                                        (const char *)src, -1,
1798                                        0xfffd, &numSubstitutions, &errorCode);
1799     if( U_FAILURE(errorCode) || p!=dest ||
1800         length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
1801         dest[length]!=0 ||
1802         numSubstitutions!=0
1803     ) {
1804         log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated) failed - %s\n", u_errorName(errorCode));
1805     }
1806     memset(dest, 0xff, sizeof(dest));
1807     errorCode=U_ZERO_ERROR;
1808     length=numSubstitutions=-5;
1809     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), NULL,
1810                                        (const char *)src, -1,
1811                                        0xfffd, &numSubstitutions, &errorCode);
1812     if( U_FAILURE(errorCode) || p!=dest ||
1813         0!=memcmp(dest, expected, expectedTerminatedLength) ||
1814         dest[expectedTerminatedLength]!=0 ||
1815         numSubstitutions!=0
1816     ) {
1817         log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1818     }
1819     memset(dest, 0xff, sizeof(dest));
1820     errorCode=U_ZERO_ERROR;
1821     length=numSubstitutions=-5;
1822     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1823                                        (const char *)src, -1,
1824                                        0xfffd, NULL, &errorCode);
1825     if( U_FAILURE(errorCode) || p!=dest ||
1826         length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
1827         dest[length]!=0
1828     ) {
1829         log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated, pNumSubstitutions=NULL) failed - %s\n", u_errorName(errorCode));
1830     }
1831     memset(dest, 0xff, sizeof(dest));
1832     errorCode=U_ZERO_ERROR;
1833     length=numSubstitutions=-5;
1834     p=u_strFromJavaModifiedUTF8WithSub(dest, UPRV_LENGTHOF(expected)/2, &length,
1835                                        (const char *)src, UPRV_LENGTHOF(src),
1836                                        0xfffd, &numSubstitutions, &errorCode);
1837     if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1838         length!=UPRV_LENGTHOF(expected) || dest[UPRV_LENGTHOF(expected)/2]!=0xffff
1839     ) {
1840         log_err("u_strFromJavaModifiedUTF8WithSub(overflow) failed - %s\n", u_errorName(errorCode));
1841     }
1842     memset(dest, 0xff, sizeof(dest));
1843     errorCode=U_ZERO_ERROR;
1844     length=numSubstitutions=-5;
1845     p=u_strFromJavaModifiedUTF8WithSub(NULL, 0, &length,
1846                                        (const char *)src, UPRV_LENGTHOF(src),
1847                                        0xfffd, &numSubstitutions, &errorCode);
1848     if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1849         length!=UPRV_LENGTHOF(expected) || dest[0]!=0xffff
1850     ) {
1851         log_err("u_strFromJavaModifiedUTF8WithSub(pure preflighting) failed - %s\n", u_errorName(errorCode));
1852     }
1853     memset(dest, 0xff, sizeof(dest));
1854     errorCode=U_ZERO_ERROR;
1855     length=numSubstitutions=-5;
1856     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1857                                        (const char *)shortSrc, UPRV_LENGTHOF(shortSrc),
1858                                        0xfffd, &numSubstitutions, &errorCode);
1859     if( U_FAILURE(errorCode) || p!=dest ||
1860         length!=UPRV_LENGTHOF(shortExpected) || 0!=memcmp(dest, shortExpected, length) ||
1861         dest[length]!=0 ||
1862         numSubstitutions!=0
1863     ) {
1864         log_err("u_strFromJavaModifiedUTF8WithSub(short) failed - %s\n", u_errorName(errorCode));
1865     }
1866     memset(dest, 0xff, sizeof(dest));
1867     errorCode=U_ZERO_ERROR;
1868     length=numSubstitutions=-5;
1869     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1870                                        (const char *)asciiNul, -1,
1871                                        0xfffd, &numSubstitutions, &errorCode);
1872     if( U_FAILURE(errorCode) || p!=dest ||
1873         length!=UPRV_LENGTHOF(asciiNulExpected) || 0!=memcmp(dest, asciiNulExpected, length) ||
1874         dest[length]!=0 ||
1875         numSubstitutions!=0
1876     ) {
1877         log_err("u_strFromJavaModifiedUTF8WithSub(asciiNul) failed - %s\n", u_errorName(errorCode));
1878     }
1879     memset(dest, 0xff, sizeof(dest));
1880     errorCode=U_ZERO_ERROR;
1881     length=numSubstitutions=-5;
1882     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1883                                        NULL, 0, 0xfffd, &numSubstitutions, &errorCode);
1884     if( U_FAILURE(errorCode) || p!=dest ||
1885         length!=0 || dest[0]!=0 ||
1886         numSubstitutions!=0
1887     ) {
1888         log_err("u_strFromJavaModifiedUTF8WithSub(empty) failed - %s\n", u_errorName(errorCode));
1889     }
1890     memset(dest, 0xff, sizeof(dest));
1891     errorCode=U_ZERO_ERROR;
1892     length=numSubstitutions=-5;
1893     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1894                                        (const char *)invalid, UPRV_LENGTHOF(invalid),
1895                                        0xfffd, &numSubstitutions, &errorCode);
1896     if( U_FAILURE(errorCode) || p!=dest ||
1897         length!=UPRV_LENGTHOF(invalidExpectedFFFD) || 0!=memcmp(dest, invalidExpectedFFFD, length) ||
1898         dest[length]!=0 ||
1899         numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1900     ) {
1901         log_err("u_strFromJavaModifiedUTF8WithSub(invalid->fffd) failed - %s\n", u_errorName(errorCode));
1902     }
1903     memset(dest, 0xff, sizeof(dest));
1904     errorCode=U_ZERO_ERROR;
1905     length=numSubstitutions=-5;
1906     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1907                                        (const char *)invalid, UPRV_LENGTHOF(invalid),
1908                                        0x50000, &numSubstitutions, &errorCode);
1909     if( U_FAILURE(errorCode) || p!=dest ||
1910         length!=UPRV_LENGTHOF(invalidExpected50000) || 0!=memcmp(dest, invalidExpected50000, length) ||
1911         dest[length]!=0 ||
1912         numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)  /* not ...50000 */
1913     ) {
1914         log_err("u_strFromJavaModifiedUTF8WithSub(invalid->50000) failed - %s\n", u_errorName(errorCode));
1915     }
1916     memset(dest, 0xff, sizeof(dest));
1917     errorCode=U_ZERO_ERROR;
1918     length=numSubstitutions=-5;
1919     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1920                                        (const char *)invalid, UPRV_LENGTHOF(invalid),
1921                                        U_SENTINEL, &numSubstitutions, &errorCode);
1922     if(errorCode!=U_INVALID_CHAR_FOUND || dest[0]!=0xffff || numSubstitutions!=0) {
1923         log_err("u_strFromJavaModifiedUTF8WithSub(invalid->error) failed - %s\n", u_errorName(errorCode));
1924     }
1925     memset(dest, 0xff, sizeof(dest));
1926     errorCode=U_ZERO_ERROR;
1927     length=numSubstitutions=-5;
1928     p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1929                                        (const char *)src, UPRV_LENGTHOF(src),
1930                                        U_SENTINEL, &numSubstitutions, &errorCode);
1931     if( errorCode!=U_INVALID_CHAR_FOUND ||
1932         length>=UPRV_LENGTHOF(expected) || dest[UPRV_LENGTHOF(expected)-1]!=0xffff ||
1933         numSubstitutions!=0
1934     ) {
1935         log_err("u_strFromJavaModifiedUTF8WithSub(normal->error) failed - %s\n", u_errorName(errorCode));
1936     }
1937 
1938     /* illegal arguments */
1939     memset(dest, 0xff, sizeof(dest));
1940     errorCode=U_ZERO_ERROR;
1941     length=numSubstitutions=-5;
1942     p=u_strFromJavaModifiedUTF8WithSub(NULL, sizeof(dest), &length,
1943                                        (const char *)src, UPRV_LENGTHOF(src),
1944                                        0xfffd, &numSubstitutions, &errorCode);
1945     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1946         log_err("u_strFromJavaModifiedUTF8WithSub(dest=NULL) failed - %s\n", u_errorName(errorCode));
1947     }
1948     memset(dest, 0xff, sizeof(dest));
1949     errorCode=U_ZERO_ERROR;
1950     length=numSubstitutions=-5;
1951     p=u_strFromJavaModifiedUTF8WithSub(dest, -1, &length,
1952                                        (const char *)src, UPRV_LENGTHOF(src),
1953                                        0xfffd, &numSubstitutions, &errorCode);
1954     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1955         log_err("u_strFromJavaModifiedUTF8WithSub(destCapacity<0) failed - %s\n", u_errorName(errorCode));
1956     }
1957     memset(dest, 0xff, sizeof(dest));
1958     errorCode=U_ZERO_ERROR;
1959     length=numSubstitutions=-5;
1960     p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1961                                        NULL, UPRV_LENGTHOF(src),
1962                                        0xfffd, &numSubstitutions, &errorCode);
1963     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1964         log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL) failed - %s\n", u_errorName(errorCode));
1965     }
1966     memset(dest, 0xff, sizeof(dest));
1967     errorCode=U_ZERO_ERROR;
1968     length=numSubstitutions=-5;
1969     p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1970                                        NULL, -1, 0xfffd, &numSubstitutions, &errorCode);
1971     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1972         log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode));
1973     }
1974     memset(dest, 0xff, sizeof(dest));
1975     errorCode=U_ZERO_ERROR;
1976     length=numSubstitutions=-5;
1977     p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1978                                        (const char *)src, UPRV_LENGTHOF(src),
1979                                        0x110000, &numSubstitutions, &errorCode);
1980     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1981         log_err("u_strFromJavaModifiedUTF8WithSub(subchar=U_SENTINEL) failed - %s\n", u_errorName(errorCode));
1982     }
1983     memset(dest, 0xff, sizeof(dest));
1984     errorCode=U_ZERO_ERROR;
1985     length=numSubstitutions=-5;
1986     p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1987                                        (const char *)src, UPRV_LENGTHOF(src),
1988                                        0xdfff, &numSubstitutions, &errorCode);
1989     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1990         log_err("u_strFromJavaModifiedUTF8WithSub(subchar is surrogate) failed - %s\n", u_errorName(errorCode));
1991     }
1992 }
1993 
1994 /* test that string transformation functions permit NULL source pointer when source length==0 */
TestNullEmptySource()1995 static void TestNullEmptySource() {
1996     char dest8[4]={ 3, 3, 3, 3 };
1997     UChar dest16[4]={ 3, 3, 3, 3 };
1998     UChar32 dest32[4]={ 3, 3, 3, 3 };
1999 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
2000     wchar_t destW[4]={ 3, 3, 3, 3 };
2001 #endif
2002 
2003     int32_t length;
2004     UErrorCode errorCode;
2005 
2006     /* u_strFromXyz() */
2007 
2008     dest16[0]=3;
2009     length=3;
2010     errorCode=U_ZERO_ERROR;
2011     u_strFromUTF8(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2012     if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2013         log_err("u_strFromUTF8(source=NULL, sourceLength=0) failed\n");
2014     }
2015 
2016     dest16[0]=3;
2017     length=3;
2018     errorCode=U_ZERO_ERROR;
2019     u_strFromUTF8WithSub(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2020     if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2021         log_err("u_strFromUTF8WithSub(source=NULL, sourceLength=0) failed\n");
2022     }
2023 
2024     dest16[0]=3;
2025     length=3;
2026     errorCode=U_ZERO_ERROR;
2027     u_strFromUTF8Lenient(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2028     if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2029         log_err("u_strFromUTF8Lenient(source=NULL, sourceLength=0) failed\n");
2030     }
2031 
2032     dest16[0]=3;
2033     length=3;
2034     errorCode=U_ZERO_ERROR;
2035     u_strFromUTF32(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2036     if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2037         log_err("u_strFromUTF32(source=NULL, sourceLength=0) failed\n");
2038     }
2039 
2040     dest16[0]=3;
2041     length=3;
2042     errorCode=U_ZERO_ERROR;
2043     u_strFromUTF32WithSub(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2044     if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2045         log_err("u_strFromUTF32WithSub(source=NULL, sourceLength=0) failed\n");
2046     }
2047 
2048     dest16[0]=3;
2049     length=3;
2050     errorCode=U_ZERO_ERROR;
2051     u_strFromJavaModifiedUTF8WithSub(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2052     if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2053         log_err("u_strFromJavaModifiedUTF8WithSub(source=NULL, sourceLength=0) failed\n");
2054     }
2055 
2056     /* u_strToXyz() */
2057 
2058     dest8[0]=3;
2059     length=3;
2060     errorCode=U_ZERO_ERROR;
2061     u_strToUTF8(dest8, UPRV_LENGTHOF(dest8), &length, NULL, 0, &errorCode);
2062     if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) {
2063         log_err("u_strToUTF8(source=NULL, sourceLength=0) failed\n");
2064     }
2065 
2066     dest8[0]=3;
2067     length=3;
2068     errorCode=U_ZERO_ERROR;
2069     u_strToUTF8WithSub(dest8, UPRV_LENGTHOF(dest8), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2070     if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) {
2071         log_err("u_strToUTF8(source=NULL, sourceLength=0) failed\n");
2072     }
2073 
2074     dest32[0]=3;
2075     length=3;
2076     errorCode=U_ZERO_ERROR;
2077     u_strToUTF32(dest32, UPRV_LENGTHOF(dest32), &length, NULL, 0, &errorCode);
2078     if(errorCode!=U_ZERO_ERROR || length!=0 || dest32[0]!=0 || dest32[1]!=3) {
2079         log_err("u_strToUTF32(source=NULL, sourceLength=0) failed\n");
2080     }
2081 
2082     dest32[0]=3;
2083     length=3;
2084     errorCode=U_ZERO_ERROR;
2085     u_strToUTF32WithSub(dest32, UPRV_LENGTHOF(dest32), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2086     if(errorCode!=U_ZERO_ERROR || length!=0 || dest32[0]!=0 || dest32[1]!=3) {
2087         log_err("u_strToUTF32WithSub(source=NULL, sourceLength=0) failed\n");
2088     }
2089 
2090     dest8[0]=3;
2091     length=3;
2092     errorCode=U_ZERO_ERROR;
2093     u_strToJavaModifiedUTF8(dest8, UPRV_LENGTHOF(dest8), &length, NULL, 0, &errorCode);
2094     if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) {
2095         log_err("u_strToJavaModifiedUTF8(source=NULL, sourceLength=0) failed\n");
2096     }
2097 
2098 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
2099 
2100     dest16[0]=3;
2101     length=3;
2102     errorCode=U_ZERO_ERROR;
2103     u_strFromWCS(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2104     if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2105         log_err("u_strFromWCS(source=NULL, sourceLength=0) failed\n");
2106     }
2107 
2108     destW[0]=3;
2109     length=3;
2110     errorCode=U_ZERO_ERROR;
2111     u_strToWCS(destW, UPRV_LENGTHOF(destW), &length, NULL, 0, &errorCode);
2112     if(errorCode!=U_ZERO_ERROR || length!=0 || destW[0]!=0 || destW[1]!=3) {
2113         log_err("u_strToWCS(source=NULL, sourceLength=0) failed\n");
2114     }
2115 
2116 #endif
2117 }
2118