1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 2001-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
9 *
10 * File custrtrn.C
11 *
12 * Modification History:
13 * Name Description
14 * Ram String transformations test
15 *********************************************************************************
16 */
17 /****************************************************************************/
18
19
20 #include <stdlib.h>
21 #include <stdio.h>
22 #include <string.h>
23 #include "unicode/utypes.h"
24 #include "unicode/ustring.h"
25 #include "unicode/ures.h"
26 #include "ustr_imp.h"
27 #include "cintltst.h"
28 #include "cmemory.h"
29 #include "cstring.h"
30 #include "cwchar.h"
31
/* Registration entry point for this file's tests; defined below. */
void addUCharTransformTest(TestNode** root);

/*
 * Forward declarations of the file-local test functions so that
 * addUCharTransformTest() can take their addresses before they are defined.
 */
static void Test_strToUTF32(void);
static void Test_strToUTF32_surrogates(void);
static void Test_strFromUTF32(void);
static void Test_strFromUTF32_surrogates(void);
static void Test_UChar_UTF8_API(void);
static void Test_FromUTF8(void);
static void Test_FromUTF8Lenient(void);
static void Test_UChar_WCHART_API(void);
static void Test_widestrs(void);
static void Test_WCHART_LongString(void);
static void Test_strToJavaModifiedUTF8(void);
static void Test_strFromJavaModifiedUTF8(void);
static void TestNullEmptySource(void);
47
48 void
addUCharTransformTest(TestNode ** root)49 addUCharTransformTest(TestNode** root)
50 {
51 addTest(root, &Test_strToUTF32, "custrtrn/Test_strToUTF32");
52 addTest(root, &Test_strToUTF32_surrogates, "custrtrn/Test_strToUTF32_surrogates");
53 addTest(root, &Test_strFromUTF32, "custrtrn/Test_strFromUTF32");
54 addTest(root, &Test_strFromUTF32_surrogates, "custrtrn/Test_strFromUTF32_surrogates");
55 addTest(root, &Test_UChar_UTF8_API, "custrtrn/Test_UChar_UTF8_API");
56 addTest(root, &Test_FromUTF8, "custrtrn/Test_FromUTF8");
57 addTest(root, &Test_FromUTF8Lenient, "custrtrn/Test_FromUTF8Lenient");
58 addTest(root, &Test_UChar_WCHART_API, "custrtrn/Test_UChar_WCHART_API");
59 addTest(root, &Test_widestrs, "custrtrn/Test_widestrs");
60 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
61 addTest(root, &Test_WCHART_LongString, "custrtrn/Test_WCHART_LongString");
62 #endif
63 addTest(root, &Test_strToJavaModifiedUTF8, "custrtrn/Test_strToJavaModifiedUTF8");
64 addTest(root, &Test_strFromJavaModifiedUTF8, "custrtrn/Test_strFromJavaModifiedUTF8");
65 addTest(root, &TestNullEmptySource, "custrtrn/TestNullEmptySource");
66 }
67
/*
 * UTF-32 test text, one code point per element, ending with an explicit
 * U+0000 terminator (so UPRV_LENGTHOF(src32) counts the NUL).
 * Test_strToUTF32() expects converting src16 to yield exactly this array,
 * and Test_strFromUTF32() converts it back and compares against src16.
 * The middle section holds supplementary (non-BMP) code points, which occupy
 * one element here but a surrogate pair in src16.
 */
static const UChar32 src32[]={
    0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
    0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
    0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
    0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
    0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
    0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
    0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
    0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
    0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
    0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
    0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
    0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
    0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
    0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
    0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
    0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
    0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
    0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
    0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
    /* test non-BMP code points */
    0x0002A699,
    0x0002A69C, 0x0002A69D, 0x0002A69E, 0x0002A69F, 0x0002A6A0, 0x0002A6A5, 0x0002A6A6, 0x0002A6A7, 0x0002A6A8, 0x0002A6AB,
    0x0002A6AC, 0x0002A6AD, 0x0002A6AE, 0x0002A6AF, 0x0002A6B0, 0x0002A6B1, 0x0002A6B3, 0x0002A6B5, 0x0002A6B6, 0x0002A6B7,
    0x0002A6B8, 0x0002A6B9, 0x0002A6BA, 0x0002A6BB, 0x0002A6BC, 0x0002A6BD, 0x0002A6BE, 0x0002A6BF, 0x0002A6C0, 0x0002A6C1,
    0x0002A6C2, 0x0002A6C3, 0x0002A6C4, 0x0002A6C8, 0x0002A6CA, 0x0002A6CB, 0x0002A6CD, 0x0002A6CE, 0x0002A6CF, 0x0002A6D0,
    0x0002A6D1, 0x0002A6D2, 0x0002A6D3, 0x0002A6D4, 0x0002A6D5,

    0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
    0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
    0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
    0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
    0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,0x0000
};
102
/*
 * UTF-16 test text, one code unit per element, ending with an explicit
 * U+0000 terminator (so UPRV_LENGTHOF(src16) counts the NUL).
 * It is the UTF-16 counterpart of src32: the BMP rows are identical, and
 * each supplementary code point of src32 appears here as a lead/trail
 * surrogate pair (e.g. 0xD869, 0xDE99 for U+2A699).
 */
static const UChar src16[] = {
    0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
    0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
    0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
    0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
    0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
    0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
    0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
    0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
    0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
    0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
    0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
    0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
    0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
    0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
    0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
    0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
    0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
    0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
    0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,

    /* test non-BMP code points */
    0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
    0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
    0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
    0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
    0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
    0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
    0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
    0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
    0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
    0xD869, 0xDED5,

    0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
    0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
    0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
    0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
    0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,0x0000
};
142
143
Test_strToUTF32(void)144 static void Test_strToUTF32(void){
145 UErrorCode err = U_ZERO_ERROR;
146 UChar32 u32Target[400];
147 int32_t u32DestLen;
148 int i= 0;
149
150 /* first with length */
151 u32DestLen = -2;
152 u_strToUTF32(u32Target, 0, &u32DestLen, src16, UPRV_LENGTHOF(src16),&err);
153 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != UPRV_LENGTHOF(src32)) {
154 log_err("u_strToUTF32(preflight with length): "
155 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
156 (long)u32DestLen, (long)UPRV_LENGTHOF(src32), u_errorName(err));
157 return;
158 }
159 err = U_ZERO_ERROR;
160 u32DestLen = -2;
161 u_strToUTF32(u32Target, UPRV_LENGTHOF(src32)+1, &u32DestLen, src16, UPRV_LENGTHOF(src16),&err);
162 if(err != U_ZERO_ERROR || u32DestLen != UPRV_LENGTHOF(src32)) {
163 log_err("u_strToUTF32(with length): "
164 "length %ld != %ld and %s != U_ZERO_ERROR\n",
165 (long)u32DestLen, (long)UPRV_LENGTHOF(src32), u_errorName(err));
166 return;
167 }
168 /*for(i=0; i< u32DestLen; i++){
169 printf("0x%08X, ",uTarget[i]);
170 if(i%10==0){
171 printf("\n");
172 }
173 }*/
174 for(i=0; i< UPRV_LENGTHOF(src32); i++){
175 if(u32Target[i] != src32[i]){
176 log_verbose("u_strToUTF32(with length) failed expected: %04X got: %04X at index: %i \n", src32[i], u32Target[i],i);
177 }
178 }
179 if(u32Target[i] != 0){
180 log_verbose("u_strToUTF32(with length) failed expected: %04X got: %04X at index: %i \n", 0, u32Target[i],i);
181 }
182
183 /* now NUL-terminated */
184 u32DestLen = -2;
185 u_strToUTF32(NULL,0, &u32DestLen, src16, -1,&err);
186 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != UPRV_LENGTHOF(src32)-1) {
187 log_err("u_strToUTF32(preflight with NUL-termination): "
188 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
189 (long)u32DestLen, (long)UPRV_LENGTHOF(src32)-1, u_errorName(err));
190 return;
191 }
192 err = U_ZERO_ERROR;
193 u32DestLen = -2;
194 u_strToUTF32(u32Target, UPRV_LENGTHOF(src32), &u32DestLen, src16, -1,&err);
195 if(err != U_ZERO_ERROR || u32DestLen != UPRV_LENGTHOF(src32)-1) {
196 log_err("u_strToUTF32(with NUL-termination): "
197 "length %ld != %ld and %s != U_ZERO_ERROR\n",
198 (long)u32DestLen, (long)UPRV_LENGTHOF(src32)-1, u_errorName(err));
199 return;
200 }
201
202 for(i=0; i< UPRV_LENGTHOF(src32); i++){
203 if(u32Target[i] != src32[i]){
204 log_verbose("u_strToUTF32(NUL-termination) failed expected: %04X got: %04X \n", src32[i], u32Target[i]);
205 }
206 }
207 }
208
209 /* test unpaired surrogates */
Test_strToUTF32_surrogates()210 static void Test_strToUTF32_surrogates() {
211 UErrorCode err = U_ZERO_ERROR;
212 UChar32 u32Target[400];
213 int32_t len16, u32DestLen;
214 int32_t numSubstitutions;
215 int i;
216
217 static const UChar surr16[] = { 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
218 static const UChar32 expected[] = { 0x5a, 0x50000, 0x7a, 0 };
219 static const UChar32 expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0 };
220 static const UChar32 expected_12345[] = { 0x41, 0x12345, 0x61, 0x12345, 0x5a, 0x50000, 0x7a, 0 };
221 len16 = UPRV_LENGTHOF(surr16);
222 for(i = 0; i < 4; ++i) {
223 err = U_ZERO_ERROR;
224 u_strToUTF32(u32Target, 0, &u32DestLen, surr16+i, len16-i, &err);
225 if(err != U_INVALID_CHAR_FOUND) {
226 log_err("u_strToUTF32(preflight surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
227 (long)i, u_errorName(err));
228 return;
229 }
230
231 err = U_ZERO_ERROR;
232 u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+i, len16-i, &err);
233 if(err != U_INVALID_CHAR_FOUND) {
234 log_err("u_strToUTF32(surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
235 (long)i, u_errorName(err));
236 return;
237 }
238
239 err = U_ZERO_ERROR;
240 u_strToUTF32(NULL, 0, &u32DestLen, surr16+i, -1, &err);
241 if(err != U_INVALID_CHAR_FOUND) {
242 log_err("u_strToUTF32(preflight surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
243 (long)i, u_errorName(err));
244 return;
245 }
246
247 err = U_ZERO_ERROR;
248 u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+i, -1, &err);
249 if(err != U_INVALID_CHAR_FOUND) {
250 log_err("u_strToUTF32(surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
251 (long)i, u_errorName(err));
252 return;
253 }
254 }
255
256 err = U_ZERO_ERROR;
257 u_strToUTF32(u32Target, 0, &u32DestLen, surr16+4, len16-4-1, &err);
258 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) {
259 log_err("u_strToUTF32(preflight surr16+4) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
260 u_errorName(err));
261 return;
262 }
263
264 err = U_ZERO_ERROR;
265 u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+4, len16-4-1, &err);
266 if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) {
267 log_err("u_strToUTF32(surr16+4) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
268 u_errorName(err));
269 return;
270 }
271
272 err = U_ZERO_ERROR;
273 u_strToUTF32(NULL, 0, &u32DestLen, surr16+4, -1, &err);
274 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) {
275 log_err("u_strToUTF32(preflight surr16+4/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
276 u_errorName(err));
277 return;
278 }
279
280 err = U_ZERO_ERROR;
281 u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+4, -1, &err);
282 if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) {
283 log_err("u_strToUTF32(surr16+4/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
284 u_errorName(err));
285 return;
286 }
287
288 /* with substitution character */
289 numSubstitutions = -1;
290 err = U_ZERO_ERROR;
291 u_strToUTF32WithSub(u32Target, 0, &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err);
292 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) {
293 log_err("u_strToUTF32WithSub(preflight surr16) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
294 u_errorName(err));
295 return;
296 }
297
298 err = U_ZERO_ERROR;
299 u_strToUTF32WithSub(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err);
300 if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_FFFD, 8*4)) {
301 log_err("u_strToUTF32WithSub(surr16) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
302 u_errorName(err));
303 return;
304 }
305
306 err = U_ZERO_ERROR;
307 u_strToUTF32WithSub(NULL, 0, &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err);
308 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) {
309 log_err("u_strToUTF32WithSub(preflight surr16/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
310 u_errorName(err));
311 return;
312 }
313
314 err = U_ZERO_ERROR;
315 u_strToUTF32WithSub(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err);
316 if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_12345, 8*4)) {
317 log_err("u_strToUTF32WithSub(surr16/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
318 u_errorName(err));
319 return;
320 }
321 }
322
Test_strFromUTF32(void)323 static void Test_strFromUTF32(void){
324 UErrorCode err = U_ZERO_ERROR;
325 UChar uTarget[400];
326 int32_t uDestLen;
327 int i= 0;
328
329 /* first with length */
330 uDestLen = -2;
331 u_strFromUTF32(uTarget,0,&uDestLen,src32,UPRV_LENGTHOF(src32),&err);
332 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != UPRV_LENGTHOF(src16)) {
333 log_err("u_strFromUTF32(preflight with length): "
334 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
335 (long)uDestLen, (long)UPRV_LENGTHOF(src16), u_errorName(err));
336 return;
337 }
338 err = U_ZERO_ERROR;
339 uDestLen = -2;
340 u_strFromUTF32(uTarget, UPRV_LENGTHOF(src16)+1,&uDestLen,src32,UPRV_LENGTHOF(src32),&err);
341 if(err != U_ZERO_ERROR || uDestLen != UPRV_LENGTHOF(src16)) {
342 log_err("u_strFromUTF32(with length): "
343 "length %ld != %ld and %s != U_ZERO_ERROR\n",
344 (long)uDestLen, (long)UPRV_LENGTHOF(src16), u_errorName(err));
345 return;
346 }
347 /*for(i=0; i< uDestLen; i++){
348 printf("0x%04X, ",uTarget[i]);
349 if(i%10==0){
350 printf("\n");
351 }
352 }*/
353
354 for(i=0; i< uDestLen; i++){
355 if(uTarget[i] != src16[i]){
356 log_verbose("u_strFromUTF32(with length) failed expected: %04X got: %04X at index: %i \n", src16[i] ,uTarget[i],i);
357 }
358 }
359 if(uTarget[i] != 0){
360 log_verbose("u_strFromUTF32(with length) failed expected: %04X got: %04X at index: %i \n", 0,uTarget[i],i);
361 }
362
363 /* now NUL-terminated */
364 uDestLen = -2;
365 u_strFromUTF32(NULL,0,&uDestLen,src32,-1,&err);
366 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != UPRV_LENGTHOF(src16)-1) {
367 log_err("u_strFromUTF32(preflight with NUL-termination): "
368 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
369 (long)uDestLen, (long)UPRV_LENGTHOF(src16)-1, u_errorName(err));
370 return;
371 }
372 err = U_ZERO_ERROR;
373 uDestLen = -2;
374 u_strFromUTF32(uTarget, UPRV_LENGTHOF(src16),&uDestLen,src32,-1,&err);
375 if(err != U_ZERO_ERROR || uDestLen != UPRV_LENGTHOF(src16)-1) {
376 log_err("u_strFromUTF32(with NUL-termination): "
377 "length %ld != %ld and %s != U_ZERO_ERROR\n",
378 (long)uDestLen, (long)UPRV_LENGTHOF(src16)-1, u_errorName(err));
379 return;
380 }
381
382 for(i=0; i< uDestLen; i++){
383 if(uTarget[i] != src16[i]){
384 log_verbose("u_strFromUTF32(with NUL-termination) failed expected: %04X got: %04X \n", src16[i] ,uTarget[i]);
385 }
386 }
387 }
388
389 /* test surrogate code points */
Test_strFromUTF32_surrogates()390 static void Test_strFromUTF32_surrogates() {
391 UErrorCode err = U_ZERO_ERROR;
392 UChar uTarget[400];
393 int32_t len32, uDestLen;
394 int32_t numSubstitutions;
395 int i;
396
397 static const UChar32 surr32[] = { 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a, 0 };
398 static const UChar expected[] = { 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
399 static const UChar expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
400 static const UChar expected_12345[] = { 0x41, 0xd808, 0xdf45, 0x61, 0xd808, 0xdf45, 0xd808, 0xdf45, 0xd808, 0xdf45,
401 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
402 len32 = UPRV_LENGTHOF(surr32);
403 for(i = 0; i < 6; ++i) {
404 err = U_ZERO_ERROR;
405 u_strFromUTF32(uTarget, 0, &uDestLen, surr32+i, len32-i, &err);
406 if(err != U_INVALID_CHAR_FOUND) {
407 log_err("u_strFromUTF32(preflight surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
408 (long)i, u_errorName(err));
409 return;
410 }
411
412 err = U_ZERO_ERROR;
413 u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+i, len32-i, &err);
414 if(err != U_INVALID_CHAR_FOUND) {
415 log_err("u_strFromUTF32(surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
416 (long)i, u_errorName(err));
417 return;
418 }
419
420 err = U_ZERO_ERROR;
421 u_strFromUTF32(NULL, 0, &uDestLen, surr32+i, -1, &err);
422 if(err != U_INVALID_CHAR_FOUND) {
423 log_err("u_strFromUTF32(preflight surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
424 (long)i, u_errorName(err));
425 return;
426 }
427
428 err = U_ZERO_ERROR;
429 u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+i, -1, &err);
430 if(err != U_INVALID_CHAR_FOUND) {
431 log_err("u_strFromUTF32(surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
432 (long)i, u_errorName(err));
433 return;
434 }
435 }
436
437 err = U_ZERO_ERROR;
438 u_strFromUTF32(uTarget, 0, &uDestLen, surr32+6, len32-6-1, &err);
439 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) {
440 log_err("u_strFromUTF32(preflight surr32+6) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
441 u_errorName(err));
442 return;
443 }
444
445 err = U_ZERO_ERROR;
446 u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+6, len32-6-1, &err);
447 if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) {
448 log_err("u_strFromUTF32(surr32+6) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
449 u_errorName(err));
450 return;
451 }
452
453 err = U_ZERO_ERROR;
454 u_strFromUTF32(NULL, 0, &uDestLen, surr32+6, -1, &err);
455 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) {
456 log_err("u_strFromUTF32(preflight surr32+6/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
457 u_errorName(err));
458 return;
459 }
460
461 err = U_ZERO_ERROR;
462 u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+6, -1, &err);
463 if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) {
464 log_err("u_strFromUTF32(surr32+6/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
465 u_errorName(err));
466 return;
467 }
468
469 /* with substitution character */
470 numSubstitutions = -1;
471 err = U_ZERO_ERROR;
472 u_strFromUTF32WithSub(uTarget, 0, &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err);
473 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 10 || numSubstitutions != 4) {
474 log_err("u_strFromUTF32WithSub(preflight surr32) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
475 u_errorName(err));
476 return;
477 }
478
479 err = U_ZERO_ERROR;
480 u_strFromUTF32WithSub(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err);
481 if(err != U_ZERO_ERROR || uDestLen != 10 || numSubstitutions != 4 || u_memcmp(uTarget, expected_FFFD, 11)) {
482 log_err("u_strFromUTF32WithSub(surr32) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
483 u_errorName(err));
484 return;
485 }
486
487 err = U_ZERO_ERROR;
488 u_strFromUTF32WithSub(NULL, 0, &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err);
489 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 14 || numSubstitutions != 4) {
490 log_err("u_strFromUTF32WithSub(preflight surr32/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
491 u_errorName(err));
492 return;
493 }
494
495 err = U_ZERO_ERROR;
496 u_strFromUTF32WithSub(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err);
497 if(err != U_ZERO_ERROR || uDestLen != 14 || numSubstitutions != 4 || u_memcmp(uTarget, expected_12345, 15)) {
498 log_err("u_strFromUTF32WithSub(surr32/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
499 u_errorName(err));
500 return;
501 }
502 }
503
Test_UChar_UTF8_API(void)504 static void Test_UChar_UTF8_API(void){
505
506 UErrorCode err = U_ZERO_ERROR;
507 UChar uTemp[1];
508 char u8Temp[1];
509 UChar* uTarget=uTemp;
510 const char* u8Src;
511 int32_t u8SrcLen = 0;
512 int32_t uTargetLength = 0;
513 int32_t uDestLen=0;
514 const UChar* uSrc = src16;
515 int32_t uSrcLen = sizeof(src16)/2;
516 char* u8Target = u8Temp;
517 int32_t u8TargetLength =0;
518 int32_t u8DestLen =0;
519 UBool failed = FALSE;
520 int i= 0;
521 int32_t numSubstitutions;
522
523 {
524 /* preflight */
525 u8Temp[0] = 0x12;
526 u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
527 if(err == U_BUFFER_OVERFLOW_ERROR && u8Temp[0] == 0x12){
528 err = U_ZERO_ERROR;
529 u8Target = (char*) malloc (sizeof(uint8_t) * (u8DestLen+1));
530 u8TargetLength = u8DestLen;
531
532 u8Target[u8TargetLength] = (char)0xfe;
533 u8DestLen = -1;
534 u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
535 if(U_FAILURE(err) || u8DestLen != u8TargetLength || u8Target[u8TargetLength] != (char)0xfe){
536 log_err("u_strToUTF8 failed after preflight. Error: %s\n", u_errorName(err));
537 return;
538 }
539
540 }
541 else {
542 log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR");
543 }
544 failed = FALSE;
545 /*for(i=0; i< u8DestLen; i++){
546 printf("0x%04X, ",u8Target[i]);
547 if(i%10==0){
548 printf("\n");
549 }
550 }*/
551 /*for(i=0; i< u8DestLen; i++){
552 if(u8Target[i] != src8[i]){
553 log_verbose("u_strToUTF8() failed expected: %04X got: %04X \n", src8[i], u8Target[i]);
554 failed =TRUE;
555 }
556 }
557 if(failed){
558 log_err("u_strToUTF8() failed \n");
559 }*/
560 u8Src = u8Target;
561 u8SrcLen = u8DestLen;
562
563 /* preflight */
564 uTemp[0] = 0x1234;
565 u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
566 if(err == U_BUFFER_OVERFLOW_ERROR && uTemp[0] == 0x1234){
567 err = U_ZERO_ERROR;
568 uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1));
569 uTargetLength = uDestLen;
570
571 uTarget[uTargetLength] = 0xfff0;
572 uDestLen = -1;
573 u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
574 }
575 else {
576 log_err("error: u_strFromUTF8(preflight) should have gotten U_BUFFER_OVERFLOW_ERROR\n");
577 }
578 /*for(i=0; i< uDestLen; i++){
579 printf("0x%04X, ",uTarget[i]);
580 if(i%10==0){
581 printf("\n");
582 }
583 }*/
584
585 if(U_FAILURE(err) || uDestLen != uTargetLength || uTarget[uTargetLength] != 0xfff0) {
586 failed = TRUE;
587 }
588 for(i=0; i< uSrcLen; i++){
589 if(uTarget[i] != src16[i]){
590 log_verbose("u_strFromUTF8() failed expected: \\u%04X got: \\u%04X at index: %i \n", src16[i] ,uTarget[i],i);
591 failed =TRUE;
592 }
593 }
594 if(failed){
595 log_err("error: u_strFromUTF8(after preflighting) failed\n");
596 }
597
598 free(u8Target);
599 free(uTarget);
600 }
601 {
602 u8SrcLen = -1;
603 uTargetLength = 0;
604 uSrcLen =-1;
605 u8TargetLength=0;
606 failed = FALSE;
607 /* preflight */
608 u_strToUTF8(NULL,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
609 if(err == U_BUFFER_OVERFLOW_ERROR){
610 err = U_ZERO_ERROR;
611 u8Target = (char*) malloc (sizeof(uint8_t) * (u8DestLen+1));
612 u8TargetLength = u8DestLen;
613
614 u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
615
616 }
617 else {
618 log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR");
619 }
620 failed = FALSE;
621 /*for(i=0; i< u8DestLen; i++){
622 printf("0x%04X, ",u8Target[i]);
623 if(i%10==0){
624 printf("\n");
625 }
626 }*/
627 /*for(i=0; i< u8DestLen; i++){
628 if(u8Target[i] != src8[i]){
629 log_verbose("u_strToUTF8() failed expected: %04X got: %04X \n", src8[i], u8Target[i]);
630 failed =TRUE;
631 }
632 }
633 if(failed){
634 log_err("u_strToUTF8() failed \n");
635 }*/
636 u8Src = u8Target;
637 u8SrcLen = u8DestLen;
638
639 /* preflight */
640 u_strFromUTF8(NULL,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
641 if(err == U_BUFFER_OVERFLOW_ERROR){
642 err = U_ZERO_ERROR;
643 uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1));
644 uTargetLength = uDestLen;
645
646 u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
647 }
648 else {
649 log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR");
650 }
651 /*for(i=0; i< uDestLen; i++){
652 printf("0x%04X, ",uTarget[i]);
653 if(i%10==0){
654 printf("\n");
655 }
656 }*/
657
658 for(i=0; i< uSrcLen; i++){
659 if(uTarget[i] != src16[i]){
660 log_verbose("u_strFromUTF8() failed expected: \\u%04X got: \\u%04X at index: %i \n", src16[i] ,uTarget[i],i);
661 failed =TRUE;
662 }
663 }
664 if(failed){
665 log_err("u_strToUTF8() failed \n");
666 }
667
668 free(u8Target);
669 free(uTarget);
670 }
671
672 /* test UTF-8 with single surrogates - illegal in Unicode 3.2 */
673 // Since ICU 60, each surrogate byte sequence is treated as 3 single-byte errors.
674 {
675 static const UChar
676 withLead16[]={ 0x1800, 0xd89a, 0x0061 },
677 withTrail16[]={ 0x1800, 0xdcba, 0x0061, 0 },
678 withTrail16SubFFFD[]={ 0x1800, 0xfffd, 0xfffd, 0xfffd, 0x0061, 0 }, /* sub==U+FFFD */
679 withTrail16Sub50005[]={ 0x1800, 0xd900, 0xdc05, 0xd900, 0xdc05, 0xd900, 0xdc05, 0x0061, 0 }; /* sub==U+50005 */
680 static const uint8_t
681 withLead8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xa2, 0x9a, 0x61 },
682 withTrail8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xb2, 0xba, 0x61, 0 },
683 withTrail8Sub1A[]={ 0xe1, 0xa0, 0x80, 0x1a, 0x61, 0 }, /* sub==U+001A */
684 withTrail8SubFFFD[]={ 0xe1, 0xa0, 0x80, 0xef, 0xbf, 0xbd, 0x61, 0 }; /* sub==U+FFFD */
685 UChar out16[10];
686 char out8[10];
687
688 if(
689 (err=U_ZERO_ERROR, u_strToUTF8(out8, UPRV_LENGTHOF(out8), NULL, withLead16, UPRV_LENGTHOF(withLead16), &err), err!=U_INVALID_CHAR_FOUND) ||
690 (err=U_ZERO_ERROR, u_strToUTF8(out8, UPRV_LENGTHOF(out8), NULL, withTrail16, -1, &err), err!=U_INVALID_CHAR_FOUND) ||
691 (err=U_ZERO_ERROR, u_strFromUTF8(out16, UPRV_LENGTHOF(out16), NULL, (const char *)withLead8, UPRV_LENGTHOF(withLead8), &err), err!=U_INVALID_CHAR_FOUND) ||
692 (err=U_ZERO_ERROR, u_strFromUTF8(out16, UPRV_LENGTHOF(out16), NULL, (const char *)withTrail8, -1, &err), err!=U_INVALID_CHAR_FOUND)
693 ) {
694 log_err("error: u_strTo/FromUTF8(string with single surrogate) fails to report error\n");
695 }
696
697 /* test error handling with substitution characters */
698
699 /* from UTF-8 with length */
700 err=U_ZERO_ERROR;
701 numSubstitutions=-1;
702 out16[0]=0x55aa;
703 uDestLen=0;
704 u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
705 (const char *)withTrail8, uprv_strlen((const char *)withTrail8),
706 0x50005, &numSubstitutions,
707 &err);
708 if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16Sub50005) ||
709 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen+1) ||
710 numSubstitutions!=3) {
711 log_err("error: u_strFromUTF8WithSub(length) failed\n");
712 }
713
714 /* from UTF-8 with NUL termination */
715 err=U_ZERO_ERROR;
716 numSubstitutions=-1;
717 out16[0]=0x55aa;
718 uDestLen=0;
719 u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
720 (const char *)withTrail8, -1,
721 0xfffd, &numSubstitutions,
722 &err);
723 if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16SubFFFD) ||
724 0!=u_memcmp(withTrail16SubFFFD, out16, uDestLen+1) ||
725 numSubstitutions!=3) {
726 log_err("error: u_strFromUTF8WithSub(NUL termination) failed\n");
727 }
728
729 /* preflight from UTF-8 with NUL termination */
730 err=U_ZERO_ERROR;
731 numSubstitutions=-1;
732 out16[0]=0x55aa;
733 uDestLen=0;
734 u_strFromUTF8WithSub(out16, 1, &uDestLen,
735 (const char *)withTrail8, -1,
736 0x50005, &numSubstitutions,
737 &err);
738 if(err!=U_BUFFER_OVERFLOW_ERROR || uDestLen!=u_strlen(withTrail16Sub50005) || numSubstitutions!=3) {
739 log_err("error: u_strFromUTF8WithSub(preflight/NUL termination) failed\n");
740 }
741
742 /* to UTF-8 with length */
743 err=U_ZERO_ERROR;
744 numSubstitutions=-1;
745 out8[0]=(char)0xf5;
746 u8DestLen=0;
747 u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
748 withTrail16, u_strlen(withTrail16),
749 0xfffd, &numSubstitutions,
750 &err);
751 if(U_FAILURE(err) || u8DestLen!=uprv_strlen((const char *)withTrail8SubFFFD) ||
752 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen+1) ||
753 numSubstitutions!=1) {
754 log_err("error: u_strToUTF8WithSub(length) failed\n");
755 }
756
757 /* to UTF-8 with NUL termination */
758 err=U_ZERO_ERROR;
759 numSubstitutions=-1;
760 out8[0]=(char)0xf5;
761 u8DestLen=0;
762 u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
763 withTrail16, -1,
764 0x1a, &numSubstitutions,
765 &err);
766 if(U_FAILURE(err) || u8DestLen!=uprv_strlen((const char *)withTrail8Sub1A) ||
767 0!=uprv_memcmp((const char *)withTrail8Sub1A, out8, u8DestLen+1) ||
768 numSubstitutions!=1) {
769 log_err("error: u_strToUTF8WithSub(NUL termination) failed\n");
770 }
771
772 /* preflight to UTF-8 with NUL termination */
773 err=U_ZERO_ERROR;
774 numSubstitutions=-1;
775 out8[0]=(char)0xf5;
776 u8DestLen=0;
777 u_strToUTF8WithSub(out8, 1, &u8DestLen,
778 withTrail16, -1,
779 0xfffd, &numSubstitutions,
780 &err);
781 if(err!=U_BUFFER_OVERFLOW_ERROR || u8DestLen!=uprv_strlen((const char *)withTrail8SubFFFD) ||
782 numSubstitutions!=1) {
783 log_err("error: u_strToUTF8WithSub(preflight/NUL termination) failed\n");
784 }
785
786 /* test that numSubstitutions==0 if there are no substitutions */
787
788 /* from UTF-8 with length (just first 3 bytes which are valid) */
789 err=U_ZERO_ERROR;
790 numSubstitutions=-1;
791 out16[0]=0x55aa;
792 uDestLen=0;
793 u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
794 (const char *)withTrail8, 3,
795 0x50005, &numSubstitutions,
796 &err);
797 if(U_FAILURE(err) || uDestLen!=1 ||
798 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) ||
799 numSubstitutions!=0) {
800 log_err("error: u_strFromUTF8WithSub(no subs) failed\n");
801 }
802
803 /* to UTF-8 with length (just first UChar which is valid) */
804 err=U_ZERO_ERROR;
805 numSubstitutions=-1;
806 out8[0]=(char)0xf5;
807 u8DestLen=0;
808 u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
809 withTrail16, 1,
810 0xfffd, &numSubstitutions,
811 &err);
812 if(U_FAILURE(err) || u8DestLen!=3 ||
813 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) ||
814 numSubstitutions!=0) {
815 log_err("error: u_strToUTF8WithSub(no subs) failed\n");
816 }
817
818 /* test that numSubstitutions==0 if subchar==U_SENTINEL (no subchar) */
819
820 /* from UTF-8 with length (just first 3 bytes which are valid) */
821 err=U_ZERO_ERROR;
822 numSubstitutions=-1;
823 out16[0]=0x55aa;
824 uDestLen=0;
825 u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
826 (const char *)withTrail8, 3,
827 U_SENTINEL, &numSubstitutions,
828 &err);
829 if(U_FAILURE(err) || uDestLen!=1 ||
830 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) ||
831 numSubstitutions!=0) {
832 log_err("error: u_strFromUTF8WithSub(no subchar) failed\n");
833 }
834
835 /* to UTF-8 with length (just first UChar which is valid) */
836 err=U_ZERO_ERROR;
837 numSubstitutions=-1;
838 out8[0]=(char)0xf5;
839 u8DestLen=0;
840 u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
841 withTrail16, 1,
842 U_SENTINEL, &numSubstitutions,
843 &err);
844 if(U_FAILURE(err) || u8DestLen!=3 ||
845 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) ||
846 numSubstitutions!=0) {
847 log_err("error: u_strToUTF8WithSub(no subchar) failed\n");
848 }
849 }
850 {
851 /*
852 * Test with an illegal lead byte that would be followed by more than 3 trail bytes.
853 * See ticket #10371.
854 */
855 static const char src[1]={ (char)0xf8 };
856 UChar out16[10];
857 err=U_ZERO_ERROR;
858 u_strFromUTF8(out16, UPRV_LENGTHOF(out16), NULL, src, 1, &err);
859 if(err!=U_INVALID_CHAR_FOUND) {
860 log_err("error: u_strFromUTF8(5-byte lead byte) failed\n");
861 }
862 }
863 }
864
865 /* compare if two strings are equal, but match 0xfffd in the second string with anything in the first */
866 static UBool
equalAnyFFFD(const UChar * s,const UChar * t,int32_t length)867 equalAnyFFFD(const UChar *s, const UChar *t, int32_t length) {
868 UChar c1, c2;
869
870 while(length>0) {
871 c1=*s++;
872 c2=*t++;
873 if(c1!=c2 && c2!=0xfffd) {
874 return FALSE;
875 }
876 --length;
877 }
878 return TRUE;
879 }
880
881 /* test u_strFromUTF8Lenient() */
882 static void
Test_FromUTF8(void)883 Test_FromUTF8(void) {
884 /*
885 * Test case from icu-support list 20071130 "u_strFromUTF8() returns U_INVALID_CHAR_FOUND(10)"
886 */
887 static const uint8_t bytes[]={ 0xe0, 0xa5, 0x9c, 0 };
888 UChar dest[64];
889 UChar *destPointer;
890 int32_t destLength;
891 UErrorCode errorCode;
892
893 /* 3 bytes input, one UChar output (U+095C) */
894 errorCode=U_ZERO_ERROR;
895 destLength=-99;
896 destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, 3, &errorCode);
897 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=1) {
898 log_err("error: u_strFromUTF8(preflight srcLength=3) fails: destLength=%ld - %s\n",
899 (long)destLength, u_errorName(errorCode));
900 }
901
902 /* 4 bytes input, two UChars output (U+095C U+0000) */
903 errorCode=U_ZERO_ERROR;
904 destLength=-99;
905 destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, 4, &errorCode);
906 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=2) {
907 log_err("error: u_strFromUTF8(preflight srcLength=4) fails: destLength=%ld - %s\n",
908 (long)destLength, u_errorName(errorCode));
909 }
910
911 /* NUL-terminated 3 bytes input, one UChar output (U+095C) */
912 errorCode=U_ZERO_ERROR;
913 destLength=-99;
914 destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, -1, &errorCode);
915 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=1) {
916 log_err("error: u_strFromUTF8(preflight srcLength=-1) fails: destLength=%ld - %s\n",
917 (long)destLength, u_errorName(errorCode));
918 }
919
920 /* 3 bytes input, one UChar output (U+095C), transform not just preflight */
921 errorCode=U_ZERO_ERROR;
922 dest[0]=dest[1]=99;
923 destLength=-99;
924 destPointer=u_strFromUTF8(dest, UPRV_LENGTHOF(dest), &destLength, (const char *)bytes, 3, &errorCode);
925 if(U_FAILURE(errorCode) || destPointer!=dest || destLength!=1 || dest[0]!=0x95c || dest[1]!=0) {
926 log_err("error: u_strFromUTF8(transform srcLength=3) fails: destLength=%ld - %s\n",
927 (long)destLength, u_errorName(errorCode));
928 }
929 }
930
931 /* test u_strFromUTF8Lenient() */
932 static void
Test_FromUTF8Lenient(void)933 Test_FromUTF8Lenient(void) {
934 /*
935 * Multiple input strings, each NUL-terminated.
936 * Terminate with a string starting with 0xff.
937 */
938 static const uint8_t bytes[]={
939 /* well-formed UTF-8 */
940 0x61, 0xc3, 0x9f, 0xe0, 0xa0, 0x80, 0xf0, 0xa0, 0x80, 0x80,
941 0x62, 0xc3, 0xa0, 0xe0, 0xa0, 0x81, 0xf0, 0xa0, 0x80, 0x81, 0,
942
943 /* various malformed sequences */
944 0xc3, 0xc3, 0x9f, 0xc3, 0xa0, 0xe0, 0x80, 0x8a, 0xf0, 0x41, 0x42, 0x43, 0,
945
946 /* truncated input */
947 0xc3, 0,
948 0xe0, 0,
949 0xe0, 0xa0, 0,
950 0xf0, 0,
951 0xf0, 0x90, 0,
952 0xf0, 0x90, 0x80, 0,
953
954 /* non-ASCII characters in the last few bytes */
955 0x61, 0xc3, 0x9f, 0xe0, 0xa0, 0x80, 0,
956 0x61, 0xe0, 0xa0, 0x80, 0xc3, 0x9f, 0,
957
958 /* empty string */
959 0,
960
961 /* finish */
962 0xff, 0
963 };
964
965 /* Multiple output strings, each NUL-terminated. 0xfffd matches anything. */
966 static const UChar uchars[]={
967 0x61, 0xdf, 0x800, 0xd840, 0xdc00,
968 0x62, 0xe0, 0x801, 0xd840, 0xdc01, 0,
969
970 0xfffd, 0x9f, 0xe0, 0xa, 0xfffd, 0xfffd, 0,
971
972 0xfffd, 0,
973 0xfffd, 0,
974 0xfffd, 0,
975 0xfffd, 0,
976 0xfffd, 0,
977 0xfffd, 0,
978
979 0x61, 0xdf, 0x800, 0,
980 0x61, 0x800, 0xdf, 0,
981
982 0,
983
984 0
985 };
986
987 UChar dest[64];
988 const char *pb;
989 const UChar *pu, *pDest;
990 int32_t srcLength, destLength0, destLength;
991 int number;
992 UErrorCode errorCode;
993
994 /* verify checking for some illegal arguments */
995 dest[0]=0x1234;
996 destLength=-1;
997 errorCode=U_ZERO_ERROR;
998 pDest=u_strFromUTF8Lenient(dest, 1, &destLength, NULL, -1, &errorCode);
999 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0x1234) {
1000 log_err("u_strFromUTF8Lenient(src=NULL) failed\n");
1001 }
1002
1003 dest[0]=0x1234;
1004 destLength=-1;
1005 errorCode=U_ZERO_ERROR;
1006 pDest=u_strFromUTF8Lenient(NULL, 1, &destLength, (const char *)bytes, -1, &errorCode);
1007 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1008 log_err("u_strFromUTF8Lenient(dest=NULL[1]) failed\n");
1009 }
1010
1011 dest[0]=0x1234;
1012 destLength=-1;
1013 errorCode=U_MEMORY_ALLOCATION_ERROR;
1014 pDest=u_strFromUTF8Lenient(dest, 1, &destLength, (const char *)bytes, -1, &errorCode);
1015 if(errorCode!=U_MEMORY_ALLOCATION_ERROR || dest[0]!=0x1234) {
1016 log_err("u_strFromUTF8Lenient(U_MEMORY_ALLOCATION_ERROR) failed\n");
1017 }
1018
1019 /* test normal behavior */
1020 number=0; /* string number for log_err() */
1021
1022 for(pb=(const char *)bytes, pu=uchars;
1023 *pb!=(char)0xff;
1024 pb+=srcLength+1, pu+=destLength0+1, ++number
1025 ) {
1026 srcLength=uprv_strlen(pb);
1027 destLength0=u_strlen(pu);
1028
1029 /* preflighting with NUL-termination */
1030 dest[0]=0x1234;
1031 destLength=-1;
1032 errorCode=U_ZERO_ERROR;
1033 pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, -1, &errorCode);
1034 if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) ||
1035 pDest!=NULL || dest[0]!=0x1234 || destLength!=destLength0
1036 ) {
1037 log_err("u_strFromUTF8Lenient(%d preflighting with NUL-termination) failed\n", number);
1038 }
1039
1040 /* preflighting/some capacity with NUL-termination */
1041 if(srcLength>0) {
1042 dest[destLength0-1]=0x1234;
1043 destLength=-1;
1044 errorCode=U_ZERO_ERROR;
1045 pDest=u_strFromUTF8Lenient(dest, destLength0-1, &destLength, pb, -1, &errorCode);
1046 if (errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1047 dest[destLength0-1]!=0x1234 || destLength!=destLength0
1048 ) {
1049 log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with NUL-termination) failed\n", number);
1050 }
1051 }
1052
1053 /* conversion with NUL-termination, much capacity */
1054 dest[0]=dest[destLength0]=0x1234;
1055 destLength=-1;
1056 errorCode=U_ZERO_ERROR;
1057 pDest=u_strFromUTF8Lenient(dest, UPRV_LENGTHOF(dest), &destLength, pb, -1, &errorCode);
1058 if (errorCode!=U_ZERO_ERROR ||
1059 pDest!=dest || dest[destLength0]!=0 ||
1060 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1061 ) {
1062 log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, much capacity) failed\n", number);
1063 }
1064
1065 /* conversion with NUL-termination, exact capacity */
1066 dest[0]=dest[destLength0]=0x1234;
1067 destLength=-1;
1068 errorCode=U_ZERO_ERROR;
1069 pDest=u_strFromUTF8Lenient(dest, destLength0, &destLength, pb, -1, &errorCode);
1070 if (errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
1071 pDest!=dest || dest[destLength0]!=0x1234 ||
1072 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1073 ) {
1074 log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, exact capacity) failed\n", number);
1075 }
1076
1077 /* preflighting with length */
1078 dest[0]=0x1234;
1079 destLength=-1;
1080 errorCode=U_ZERO_ERROR;
1081 pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, srcLength, &errorCode);
1082 if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) ||
1083 pDest!=NULL || dest[0]!=0x1234 || destLength!=srcLength
1084 ) {
1085 log_err("u_strFromUTF8Lenient(%d preflighting with length) failed\n", number);
1086 }
1087
1088 /* preflighting/some capacity with length */
1089 if(srcLength>0) {
1090 dest[srcLength-1]=0x1234;
1091 destLength=-1;
1092 errorCode=U_ZERO_ERROR;
1093 pDest=u_strFromUTF8Lenient(dest, srcLength-1, &destLength, pb, srcLength, &errorCode);
1094 if (errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1095 dest[srcLength-1]!=0x1234 || destLength!=srcLength
1096 ) {
1097 log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with length) failed\n", number);
1098 }
1099 }
1100
1101 /* conversion with length, much capacity */
1102 dest[0]=dest[destLength0]=0x1234;
1103 destLength=-1;
1104 errorCode=U_ZERO_ERROR;
1105 pDest=u_strFromUTF8Lenient(dest, UPRV_LENGTHOF(dest), &destLength, pb, srcLength, &errorCode);
1106 if (errorCode!=U_ZERO_ERROR ||
1107 pDest!=dest || dest[destLength0]!=0 ||
1108 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1109 ) {
1110 log_err("u_strFromUTF8Lenient(%d conversion with length, much capacity) failed\n", number);
1111 }
1112
1113 /* conversion with length, srcLength capacity */
1114 dest[0]=dest[srcLength]=dest[destLength0]=0x1234;
1115 destLength=-1;
1116 errorCode=U_ZERO_ERROR;
1117 pDest=u_strFromUTF8Lenient(dest, srcLength, &destLength, pb, srcLength, &errorCode);
1118 if(srcLength==destLength0) {
1119 if (errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
1120 pDest!=dest || dest[destLength0]!=0x1234 ||
1121 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1122 ) {
1123 log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/not terminated) failed\n", number);
1124 }
1125 } else {
1126 if (errorCode!=U_ZERO_ERROR ||
1127 pDest!=dest || dest[destLength0]!=0 ||
1128 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1129 ) {
1130 log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/terminated) failed\n", number);
1131 }
1132 }
1133 }
1134 }
1135
/*
 * NUL-terminated UTF-16 test string for the wchar_t round-trip tests:
 * four ASCII lines, each ending in CR LF. Test only ASCII.
 */
static const uint16_t src16j[] = {
    0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A,  0x0D, 0x0A,   /* "CDEFGHIJ" CR LF */
    0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51, 0x52,  0x0D, 0x0A,   /* "KLMNOPQR" CR LF */
    0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A,  0x0D, 0x0A,   /* "STUVWXYZ" CR LF */
    0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A,  0x0D, 0x0A,   /* "STUVWXYZ" CR LF */
    0x00                                                           /* terminator */
};
/*
 * UTF-16 test data with embedded NULs for the wchar_t round-trip tests:
 * eight groups of five ASCII code units, each group followed by U+0000.
 * Test only ASCII.
 */
static const uint16_t src16WithNulls[] = {
    0x43, 0x44, 0x45, 0x46, 0x47,  0x00,   /* "CDEFG" */
    0x48, 0x49, 0x4A, 0x0D, 0x0A,  0x00,   /* "HIJ" CR LF */
    0x4B, 0x4C, 0x4D, 0x4E, 0x4F,  0x00,   /* "KLMNO" */
    0x50, 0x51, 0x52, 0x0D, 0x0A,  0x00,   /* "PQR" CR LF */
    0x53, 0x54, 0x55, 0x56, 0x57,  0x00,   /* "STUVW" */
    0x58, 0x59, 0x5A, 0x0D, 0x0A,  0x00,   /* "XYZ" CR LF */
    0x53, 0x54, 0x55, 0x56, 0x57,  0x00,   /* "STUVW" */
    0x58, 0x59, 0x5A, 0x0D, 0x0A,  0x00    /* "XYZ" CR LF */
};
Test_UChar_WCHART_API(void)1166 static void Test_UChar_WCHART_API(void){
1167 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1168 UErrorCode err = U_ZERO_ERROR;
1169 const UChar* uSrc = src16j;
1170 int32_t uSrcLen = sizeof(src16j)/2;
1171 wchar_t* wDest = NULL;
1172 int32_t wDestLen = 0;
1173 int32_t reqLen= 0 ;
1174 UBool failed = FALSE;
1175 UChar* uDest = NULL;
1176 int32_t uDestLen = 0;
1177 int i =0;
1178 {
1179 /* Bad UErrorCode arguments. Make sure that the API doesn't crash, and that Purify doesn't complain. */
1180 if (u_strFromWCS(NULL,0,NULL,NULL,0,NULL) != NULL) {
1181 log_err("u_strFromWCS() should return NULL with a bad argument\n");
1182 }
1183 if (u_strToWCS(NULL,0,NULL,NULL,0,NULL) != NULL) {
1184 log_err("u_strToWCS() should return NULL with a bad argument\n");
1185 }
1186
1187 /* NULL source & destination. */
1188 err = U_ZERO_ERROR;
1189 u_strFromWCS(NULL,0,NULL,NULL,0,&err);
1190 if (err != U_STRING_NOT_TERMINATED_WARNING) {
1191 log_err("u_strFromWCS(NULL, NULL) failed. Error: %s \n", u_errorName(err));
1192 }
1193 err = U_ZERO_ERROR;
1194 u_strToWCS(NULL,0,NULL,NULL,0,&err);
1195 if (err != U_STRING_NOT_TERMINATED_WARNING) {
1196 log_err("u_strToWCS(NULL, NULL) failed. Error: %s \n", u_errorName(err));
1197 }
1198 err = U_ZERO_ERROR;
1199
1200 /* pre-flight*/
1201 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1202
1203 if(err == U_BUFFER_OVERFLOW_ERROR){
1204 err=U_ZERO_ERROR;
1205 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1206 wDestLen = reqLen+1;
1207 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1208 }
1209
1210 /* pre-flight */
1211 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1212
1213
1214 if(err == U_BUFFER_OVERFLOW_ERROR){
1215 err =U_ZERO_ERROR;
1216 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1217 uDestLen = reqLen + 1;
1218 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1219 }else if(U_FAILURE(err)){
1220
1221 log_err("u_strFromWCS() failed. Error: %s \n", u_errorName(err));
1222 return;
1223 }
1224
1225 for(i=0; i< uSrcLen; i++){
1226 if(uDest[i] != src16j[i]){
1227 log_verbose("u_str*WCS() failed for unterminated string expected: \\u%04X got: \\u%04X at index: %i \n", src16j[i] ,uDest[i],i);
1228 failed =TRUE;
1229 }
1230 }
1231
1232 if(U_FAILURE(err)){
1233 failed = TRUE;
1234 }
1235 if(failed){
1236 log_err("u_strToWCS() failed \n");
1237 }
1238 free(wDest);
1239 free(uDest);
1240
1241
1242 /* test with embeded nulls */
1243 uSrc = src16WithNulls;
1244 uSrcLen = sizeof(src16WithNulls)/2;
1245 wDestLen =0;
1246 uDestLen =0;
1247 wDest = NULL;
1248 uDest = NULL;
1249 /* pre-flight*/
1250 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1251
1252 if(err == U_BUFFER_OVERFLOW_ERROR){
1253 err=U_ZERO_ERROR;
1254 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1255 wDestLen = reqLen+1;
1256 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1257 }
1258
1259 /* pre-flight */
1260 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1261
1262 if(err == U_BUFFER_OVERFLOW_ERROR){
1263 err =U_ZERO_ERROR;
1264 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1265 uDestLen = reqLen + 1;
1266 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1267 }
1268
1269 if(!U_FAILURE(err)) {
1270 for(i=0; i< uSrcLen; i++){
1271 if(uDest[i] != src16WithNulls[i]){
1272 log_verbose("u_str*WCS() failed for string with nulls expected: \\u%04X got: \\u%04X at index: %i \n", src16WithNulls[i] ,uDest[i],i);
1273 failed =TRUE;
1274 }
1275 }
1276 }
1277
1278 if(U_FAILURE(err)){
1279 failed = TRUE;
1280 }
1281 if(failed){
1282 log_err("u_strToWCS() failed \n");
1283 }
1284 free(wDest);
1285 free(uDest);
1286
1287 }
1288
1289 {
1290
1291 uSrc = src16j;
1292 uSrcLen = sizeof(src16j)/2;
1293 wDestLen =0;
1294 uDestLen =0;
1295 wDest = NULL;
1296 uDest = NULL;
1297 wDestLen = 0;
1298 /* pre-flight*/
1299 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&err);
1300
1301 if(err == U_BUFFER_OVERFLOW_ERROR){
1302 err=U_ZERO_ERROR;
1303 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1304 wDestLen = reqLen+1;
1305 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&err);
1306 }
1307 uDestLen = 0;
1308 /* pre-flight */
1309 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&err);
1310
1311 if(err == U_BUFFER_OVERFLOW_ERROR){
1312 err =U_ZERO_ERROR;
1313 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1314 uDestLen = reqLen + 1;
1315 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&err);
1316 }
1317
1318
1319 if(!U_FAILURE(err)) {
1320 for(i=0; i< uSrcLen; i++){
1321 if(uDest[i] != src16j[i]){
1322 log_verbose("u_str*WCS() failed for null terminated string expected: \\u%04X got: \\u%04X at index: %i \n", src16j[i] ,uDest[i],i);
1323 failed =TRUE;
1324 }
1325 }
1326 }
1327
1328 if(U_FAILURE(err)){
1329 failed = TRUE;
1330 }
1331 if(failed){
1332 log_err("u_strToWCS() failed \n");
1333 }
1334 free(wDest);
1335 free(uDest);
1336 }
1337
1338 /*
1339 * Test u_terminateWChars().
1340 * All u_terminateXYZ() use the same implementation macro;
1341 * we test this function to improve API coverage.
1342 */
1343 {
1344 wchar_t buffer[10];
1345
1346 err=U_ZERO_ERROR;
1347 buffer[3]=0x20ac;
1348 wDestLen=u_terminateWChars(buffer, UPRV_LENGTHOF(buffer), 3, &err);
1349 if(err!=U_ZERO_ERROR || wDestLen!=3 || buffer[3]!=0) {
1350 log_err("u_terminateWChars(buffer, all, 3, zero) failed: %s length %d [3]==U+%04x\n",
1351 u_errorName(err), wDestLen, buffer[3]);
1352 }
1353
1354 err=U_ZERO_ERROR;
1355 buffer[3]=0x20ac;
1356 wDestLen=u_terminateWChars(buffer, 3, 3, &err);
1357 if(err!=U_STRING_NOT_TERMINATED_WARNING || wDestLen!=3 || buffer[3]!=0x20ac) {
1358 log_err("u_terminateWChars(buffer, 3, 3, zero) failed: %s length %d [3]==U+%04x\n",
1359 u_errorName(err), wDestLen, buffer[3]);
1360 }
1361
1362 err=U_STRING_NOT_TERMINATED_WARNING;
1363 buffer[3]=0x20ac;
1364 wDestLen=u_terminateWChars(buffer, UPRV_LENGTHOF(buffer), 3, &err);
1365 if(err!=U_ZERO_ERROR || wDestLen!=3 || buffer[3]!=0) {
1366 log_err("u_terminateWChars(buffer, all, 3, not-terminated) failed: %s length %d [3]==U+%04x\n",
1367 u_errorName(err), wDestLen, buffer[3]);
1368 }
1369
1370 err=U_ZERO_ERROR;
1371 buffer[3]=0x20ac;
1372 wDestLen=u_terminateWChars(buffer, 2, 3, &err);
1373 if(err!=U_BUFFER_OVERFLOW_ERROR || wDestLen!=3 || buffer[3]!=0x20ac) {
1374 log_err("u_terminateWChars(buffer, 2, 3, zero) failed: %s length %d [3]==U+%04x\n",
1375 u_errorName(err), wDestLen, buffer[3]);
1376 }
1377 }
1378 #else
1379 log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32");
1380 #endif
1381 }
1382
Test_widestrs()1383 static void Test_widestrs()
1384 {
1385 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1386 wchar_t ws[100];
1387 UChar rts[100];
1388 int32_t wcap = UPRV_LENGTHOF(ws);
1389 int32_t wl;
1390 int32_t rtcap = UPRV_LENGTHOF(rts);
1391 int32_t rtl;
1392 wchar_t *wcs;
1393 UChar *cp;
1394 const char *errname;
1395 UChar ustr[] = {'h', 'e', 'l', 'l', 'o', 0};
1396 int32_t ul = UPRV_LENGTHOF(ustr) -1;
1397 char astr[100];
1398
1399 UErrorCode err;
1400
1401 err = U_ZERO_ERROR;
1402 wcs = u_strToWCS(ws, wcap, &wl, ustr, ul, &err);
1403 if (U_FAILURE(err)) {
1404 errname = u_errorName(err);
1405 log_err("test_widestrs: u_strToWCS error: %s!\n",errname);
1406 }
1407 if(ul!=wl){
1408 log_err("u_strToWCS: ustr = %s, ul = %d, ws = %S, wl = %d!\n", u_austrcpy(astr, ustr), ul, ws, wl);
1409 }
1410 err = U_ZERO_ERROR;
1411 wl = (int32_t)uprv_wcslen(wcs);
1412 cp = u_strFromWCS(rts, rtcap, &rtl, wcs, wl, &err);
1413 (void)cp; /* Suppress set but not used warning. */
1414 if (U_FAILURE(err)) {
1415 errname = u_errorName(err);
1416 fprintf(stderr, "test_widestrs: ucnv_wcstombs error: %s!\n",errname);
1417 }
1418 if(wl != rtl){
1419 log_err("u_strFromWCS: wcs = %S, wl = %d,rts = %s, rtl = %d!\n", wcs, wl, u_austrcpy(astr, rts), rtl);
1420 }
1421 #else
1422 log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32");
1423 #endif
1424 }
1425
1426 static void
Test_WCHART_LongString()1427 Test_WCHART_LongString(){
1428 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1429 UErrorCode status = U_ZERO_ERROR;
1430 const char* testdatapath=loadTestData(&status);
1431 UResourceBundle *theBundle = ures_open(testdatapath, "testtypes", &status);
1432 int32_t strLen =0;
1433 const UChar* str = ures_getStringByKey(theBundle, "testinclude",&strLen,&status);
1434 const UChar* uSrc = str;
1435 int32_t uSrcLen = strLen;
1436 int32_t wDestLen =0, reqLen=0, i=0;
1437 int32_t uDestLen =0;
1438 wchar_t* wDest = NULL;
1439 UChar* uDest = NULL;
1440 UBool failed = FALSE;
1441
1442 log_verbose("Loaded string of %d UChars\n", uSrcLen);
1443
1444 if(U_FAILURE(status)){
1445 log_data_err("Could not get testinclude resource from testtypes bundle. Error: %s\n",u_errorName(status));
1446 return;
1447 }
1448
1449 /* pre-flight*/
1450 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&status);
1451
1452 if(status == U_BUFFER_OVERFLOW_ERROR){
1453 status=U_ZERO_ERROR;
1454 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1455 wDestLen = reqLen+1;
1456 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&status);
1457 log_verbose("To %d*%d-byte wchar_ts\n", reqLen,sizeof(wchar_t));
1458 }
1459
1460 {
1461 int j;
1462 for(j=0;j>=0&&j<reqLen;j++) {
1463 if(wDest[j]!=uSrc[j]) {
1464 log_verbose("Diff %04X vs %04X @ %d\n", wDest[j],uSrc[j],j);
1465 break;
1466 }
1467 }
1468 }
1469
1470 uDestLen = 0;
1471 /* pre-flight */
1472 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&status);
1473 if(status == U_BUFFER_OVERFLOW_ERROR){
1474 status =U_ZERO_ERROR;
1475 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1476 u_memset(uDest,0xFFFF,reqLen+1);
1477 uDestLen = reqLen + 1;
1478 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&status);
1479 log_verbose("Back to %d UChars\n", reqLen);
1480 }
1481 #if defined(U_WCHAR_IS_UTF16)
1482 log_verbose("U_WCHAR_IS_UTF16\n");
1483 #elif defined(U_WCHAR_IS_UTF32)
1484 log_verbose("U_WCHAR_IS_UTF32\n");
1485 #else
1486 log_verbose("U_WCHAR_IS_idunno (not UTF)\n");
1487 #endif
1488
1489 if(reqLen!=uSrcLen) {
1490 log_err("Error: dest len is %d but expected src len %d\n", reqLen, uSrcLen);
1491 }
1492
1493 for(i=0; i< uSrcLen; i++){
1494 if(uDest[i] != str[i]){
1495 log_verbose("u_str*WCS() failed for null terminated string expected: \\u%04X got: \\u%04X at index: %i \n", str[i], uDest[i],i);
1496 failed =TRUE;
1497 }
1498 }
1499
1500 if(U_FAILURE(status)){
1501 failed = TRUE;
1502 }
1503 if(failed){
1504 log_err("u_strToWCS() failed \n");
1505 }
1506 free(wDest);
1507 free(uDest);
1508 /* close the bundle */
1509 ures_close(theBundle);
1510 #else
1511 log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32");
1512 #endif
1513 }
1514
Test_strToJavaModifiedUTF8()1515 static void Test_strToJavaModifiedUTF8() {
1516 static const UChar src[]={
1517 0x61, 0x62, 0x63, 0xe1, 0xe2, 0xe3,
1518 0xe01, 0xe02, 0xe03, 0xe001, 0xe002, 0xe003,
1519 0xd800, 0xdc00, 0xdc00, 0xd800, 0,
1520 0xdbff, 0xdfff,
1521 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xed, 0xe0e, 0x6f
1522 };
1523 static const uint8_t expected[]={
1524 0x61, 0x62, 0x63, 0xc3, 0xa1, 0xc3, 0xa2, 0xc3, 0xa3,
1525 0xe0, 0xb8, 0x81, 0xe0, 0xb8, 0x82, 0xe0, 0xb8, 0x83,
1526 0xee, 0x80, 0x81, 0xee, 0x80, 0x82, 0xee, 0x80, 0x83,
1527 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0xc0, 0x80,
1528 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1529 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xc3, 0xad, 0xe0, 0xb8, 0x8e, 0x6f
1530 };
1531 static const UChar shortSrc[]={
1532 0xe01, 0xe1, 0x61
1533 };
1534 static const uint8_t shortExpected[]={
1535 0xe0, 0xb8, 0x81, 0xc3, 0xa1, 0x61
1536 };
1537 static const UChar asciiNul[]={
1538 0x61, 0x62, 0x63, 0
1539 };
1540 static const uint8_t asciiNulExpected[]={
1541 0x61, 0x62, 0x63
1542 };
1543 char dest[200];
1544 char *p;
1545 int32_t length, expectedTerminatedLength;
1546 UErrorCode errorCode;
1547
1548 expectedTerminatedLength=(int32_t)(strstr((const char *)expected, "\xc0\x80")-
1549 (const char *)expected);
1550
1551 errorCode=U_ZERO_ERROR;
1552 length=-5;
1553 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1554 src, UPRV_LENGTHOF(src), &errorCode);
1555 if( U_FAILURE(errorCode) || p!=dest ||
1556 length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1557 dest[length]!=0
1558 ) {
1559 log_err("u_strToJavaModifiedUTF8(normal) failed - %s\n", u_errorName(errorCode));
1560 }
1561 memset(dest, 0xff, sizeof(dest));
1562 errorCode=U_ZERO_ERROR;
1563 length=-5;
1564 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), NULL,
1565 src, UPRV_LENGTHOF(src), &errorCode);
1566 if( U_FAILURE(errorCode) || p!=dest ||
1567 0!=memcmp(dest, expected, UPRV_LENGTHOF(expected)) ||
1568 dest[UPRV_LENGTHOF(expected)]!=0
1569 ) {
1570 log_err("u_strToJavaModifiedUTF8(normal, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1571 }
1572 memset(dest, 0xff, sizeof(dest));
1573 errorCode=U_ZERO_ERROR;
1574 length=-5;
1575 p=u_strToJavaModifiedUTF8(dest, UPRV_LENGTHOF(expected), &length,
1576 src, UPRV_LENGTHOF(src), &errorCode);
1577 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || p!=dest ||
1578 length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1579 dest[length]!=(char)0xff
1580 ) {
1581 log_err("u_strToJavaModifiedUTF8(tight) failed - %s\n", u_errorName(errorCode));
1582 }
1583 memset(dest, 0xff, sizeof(dest));
1584 errorCode=U_ZERO_ERROR;
1585 length=-5;
1586 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, src, -1, &errorCode);
1587 if( U_FAILURE(errorCode) || p!=dest ||
1588 length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
1589 dest[length]!=0
1590 ) {
1591 log_err("u_strToJavaModifiedUTF8(NUL-terminated) failed - %s\n", u_errorName(errorCode));
1592 }
1593 memset(dest, 0xff, sizeof(dest));
1594 errorCode=U_ZERO_ERROR;
1595 length=-5;
1596 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), NULL, src, -1, &errorCode);
1597 if( U_FAILURE(errorCode) || p!=dest ||
1598 0!=memcmp(dest, expected, expectedTerminatedLength) ||
1599 dest[expectedTerminatedLength]!=0
1600 ) {
1601 log_err("u_strToJavaModifiedUTF8(NUL-terminated, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1602 }
1603 memset(dest, 0xff, sizeof(dest));
1604 errorCode=U_ZERO_ERROR;
1605 length=-5;
1606 p=u_strToJavaModifiedUTF8(dest, UPRV_LENGTHOF(expected)/2, &length,
1607 src, UPRV_LENGTHOF(src), &errorCode);
1608 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1609 length!=UPRV_LENGTHOF(expected) || dest[UPRV_LENGTHOF(expected)/2]!=(char)0xff
1610 ) {
1611 log_err("u_strToJavaModifiedUTF8(overflow) failed - %s\n", u_errorName(errorCode));
1612 }
1613 memset(dest, 0xff, sizeof(dest));
1614 errorCode=U_ZERO_ERROR;
1615 length=-5;
1616 p=u_strToJavaModifiedUTF8(NULL, 0, &length,
1617 src, UPRV_LENGTHOF(src), &errorCode);
1618 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1619 length!=UPRV_LENGTHOF(expected) || dest[0]!=(char)0xff
1620 ) {
1621 log_err("u_strToJavaModifiedUTF8(pure preflighting) failed - %s\n", u_errorName(errorCode));
1622 }
1623 memset(dest, 0xff, sizeof(dest));
1624 errorCode=U_ZERO_ERROR;
1625 length=-5;
1626 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1627 shortSrc, UPRV_LENGTHOF(shortSrc), &errorCode);
1628 if( U_FAILURE(errorCode) || p!=dest ||
1629 length!=UPRV_LENGTHOF(shortExpected) || 0!=memcmp(dest, shortExpected, length) ||
1630 dest[length]!=0
1631 ) {
1632 log_err("u_strToJavaModifiedUTF8(short) failed - %s\n", u_errorName(errorCode));
1633 }
1634 memset(dest, 0xff, sizeof(dest));
1635 errorCode=U_ZERO_ERROR;
1636 length=-5;
1637 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1638 asciiNul, -1, &errorCode);
1639 if( U_FAILURE(errorCode) || p!=dest ||
1640 length!=UPRV_LENGTHOF(asciiNulExpected) || 0!=memcmp(dest, asciiNulExpected, length) ||
1641 dest[length]!=0
1642 ) {
1643 log_err("u_strToJavaModifiedUTF8(asciiNul) failed - %s\n", u_errorName(errorCode));
1644 }
1645 memset(dest, 0xff, sizeof(dest));
1646 errorCode=U_ZERO_ERROR;
1647 length=-5;
1648 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1649 NULL, 0, &errorCode);
1650 if( U_FAILURE(errorCode) || p!=dest ||
1651 length!=0 || dest[0]!=0
1652 ) {
1653 log_err("u_strToJavaModifiedUTF8(empty) failed - %s\n", u_errorName(errorCode));
1654 }
1655
1656 /* illegal arguments */
1657 memset(dest, 0xff, sizeof(dest));
1658 errorCode=U_ZERO_ERROR;
1659 length=-5;
1660 p=u_strToJavaModifiedUTF8(NULL, sizeof(dest), &length,
1661 src, UPRV_LENGTHOF(src), &errorCode);
1662 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1663 log_err("u_strToJavaModifiedUTF8(dest=NULL) failed - %s\n", u_errorName(errorCode));
1664 }
1665 memset(dest, 0xff, sizeof(dest));
1666 errorCode=U_ZERO_ERROR;
1667 length=-5;
1668 p=u_strToJavaModifiedUTF8(dest, -1, &length,
1669 src, UPRV_LENGTHOF(src), &errorCode);
1670 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1671 log_err("u_strToJavaModifiedUTF8(destCapacity<0) failed - %s\n", u_errorName(errorCode));
1672 }
1673 memset(dest, 0xff, sizeof(dest));
1674 errorCode=U_ZERO_ERROR;
1675 length=-5;
1676 p=u_strToJavaModifiedUTF8(dest, sizeof(dest), &length,
1677 NULL, UPRV_LENGTHOF(src), &errorCode);
1678 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1679 log_err("u_strToJavaModifiedUTF8(src=NULL) failed - %s\n", u_errorName(errorCode));
1680 }
1681 memset(dest, 0xff, sizeof(dest));
1682 errorCode=U_ZERO_ERROR;
1683 length=-5;
1684 p=u_strToJavaModifiedUTF8(dest, sizeof(dest), &length,
1685 NULL, -1, &errorCode);
1686 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1687 log_err("u_strToJavaModifiedUTF8(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode));
1688 }
1689 }
1690
Test_strFromJavaModifiedUTF8()1691 static void Test_strFromJavaModifiedUTF8() {
1692 static const uint8_t src[]={
1693 0x61, 0x62, 0x63, 0xc3, 0xa1, 0xc3, 0xa2, 0xc3, 0xa3,
1694 0xe0, 0xb8, 0x81, 0xe0, 0xb8, 0x82, 0xe0, 0xb8, 0x83,
1695 0xee, 0x80, 0x81, 0xee, 0x80, 0x82, 0xee, 0x80, 0x83,
1696 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0,
1697 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1698 0x81, 0xc0, 0xe0, 0xb8, 0xf0, 0x90, 0x80, 0x80, /* invalid sequences */
1699 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
1700 0xe0, 0x81, 0xac, 0xe0, 0x83, 0xad, /* non-shortest forms are allowed */
1701 0xe0, 0xb8, 0x8e, 0x6f
1702 };
1703 static const UChar expected[]={
1704 0x61, 0x62, 0x63, 0xe1, 0xe2, 0xe3,
1705 0xe01, 0xe02, 0xe03, 0xe001, 0xe002, 0xe003,
1706 0xd800, 0xdc00, 0xdc00, 0xd800, 0,
1707 0xdbff, 0xdfff,
1708 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1709 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
1710 0x6c, 0xed,
1711 0xe0e, 0x6f
1712 };
1713 static const uint8_t shortSrc[]={
1714 0xe0, 0xb8, 0x81, 0xc3, 0xa1, 0x61
1715 };
1716 static const UChar shortExpected[]={
1717 0xe01, 0xe1, 0x61
1718 };
1719 static const uint8_t asciiNul[]={
1720 0x61, 0x62, 0x63, 0
1721 };
1722 static const UChar asciiNulExpected[]={
1723 0x61, 0x62, 0x63
1724 };
1725 static const uint8_t invalid[]={
1726 0x81, 0xc0, 0xe0, 0xb8, 0xf0, 0x90, 0x80, 0x80
1727 };
1728 static const UChar invalidExpectedFFFD[]={
1729 0xfffd, 0xfffd, 0xfffd, 0xfffd
1730 };
1731 static const UChar invalidExpected50000[]={
1732 0xd900, 0xdc00, 0xd900, 0xdc00, 0xd900, 0xdc00, 0xd900, 0xdc00
1733 };
1734 UChar dest[200];
1735 UChar *p;
1736 int32_t length, expectedTerminatedLength;
1737 int32_t numSubstitutions;
1738 UErrorCode errorCode;
1739
1740 expectedTerminatedLength=(int32_t)(u_strchr(expected, 0)-expected);
1741
1742 errorCode=U_ZERO_ERROR;
1743 length=numSubstitutions=-5;
1744 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1745 (const char *)src, UPRV_LENGTHOF(src),
1746 0xfffd, &numSubstitutions, &errorCode);
1747 if( U_FAILURE(errorCode) || p!=dest ||
1748 length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1749 dest[length]!=0 ||
1750 numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1751 ) {
1752 log_err("u_strFromJavaModifiedUTF8WithSub(normal) failed - %s\n", u_errorName(errorCode));
1753 }
1754 memset(dest, 0xff, sizeof(dest));
1755 errorCode=U_ZERO_ERROR;
1756 length=numSubstitutions=-5;
1757 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), NULL,
1758 (const char *)src, UPRV_LENGTHOF(src),
1759 0xfffd, &numSubstitutions, &errorCode);
1760 if( U_FAILURE(errorCode) || p!=dest ||
1761 0!=memcmp(dest, expected, UPRV_LENGTHOF(expected)) ||
1762 dest[UPRV_LENGTHOF(expected)]!=0 ||
1763 numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1764 ) {
1765 log_err("u_strFromJavaModifiedUTF8WithSub(normal, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1766 }
1767 memset(dest, 0xff, sizeof(dest));
1768 errorCode=U_ZERO_ERROR;
1769 length=numSubstitutions=-5;
1770 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1771 (const char *)src, UPRV_LENGTHOF(src),
1772 0xfffd, NULL, &errorCode);
1773 if( U_FAILURE(errorCode) || p!=dest ||
1774 length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1775 dest[length]!=0
1776 ) {
1777 log_err("u_strFromJavaModifiedUTF8WithSub(normal, pNumSubstitutions=NULL) failed - %s\n", u_errorName(errorCode));
1778 }
1779 memset(dest, 0xff, sizeof(dest));
1780 errorCode=U_ZERO_ERROR;
1781 length=numSubstitutions=-5;
1782 p=u_strFromJavaModifiedUTF8WithSub(dest, UPRV_LENGTHOF(expected), &length,
1783 (const char *)src, UPRV_LENGTHOF(src),
1784 0xfffd, &numSubstitutions, &errorCode);
1785 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || p!=dest ||
1786 length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1787 dest[length]!=0xffff ||
1788 numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1789 ) {
1790 log_err("u_strFromJavaModifiedUTF8WithSub(tight) failed - %s\n", u_errorName(errorCode));
1791 }
1792 memset(dest, 0xff, sizeof(dest));
1793 errorCode=U_ZERO_ERROR;
1794 length=numSubstitutions=-5;
1795 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1796 (const char *)src, -1,
1797 0xfffd, &numSubstitutions, &errorCode);
1798 if( U_FAILURE(errorCode) || p!=dest ||
1799 length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
1800 dest[length]!=0 ||
1801 numSubstitutions!=0
1802 ) {
1803 log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated) failed - %s\n", u_errorName(errorCode));
1804 }
1805 memset(dest, 0xff, sizeof(dest));
1806 errorCode=U_ZERO_ERROR;
1807 length=numSubstitutions=-5;
1808 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), NULL,
1809 (const char *)src, -1,
1810 0xfffd, &numSubstitutions, &errorCode);
1811 if( U_FAILURE(errorCode) || p!=dest ||
1812 0!=memcmp(dest, expected, expectedTerminatedLength) ||
1813 dest[expectedTerminatedLength]!=0 ||
1814 numSubstitutions!=0
1815 ) {
1816 log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1817 }
1818 memset(dest, 0xff, sizeof(dest));
1819 errorCode=U_ZERO_ERROR;
1820 length=numSubstitutions=-5;
1821 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1822 (const char *)src, -1,
1823 0xfffd, NULL, &errorCode);
1824 if( U_FAILURE(errorCode) || p!=dest ||
1825 length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
1826 dest[length]!=0
1827 ) {
1828 log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated, pNumSubstitutions=NULL) failed - %s\n", u_errorName(errorCode));
1829 }
1830 memset(dest, 0xff, sizeof(dest));
1831 errorCode=U_ZERO_ERROR;
1832 length=numSubstitutions=-5;
1833 p=u_strFromJavaModifiedUTF8WithSub(dest, UPRV_LENGTHOF(expected)/2, &length,
1834 (const char *)src, UPRV_LENGTHOF(src),
1835 0xfffd, &numSubstitutions, &errorCode);
1836 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1837 length!=UPRV_LENGTHOF(expected) || dest[UPRV_LENGTHOF(expected)/2]!=0xffff
1838 ) {
1839 log_err("u_strFromJavaModifiedUTF8WithSub(overflow) failed - %s\n", u_errorName(errorCode));
1840 }
1841 memset(dest, 0xff, sizeof(dest));
1842 errorCode=U_ZERO_ERROR;
1843 length=numSubstitutions=-5;
1844 p=u_strFromJavaModifiedUTF8WithSub(NULL, 0, &length,
1845 (const char *)src, UPRV_LENGTHOF(src),
1846 0xfffd, &numSubstitutions, &errorCode);
1847 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1848 length!=UPRV_LENGTHOF(expected) || dest[0]!=0xffff
1849 ) {
1850 log_err("u_strFromJavaModifiedUTF8WithSub(pure preflighting) failed - %s\n", u_errorName(errorCode));
1851 }
1852 memset(dest, 0xff, sizeof(dest));
1853 errorCode=U_ZERO_ERROR;
1854 length=numSubstitutions=-5;
1855 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1856 (const char *)shortSrc, UPRV_LENGTHOF(shortSrc),
1857 0xfffd, &numSubstitutions, &errorCode);
1858 if( U_FAILURE(errorCode) || p!=dest ||
1859 length!=UPRV_LENGTHOF(shortExpected) || 0!=memcmp(dest, shortExpected, length) ||
1860 dest[length]!=0 ||
1861 numSubstitutions!=0
1862 ) {
1863 log_err("u_strFromJavaModifiedUTF8WithSub(short) failed - %s\n", u_errorName(errorCode));
1864 }
1865 memset(dest, 0xff, sizeof(dest));
1866 errorCode=U_ZERO_ERROR;
1867 length=numSubstitutions=-5;
1868 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1869 (const char *)asciiNul, -1,
1870 0xfffd, &numSubstitutions, &errorCode);
1871 if( U_FAILURE(errorCode) || p!=dest ||
1872 length!=UPRV_LENGTHOF(asciiNulExpected) || 0!=memcmp(dest, asciiNulExpected, length) ||
1873 dest[length]!=0 ||
1874 numSubstitutions!=0
1875 ) {
1876 log_err("u_strFromJavaModifiedUTF8WithSub(asciiNul) failed - %s\n", u_errorName(errorCode));
1877 }
1878 memset(dest, 0xff, sizeof(dest));
1879 errorCode=U_ZERO_ERROR;
1880 length=numSubstitutions=-5;
1881 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1882 NULL, 0, 0xfffd, &numSubstitutions, &errorCode);
1883 if( U_FAILURE(errorCode) || p!=dest ||
1884 length!=0 || dest[0]!=0 ||
1885 numSubstitutions!=0
1886 ) {
1887 log_err("u_strFromJavaModifiedUTF8WithSub(empty) failed - %s\n", u_errorName(errorCode));
1888 }
1889 memset(dest, 0xff, sizeof(dest));
1890 errorCode=U_ZERO_ERROR;
1891 length=numSubstitutions=-5;
1892 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1893 (const char *)invalid, UPRV_LENGTHOF(invalid),
1894 0xfffd, &numSubstitutions, &errorCode);
1895 if( U_FAILURE(errorCode) || p!=dest ||
1896 length!=UPRV_LENGTHOF(invalidExpectedFFFD) || 0!=memcmp(dest, invalidExpectedFFFD, length) ||
1897 dest[length]!=0 ||
1898 numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1899 ) {
1900 log_err("u_strFromJavaModifiedUTF8WithSub(invalid->fffd) failed - %s\n", u_errorName(errorCode));
1901 }
1902 memset(dest, 0xff, sizeof(dest));
1903 errorCode=U_ZERO_ERROR;
1904 length=numSubstitutions=-5;
1905 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1906 (const char *)invalid, UPRV_LENGTHOF(invalid),
1907 0x50000, &numSubstitutions, &errorCode);
1908 if( U_FAILURE(errorCode) || p!=dest ||
1909 length!=UPRV_LENGTHOF(invalidExpected50000) || 0!=memcmp(dest, invalidExpected50000, length) ||
1910 dest[length]!=0 ||
1911 numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD) /* not ...50000 */
1912 ) {
1913 log_err("u_strFromJavaModifiedUTF8WithSub(invalid->50000) failed - %s\n", u_errorName(errorCode));
1914 }
1915 memset(dest, 0xff, sizeof(dest));
1916 errorCode=U_ZERO_ERROR;
1917 length=numSubstitutions=-5;
1918 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1919 (const char *)invalid, UPRV_LENGTHOF(invalid),
1920 U_SENTINEL, &numSubstitutions, &errorCode);
1921 if(errorCode!=U_INVALID_CHAR_FOUND || dest[0]!=0xffff || numSubstitutions!=0) {
1922 log_err("u_strFromJavaModifiedUTF8WithSub(invalid->error) failed - %s\n", u_errorName(errorCode));
1923 }
1924 memset(dest, 0xff, sizeof(dest));
1925 errorCode=U_ZERO_ERROR;
1926 length=numSubstitutions=-5;
1927 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1928 (const char *)src, UPRV_LENGTHOF(src),
1929 U_SENTINEL, &numSubstitutions, &errorCode);
1930 if( errorCode!=U_INVALID_CHAR_FOUND ||
1931 length>=UPRV_LENGTHOF(expected) || dest[UPRV_LENGTHOF(expected)-1]!=0xffff ||
1932 numSubstitutions!=0
1933 ) {
1934 log_err("u_strFromJavaModifiedUTF8WithSub(normal->error) failed - %s\n", u_errorName(errorCode));
1935 }
1936
1937 /* illegal arguments */
1938 memset(dest, 0xff, sizeof(dest));
1939 errorCode=U_ZERO_ERROR;
1940 length=numSubstitutions=-5;
1941 p=u_strFromJavaModifiedUTF8WithSub(NULL, sizeof(dest), &length,
1942 (const char *)src, UPRV_LENGTHOF(src),
1943 0xfffd, &numSubstitutions, &errorCode);
1944 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1945 log_err("u_strFromJavaModifiedUTF8WithSub(dest=NULL) failed - %s\n", u_errorName(errorCode));
1946 }
1947 memset(dest, 0xff, sizeof(dest));
1948 errorCode=U_ZERO_ERROR;
1949 length=numSubstitutions=-5;
1950 p=u_strFromJavaModifiedUTF8WithSub(dest, -1, &length,
1951 (const char *)src, UPRV_LENGTHOF(src),
1952 0xfffd, &numSubstitutions, &errorCode);
1953 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1954 log_err("u_strFromJavaModifiedUTF8WithSub(destCapacity<0) failed - %s\n", u_errorName(errorCode));
1955 }
1956 memset(dest, 0xff, sizeof(dest));
1957 errorCode=U_ZERO_ERROR;
1958 length=numSubstitutions=-5;
1959 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1960 NULL, UPRV_LENGTHOF(src),
1961 0xfffd, &numSubstitutions, &errorCode);
1962 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1963 log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL) failed - %s\n", u_errorName(errorCode));
1964 }
1965 memset(dest, 0xff, sizeof(dest));
1966 errorCode=U_ZERO_ERROR;
1967 length=numSubstitutions=-5;
1968 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1969 NULL, -1, 0xfffd, &numSubstitutions, &errorCode);
1970 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1971 log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode));
1972 }
1973 memset(dest, 0xff, sizeof(dest));
1974 errorCode=U_ZERO_ERROR;
1975 length=numSubstitutions=-5;
1976 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1977 (const char *)src, UPRV_LENGTHOF(src),
1978 0x110000, &numSubstitutions, &errorCode);
1979 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1980 log_err("u_strFromJavaModifiedUTF8WithSub(subchar=U_SENTINEL) failed - %s\n", u_errorName(errorCode));
1981 }
1982 memset(dest, 0xff, sizeof(dest));
1983 errorCode=U_ZERO_ERROR;
1984 length=numSubstitutions=-5;
1985 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1986 (const char *)src, UPRV_LENGTHOF(src),
1987 0xdfff, &numSubstitutions, &errorCode);
1988 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1989 log_err("u_strFromJavaModifiedUTF8WithSub(subchar is surrogate) failed - %s\n", u_errorName(errorCode));
1990 }
1991 }
1992
1993 /* test that string transformation functions permit NULL source pointer when source length==0 */
TestNullEmptySource()1994 static void TestNullEmptySource() {
1995 char dest8[4]={ 3, 3, 3, 3 };
1996 UChar dest16[4]={ 3, 3, 3, 3 };
1997 UChar32 dest32[4]={ 3, 3, 3, 3 };
1998 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1999 wchar_t destW[4]={ 3, 3, 3, 3 };
2000 #endif
2001
2002 int32_t length;
2003 UErrorCode errorCode;
2004
2005 /* u_strFromXyz() */
2006
2007 dest16[0]=3;
2008 length=3;
2009 errorCode=U_ZERO_ERROR;
2010 u_strFromUTF8(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2011 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2012 log_err("u_strFromUTF8(source=NULL, sourceLength=0) failed\n");
2013 }
2014
2015 dest16[0]=3;
2016 length=3;
2017 errorCode=U_ZERO_ERROR;
2018 u_strFromUTF8WithSub(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2019 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2020 log_err("u_strFromUTF8WithSub(source=NULL, sourceLength=0) failed\n");
2021 }
2022
2023 dest16[0]=3;
2024 length=3;
2025 errorCode=U_ZERO_ERROR;
2026 u_strFromUTF8Lenient(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2027 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2028 log_err("u_strFromUTF8Lenient(source=NULL, sourceLength=0) failed\n");
2029 }
2030
2031 dest16[0]=3;
2032 length=3;
2033 errorCode=U_ZERO_ERROR;
2034 u_strFromUTF32(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2035 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2036 log_err("u_strFromUTF32(source=NULL, sourceLength=0) failed\n");
2037 }
2038
2039 dest16[0]=3;
2040 length=3;
2041 errorCode=U_ZERO_ERROR;
2042 u_strFromUTF32WithSub(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2043 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2044 log_err("u_strFromUTF32WithSub(source=NULL, sourceLength=0) failed\n");
2045 }
2046
2047 dest16[0]=3;
2048 length=3;
2049 errorCode=U_ZERO_ERROR;
2050 u_strFromJavaModifiedUTF8WithSub(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2051 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2052 log_err("u_strFromJavaModifiedUTF8WithSub(source=NULL, sourceLength=0) failed\n");
2053 }
2054
2055 /* u_strToXyz() */
2056
2057 dest8[0]=3;
2058 length=3;
2059 errorCode=U_ZERO_ERROR;
2060 u_strToUTF8(dest8, UPRV_LENGTHOF(dest8), &length, NULL, 0, &errorCode);
2061 if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) {
2062 log_err("u_strToUTF8(source=NULL, sourceLength=0) failed\n");
2063 }
2064
2065 dest8[0]=3;
2066 length=3;
2067 errorCode=U_ZERO_ERROR;
2068 u_strToUTF8WithSub(dest8, UPRV_LENGTHOF(dest8), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2069 if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) {
2070 log_err("u_strToUTF8(source=NULL, sourceLength=0) failed\n");
2071 }
2072
2073 dest32[0]=3;
2074 length=3;
2075 errorCode=U_ZERO_ERROR;
2076 u_strToUTF32(dest32, UPRV_LENGTHOF(dest32), &length, NULL, 0, &errorCode);
2077 if(errorCode!=U_ZERO_ERROR || length!=0 || dest32[0]!=0 || dest32[1]!=3) {
2078 log_err("u_strToUTF32(source=NULL, sourceLength=0) failed\n");
2079 }
2080
2081 dest32[0]=3;
2082 length=3;
2083 errorCode=U_ZERO_ERROR;
2084 u_strToUTF32WithSub(dest32, UPRV_LENGTHOF(dest32), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2085 if(errorCode!=U_ZERO_ERROR || length!=0 || dest32[0]!=0 || dest32[1]!=3) {
2086 log_err("u_strToUTF32WithSub(source=NULL, sourceLength=0) failed\n");
2087 }
2088
2089 dest8[0]=3;
2090 length=3;
2091 errorCode=U_ZERO_ERROR;
2092 u_strToJavaModifiedUTF8(dest8, UPRV_LENGTHOF(dest8), &length, NULL, 0, &errorCode);
2093 if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) {
2094 log_err("u_strToJavaModifiedUTF8(source=NULL, sourceLength=0) failed\n");
2095 }
2096
2097 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
2098
2099 dest16[0]=3;
2100 length=3;
2101 errorCode=U_ZERO_ERROR;
2102 u_strFromWCS(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2103 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2104 log_err("u_strFromWCS(source=NULL, sourceLength=0) failed\n");
2105 }
2106
2107 destW[0]=3;
2108 length=3;
2109 errorCode=U_ZERO_ERROR;
2110 u_strToWCS(destW, UPRV_LENGTHOF(destW), &length, NULL, 0, &errorCode);
2111 if(errorCode!=U_ZERO_ERROR || length!=0 || destW[0]!=0 || destW[1]!=3) {
2112 log_err("u_strToWCS(source=NULL, sourceLength=0) failed\n");
2113 }
2114
2115 #endif
2116 }
2117