1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 2001-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
9 *
10 * File custrtrn.C
11 *
12 * Modification History:
13 * Name Description
14 * Ram String transformations test
15 *********************************************************************************
16 */
17 /****************************************************************************/
18
19
20 #include <stdbool.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include "unicode/utypes.h"
25 #include "unicode/ustring.h"
26 #include "unicode/ures.h"
27 #include "ustr_imp.h"
28 #include "cintltst.h"
29 #include "cmemory.h"
30 #include "cstring.h"
31 #include "cwchar.h"
32
33 void addUCharTransformTest(TestNode** root);
34
35 static void Test_strToUTF32(void);
36 static void Test_strToUTF32_surrogates(void);
37 static void Test_strFromUTF32(void);
38 static void Test_strFromUTF32_surrogates(void);
39 static void Test_UChar_UTF8_API(void);
40 static void Test_FromUTF8(void);
41 static void Test_FromUTF8Lenient(void);
42 static void Test_UChar_WCHART_API(void);
43 static void Test_widestrs(void);
44 static void Test_WCHART_LongString(void);
45 static void Test_strToJavaModifiedUTF8(void);
46 static void Test_strFromJavaModifiedUTF8(void);
47 static void TestNullEmptySource(void);
48
49 void
addUCharTransformTest(TestNode ** root)50 addUCharTransformTest(TestNode** root)
51 {
52 addTest(root, &Test_strToUTF32, "custrtrn/Test_strToUTF32");
53 addTest(root, &Test_strToUTF32_surrogates, "custrtrn/Test_strToUTF32_surrogates");
54 addTest(root, &Test_strFromUTF32, "custrtrn/Test_strFromUTF32");
55 addTest(root, &Test_strFromUTF32_surrogates, "custrtrn/Test_strFromUTF32_surrogates");
56 addTest(root, &Test_UChar_UTF8_API, "custrtrn/Test_UChar_UTF8_API");
57 addTest(root, &Test_FromUTF8, "custrtrn/Test_FromUTF8");
58 addTest(root, &Test_FromUTF8Lenient, "custrtrn/Test_FromUTF8Lenient");
59 addTest(root, &Test_UChar_WCHART_API, "custrtrn/Test_UChar_WCHART_API");
60 addTest(root, &Test_widestrs, "custrtrn/Test_widestrs");
61 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
62 addTest(root, &Test_WCHART_LongString, "custrtrn/Test_WCHART_LongString");
63 #endif
64 addTest(root, &Test_strToJavaModifiedUTF8, "custrtrn/Test_strToJavaModifiedUTF8");
65 addTest(root, &Test_strFromJavaModifiedUTF8, "custrtrn/Test_strFromJavaModifiedUTF8");
66 addTest(root, &TestNullEmptySource, "custrtrn/TestNullEmptySource");
67 }
68
/*
 * UTF-32 form of the shared test text, terminated by 0x0000.
 * Most rows are eight BMP code points followed by CR LF (0x000D, 0x000A);
 * the middle section holds non-BMP code points (U+2A699..U+2A6D5).
 * Test_strToUTF32 compares its output against this array and
 * Test_strFromUTF32 uses it as input; src16 is the UTF-16 counterpart.
 */
69 static const UChar32 src32[]={
70 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
71 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
72 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
73 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
74 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
75 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
76 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
77 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
78 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
79 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
80 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
81 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
82 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
83 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
84 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
85 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
86 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
87 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
88 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
89 /* test non-BMP code points */
90 0x0002A699,
91 0x0002A69C, 0x0002A69D, 0x0002A69E, 0x0002A69F, 0x0002A6A0, 0x0002A6A5, 0x0002A6A6, 0x0002A6A7, 0x0002A6A8, 0x0002A6AB,
92 0x0002A6AC, 0x0002A6AD, 0x0002A6AE, 0x0002A6AF, 0x0002A6B0, 0x0002A6B1, 0x0002A6B3, 0x0002A6B5, 0x0002A6B6, 0x0002A6B7,
93 0x0002A6B8, 0x0002A6B9, 0x0002A6BA, 0x0002A6BB, 0x0002A6BC, 0x0002A6BD, 0x0002A6BE, 0x0002A6BF, 0x0002A6C0, 0x0002A6C1,
94 0x0002A6C2, 0x0002A6C3, 0x0002A6C4, 0x0002A6C8, 0x0002A6CA, 0x0002A6CB, 0x0002A6CD, 0x0002A6CE, 0x0002A6CF, 0x0002A6D0,
95 0x0002A6D1, 0x0002A6D2, 0x0002A6D3, 0x0002A6D4, 0x0002A6D5,
96
97 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
98 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
99 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
100 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
101 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,0x0000
102 };
103
/*
 * UTF-16 form of the shared test text, terminated by 0x0000.
 * It mirrors src32 row by row; each non-BMP code point of src32 appears
 * here as a surrogate pair (lead 0xD869 followed by a 0xDExx trail unit).
 * Test_strToUTF32 uses it as input and Test_strFromUTF32 compares its
 * output against it.
 */
104 static const UChar src16[] = {
105 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
106 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
107 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
108 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
109 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
110 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
111 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
112 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
113 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
114 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
115 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
116 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
117 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
118 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
119 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
120 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
121 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
122 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
123 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
124
125 /* test non-BMP code points */
126 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
127 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
128 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
129 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
130 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
131 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
132 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
133 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
134 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
135 0xD869, 0xDED5,
136
137 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
138 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
139 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
140 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
141 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,0x0000
142 };
143
144
Test_strToUTF32(void)145 static void Test_strToUTF32(void){
146 UErrorCode err = U_ZERO_ERROR;
147 UChar32 u32Target[400];
148 int32_t u32DestLen;
149 int i= 0;
150
151 /* first with length */
152 u32DestLen = -2;
153 u_strToUTF32(u32Target, 0, &u32DestLen, src16, UPRV_LENGTHOF(src16),&err);
154 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != UPRV_LENGTHOF(src32)) {
155 log_err("u_strToUTF32(preflight with length): "
156 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
157 (long)u32DestLen, (long)UPRV_LENGTHOF(src32), u_errorName(err));
158 return;
159 }
160 err = U_ZERO_ERROR;
161 u32DestLen = -2;
162 u_strToUTF32(u32Target, UPRV_LENGTHOF(src32)+1, &u32DestLen, src16, UPRV_LENGTHOF(src16),&err);
163 if(err != U_ZERO_ERROR || u32DestLen != UPRV_LENGTHOF(src32)) {
164 log_err("u_strToUTF32(with length): "
165 "length %ld != %ld and %s != U_ZERO_ERROR\n",
166 (long)u32DestLen, (long)UPRV_LENGTHOF(src32), u_errorName(err));
167 return;
168 }
169 /*for(i=0; i< u32DestLen; i++){
170 printf("0x%08X, ",uTarget[i]);
171 if(i%10==0){
172 printf("\n");
173 }
174 }*/
175 for(i=0; i< UPRV_LENGTHOF(src32); i++){
176 if(u32Target[i] != src32[i]){
177 log_verbose("u_strToUTF32(with length) failed expected: %04X got: %04X at index: %i \n", src32[i], u32Target[i],i);
178 }
179 }
180 if(u32Target[i] != 0){
181 log_verbose("u_strToUTF32(with length) failed expected: %04X got: %04X at index: %i \n", 0, u32Target[i],i);
182 }
183
184 /* now NUL-terminated */
185 u32DestLen = -2;
186 u_strToUTF32(NULL,0, &u32DestLen, src16, -1,&err);
187 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != UPRV_LENGTHOF(src32)-1) {
188 log_err("u_strToUTF32(preflight with NUL-termination): "
189 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
190 (long)u32DestLen, (long)UPRV_LENGTHOF(src32)-1, u_errorName(err));
191 return;
192 }
193 err = U_ZERO_ERROR;
194 u32DestLen = -2;
195 u_strToUTF32(u32Target, UPRV_LENGTHOF(src32), &u32DestLen, src16, -1,&err);
196 if(err != U_ZERO_ERROR || u32DestLen != UPRV_LENGTHOF(src32)-1) {
197 log_err("u_strToUTF32(with NUL-termination): "
198 "length %ld != %ld and %s != U_ZERO_ERROR\n",
199 (long)u32DestLen, (long)UPRV_LENGTHOF(src32)-1, u_errorName(err));
200 return;
201 }
202
203 for(i=0; i< UPRV_LENGTHOF(src32); i++){
204 if(u32Target[i] != src32[i]){
205 log_verbose("u_strToUTF32(NUL-termination) failed expected: %04X got: %04X \n", src32[i], u32Target[i]);
206 }
207 }
208 }
209
210 /* test unpaired surrogates */
Test_strToUTF32_surrogates()211 static void Test_strToUTF32_surrogates() {
212 UErrorCode err = U_ZERO_ERROR;
213 UChar32 u32Target[400];
214 int32_t len16, u32DestLen;
215 int32_t numSubstitutions;
216 int i;
217
218 static const UChar surr16[] = { 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
219 static const UChar32 expected[] = { 0x5a, 0x50000, 0x7a, 0 };
220 static const UChar32 expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0 };
221 static const UChar32 expected_12345[] = { 0x41, 0x12345, 0x61, 0x12345, 0x5a, 0x50000, 0x7a, 0 };
222 len16 = UPRV_LENGTHOF(surr16);
223 for(i = 0; i < 4; ++i) {
224 err = U_ZERO_ERROR;
225 u_strToUTF32(u32Target, 0, &u32DestLen, surr16+i, len16-i, &err);
226 if(err != U_INVALID_CHAR_FOUND) {
227 log_err("u_strToUTF32(preflight surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
228 (long)i, u_errorName(err));
229 return;
230 }
231
232 err = U_ZERO_ERROR;
233 u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+i, len16-i, &err);
234 if(err != U_INVALID_CHAR_FOUND) {
235 log_err("u_strToUTF32(surr16+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
236 (long)i, u_errorName(err));
237 return;
238 }
239
240 err = U_ZERO_ERROR;
241 u_strToUTF32(NULL, 0, &u32DestLen, surr16+i, -1, &err);
242 if(err != U_INVALID_CHAR_FOUND) {
243 log_err("u_strToUTF32(preflight surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
244 (long)i, u_errorName(err));
245 return;
246 }
247
248 err = U_ZERO_ERROR;
249 u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+i, -1, &err);
250 if(err != U_INVALID_CHAR_FOUND) {
251 log_err("u_strToUTF32(surr16+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
252 (long)i, u_errorName(err));
253 return;
254 }
255 }
256
257 err = U_ZERO_ERROR;
258 u_strToUTF32(u32Target, 0, &u32DestLen, surr16+4, len16-4-1, &err);
259 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) {
260 log_err("u_strToUTF32(preflight surr16+4) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
261 u_errorName(err));
262 return;
263 }
264
265 err = U_ZERO_ERROR;
266 u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+4, len16-4-1, &err);
267 if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) {
268 log_err("u_strToUTF32(surr16+4) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
269 u_errorName(err));
270 return;
271 }
272
273 err = U_ZERO_ERROR;
274 u_strToUTF32(NULL, 0, &u32DestLen, surr16+4, -1, &err);
275 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 3) {
276 log_err("u_strToUTF32(preflight surr16+4/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
277 u_errorName(err));
278 return;
279 }
280
281 err = U_ZERO_ERROR;
282 u_strToUTF32(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16+4, -1, &err);
283 if(err != U_ZERO_ERROR || u32DestLen != 3 || uprv_memcmp(u32Target, expected, 4*4)) {
284 log_err("u_strToUTF32(surr16+4/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
285 u_errorName(err));
286 return;
287 }
288
289 /* with substitution character */
290 numSubstitutions = -1;
291 err = U_ZERO_ERROR;
292 u_strToUTF32WithSub(u32Target, 0, &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err);
293 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) {
294 log_err("u_strToUTF32WithSub(preflight surr16) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
295 u_errorName(err));
296 return;
297 }
298
299 err = U_ZERO_ERROR;
300 u_strToUTF32WithSub(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16, len16-1, 0xfffd, &numSubstitutions, &err);
301 if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_FFFD, 8*4)) {
302 log_err("u_strToUTF32WithSub(surr16) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
303 u_errorName(err));
304 return;
305 }
306
307 err = U_ZERO_ERROR;
308 u_strToUTF32WithSub(NULL, 0, &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err);
309 if(err != U_BUFFER_OVERFLOW_ERROR || u32DestLen != 7 || numSubstitutions != 2) {
310 log_err("u_strToUTF32WithSub(preflight surr16/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
311 u_errorName(err));
312 return;
313 }
314
315 err = U_ZERO_ERROR;
316 u_strToUTF32WithSub(u32Target, UPRV_LENGTHOF(u32Target), &u32DestLen, surr16, -1, 0x12345, &numSubstitutions, &err);
317 if(err != U_ZERO_ERROR || u32DestLen != 7 || numSubstitutions != 2 || uprv_memcmp(u32Target, expected_12345, 8*4)) {
318 log_err("u_strToUTF32WithSub(surr16/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
319 u_errorName(err));
320 return;
321 }
322 }
323
Test_strFromUTF32(void)324 static void Test_strFromUTF32(void){
325 UErrorCode err = U_ZERO_ERROR;
326 UChar uTarget[400];
327 int32_t uDestLen;
328 int i= 0;
329
330 /* first with length */
331 uDestLen = -2;
332 u_strFromUTF32(uTarget,0,&uDestLen,src32,UPRV_LENGTHOF(src32),&err);
333 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != UPRV_LENGTHOF(src16)) {
334 log_err("u_strFromUTF32(preflight with length): "
335 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
336 (long)uDestLen, (long)UPRV_LENGTHOF(src16), u_errorName(err));
337 return;
338 }
339 err = U_ZERO_ERROR;
340 uDestLen = -2;
341 u_strFromUTF32(uTarget, UPRV_LENGTHOF(src16)+1,&uDestLen,src32,UPRV_LENGTHOF(src32),&err);
342 if(err != U_ZERO_ERROR || uDestLen != UPRV_LENGTHOF(src16)) {
343 log_err("u_strFromUTF32(with length): "
344 "length %ld != %ld and %s != U_ZERO_ERROR\n",
345 (long)uDestLen, (long)UPRV_LENGTHOF(src16), u_errorName(err));
346 return;
347 }
348 /*for(i=0; i< uDestLen; i++){
349 printf("0x%04X, ",uTarget[i]);
350 if(i%10==0){
351 printf("\n");
352 }
353 }*/
354
355 for(i=0; i< uDestLen; i++){
356 if(uTarget[i] != src16[i]){
357 log_verbose("u_strFromUTF32(with length) failed expected: %04X got: %04X at index: %i \n", src16[i] ,uTarget[i],i);
358 }
359 }
360 if(uTarget[i] != 0){
361 log_verbose("u_strFromUTF32(with length) failed expected: %04X got: %04X at index: %i \n", 0,uTarget[i],i);
362 }
363
364 /* now NUL-terminated */
365 uDestLen = -2;
366 u_strFromUTF32(NULL,0,&uDestLen,src32,-1,&err);
367 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != UPRV_LENGTHOF(src16)-1) {
368 log_err("u_strFromUTF32(preflight with NUL-termination): "
369 "length %ld != %ld and %s != U_BUFFER_OVERFLOW_ERROR\n",
370 (long)uDestLen, (long)UPRV_LENGTHOF(src16)-1, u_errorName(err));
371 return;
372 }
373 err = U_ZERO_ERROR;
374 uDestLen = -2;
375 u_strFromUTF32(uTarget, UPRV_LENGTHOF(src16),&uDestLen,src32,-1,&err);
376 if(err != U_ZERO_ERROR || uDestLen != UPRV_LENGTHOF(src16)-1) {
377 log_err("u_strFromUTF32(with NUL-termination): "
378 "length %ld != %ld and %s != U_ZERO_ERROR\n",
379 (long)uDestLen, (long)UPRV_LENGTHOF(src16)-1, u_errorName(err));
380 return;
381 }
382
383 for(i=0; i< uDestLen; i++){
384 if(uTarget[i] != src16[i]){
385 log_verbose("u_strFromUTF32(with NUL-termination) failed expected: %04X got: %04X \n", src16[i] ,uTarget[i]);
386 }
387 }
388 }
389
390 /* test surrogate code points */
Test_strFromUTF32_surrogates()391 static void Test_strFromUTF32_surrogates() {
392 UErrorCode err = U_ZERO_ERROR;
393 UChar uTarget[400];
394 int32_t len32, uDestLen;
395 int32_t numSubstitutions;
396 int i;
397
398 static const UChar32 surr32[] = { 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a, 0 };
399 static const UChar expected[] = { 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
400 static const UChar expected_FFFD[] = { 0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
401 static const UChar expected_12345[] = { 0x41, 0xd808, 0xdf45, 0x61, 0xd808, 0xdf45, 0xd808, 0xdf45, 0xd808, 0xdf45,
402 0x5a, 0xd900, 0xdc00, 0x7a, 0 };
403 len32 = UPRV_LENGTHOF(surr32);
404 for(i = 0; i < 6; ++i) {
405 err = U_ZERO_ERROR;
406 u_strFromUTF32(uTarget, 0, &uDestLen, surr32+i, len32-i, &err);
407 if(err != U_INVALID_CHAR_FOUND) {
408 log_err("u_strFromUTF32(preflight surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
409 (long)i, u_errorName(err));
410 return;
411 }
412
413 err = U_ZERO_ERROR;
414 u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+i, len32-i, &err);
415 if(err != U_INVALID_CHAR_FOUND) {
416 log_err("u_strFromUTF32(surr32+%ld) sets %s != U_INVALID_CHAR_FOUND\n",
417 (long)i, u_errorName(err));
418 return;
419 }
420
421 err = U_ZERO_ERROR;
422 u_strFromUTF32(NULL, 0, &uDestLen, surr32+i, -1, &err);
423 if(err != U_INVALID_CHAR_FOUND) {
424 log_err("u_strFromUTF32(preflight surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
425 (long)i, u_errorName(err));
426 return;
427 }
428
429 err = U_ZERO_ERROR;
430 u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+i, -1, &err);
431 if(err != U_INVALID_CHAR_FOUND) {
432 log_err("u_strFromUTF32(surr32+%ld/NUL) sets %s != U_INVALID_CHAR_FOUND\n",
433 (long)i, u_errorName(err));
434 return;
435 }
436 }
437
438 err = U_ZERO_ERROR;
439 u_strFromUTF32(uTarget, 0, &uDestLen, surr32+6, len32-6-1, &err);
440 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) {
441 log_err("u_strFromUTF32(preflight surr32+6) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
442 u_errorName(err));
443 return;
444 }
445
446 err = U_ZERO_ERROR;
447 u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+6, len32-6-1, &err);
448 if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) {
449 log_err("u_strFromUTF32(surr32+6) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
450 u_errorName(err));
451 return;
452 }
453
454 err = U_ZERO_ERROR;
455 u_strFromUTF32(NULL, 0, &uDestLen, surr32+6, -1, &err);
456 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 4) {
457 log_err("u_strFromUTF32(preflight surr32+6/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
458 u_errorName(err));
459 return;
460 }
461
462 err = U_ZERO_ERROR;
463 u_strFromUTF32(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32+6, -1, &err);
464 if(err != U_ZERO_ERROR || uDestLen != 4 || u_memcmp(uTarget, expected, 5)) {
465 log_err("u_strFromUTF32(surr32+6/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
466 u_errorName(err));
467 return;
468 }
469
470 /* with substitution character */
471 numSubstitutions = -1;
472 err = U_ZERO_ERROR;
473 u_strFromUTF32WithSub(uTarget, 0, &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err);
474 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 10 || numSubstitutions != 4) {
475 log_err("u_strFromUTF32WithSub(preflight surr32) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
476 u_errorName(err));
477 return;
478 }
479
480 err = U_ZERO_ERROR;
481 u_strFromUTF32WithSub(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32, len32-1, 0xfffd, &numSubstitutions, &err);
482 if(err != U_ZERO_ERROR || uDestLen != 10 || numSubstitutions != 4 || u_memcmp(uTarget, expected_FFFD, 11)) {
483 log_err("u_strFromUTF32WithSub(surr32) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
484 u_errorName(err));
485 return;
486 }
487
488 err = U_ZERO_ERROR;
489 u_strFromUTF32WithSub(NULL, 0, &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err);
490 if(err != U_BUFFER_OVERFLOW_ERROR || uDestLen != 14 || numSubstitutions != 4) {
491 log_err("u_strFromUTF32WithSub(preflight surr32/NUL) sets %s != U_BUFFER_OVERFLOW_ERROR or an unexpected length\n",
492 u_errorName(err));
493 return;
494 }
495
496 err = U_ZERO_ERROR;
497 u_strFromUTF32WithSub(uTarget, UPRV_LENGTHOF(uTarget), &uDestLen, surr32, -1, 0x12345, &numSubstitutions, &err);
498 if(err != U_ZERO_ERROR || uDestLen != 14 || numSubstitutions != 4 || u_memcmp(uTarget, expected_12345, 15)) {
499 log_err("u_strFromUTF32WithSub(surr32/NUL) sets %s != U_ZERO_ERROR or does not produce the expected string\n",
500 u_errorName(err));
501 return;
502 }
503 }
504
Test_UChar_UTF8_API(void)505 static void Test_UChar_UTF8_API(void){
506
507 UErrorCode err = U_ZERO_ERROR;
508 UChar uTemp[1];
509 char u8Temp[1];
510 UChar* uTarget=uTemp;
511 const char* u8Src;
512 int32_t u8SrcLen = 0;
513 int32_t uTargetLength = 0;
514 int32_t uDestLen=0;
515 const UChar* uSrc = src16;
516 int32_t uSrcLen = sizeof(src16)/2;
517 char* u8Target = u8Temp;
518 int32_t u8TargetLength =0;
519 int32_t u8DestLen =0;
520 UBool failed = false;
521 int i= 0;
522 int32_t numSubstitutions;
523
524 {
525 /* preflight */
526 u8Temp[0] = 0x12;
527 u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
528 if(err == U_BUFFER_OVERFLOW_ERROR && u8Temp[0] == 0x12){
529 err = U_ZERO_ERROR;
530 u8Target = (char*) malloc (sizeof(uint8_t) * (u8DestLen+1));
531 u8TargetLength = u8DestLen;
532
533 u8Target[u8TargetLength] = (char)0xfe;
534 u8DestLen = -1;
535 u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
536 if(U_FAILURE(err) || u8DestLen != u8TargetLength || u8Target[u8TargetLength] != (char)0xfe){
537 log_err("u_strToUTF8 failed after preflight. Error: %s\n", u_errorName(err));
538 return;
539 }
540
541 }
542 else {
543 log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR");
544 }
545 failed = false;
546 /*for(i=0; i< u8DestLen; i++){
547 printf("0x%04X, ",u8Target[i]);
548 if(i%10==0){
549 printf("\n");
550 }
551 }*/
552 /*for(i=0; i< u8DestLen; i++){
553 if(u8Target[i] != src8[i]){
554 log_verbose("u_strToUTF8() failed expected: %04X got: %04X \n", src8[i], u8Target[i]);
555 failed =true;
556 }
557 }
558 if(failed){
559 log_err("u_strToUTF8() failed \n");
560 }*/
561 u8Src = u8Target;
562 u8SrcLen = u8DestLen;
563
564 /* preflight */
565 uTemp[0] = 0x1234;
566 u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
567 if(err == U_BUFFER_OVERFLOW_ERROR && uTemp[0] == 0x1234){
568 err = U_ZERO_ERROR;
569 uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1));
570 uTargetLength = uDestLen;
571
572 uTarget[uTargetLength] = 0xfff0;
573 uDestLen = -1;
574 u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
575 }
576 else {
577 log_err("error: u_strFromUTF8(preflight) should have gotten U_BUFFER_OVERFLOW_ERROR\n");
578 }
579 /*for(i=0; i< uDestLen; i++){
580 printf("0x%04X, ",uTarget[i]);
581 if(i%10==0){
582 printf("\n");
583 }
584 }*/
585
586 if(U_FAILURE(err) || uDestLen != uTargetLength || uTarget[uTargetLength] != 0xfff0) {
587 failed = true;
588 }
589 for(i=0; i< uSrcLen; i++){
590 if(uTarget[i] != src16[i]){
591 log_verbose("u_strFromUTF8() failed expected: \\u%04X got: \\u%04X at index: %i \n", src16[i] ,uTarget[i],i);
592 failed =true;
593 }
594 }
595 if(failed){
596 log_err("error: u_strFromUTF8(after preflighting) failed\n");
597 }
598
599 free(u8Target);
600 free(uTarget);
601 }
602 {
603 u8SrcLen = -1;
604 uTargetLength = 0;
605 uSrcLen =-1;
606 u8TargetLength=0;
607 failed = false;
608 /* preflight */
609 u_strToUTF8(NULL,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
610 if(err == U_BUFFER_OVERFLOW_ERROR){
611 err = U_ZERO_ERROR;
612 u8Target = (char*) malloc (sizeof(uint8_t) * (u8DestLen+1));
613 u8TargetLength = u8DestLen;
614
615 u_strToUTF8(u8Target,u8TargetLength, &u8DestLen, uSrc, uSrcLen,&err);
616
617 }
618 else {
619 log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR");
620 }
621 failed = false;
622 /*for(i=0; i< u8DestLen; i++){
623 printf("0x%04X, ",u8Target[i]);
624 if(i%10==0){
625 printf("\n");
626 }
627 }*/
628 /*for(i=0; i< u8DestLen; i++){
629 if(u8Target[i] != src8[i]){
630 log_verbose("u_strToUTF8() failed expected: %04X got: %04X \n", src8[i], u8Target[i]);
631 failed =true;
632 }
633 }
634 if(failed){
635 log_err("u_strToUTF8() failed \n");
636 }*/
637 u8Src = u8Target;
638 u8SrcLen = u8DestLen;
639
640 /* preflight */
641 u_strFromUTF8(NULL,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
642 if(err == U_BUFFER_OVERFLOW_ERROR){
643 err = U_ZERO_ERROR;
644 uTarget = (UChar*) malloc( sizeof(UChar) * (uDestLen+1));
645 uTargetLength = uDestLen;
646
647 u_strFromUTF8(uTarget,uTargetLength,&uDestLen,u8Src,u8SrcLen,&err);
648 }
649 else {
650 log_err("Should have gotten U_BUFFER_OVERFLOW_ERROR");
651 }
652 /*for(i=0; i< uDestLen; i++){
653 printf("0x%04X, ",uTarget[i]);
654 if(i%10==0){
655 printf("\n");
656 }
657 }*/
658
659 for(i=0; i< uSrcLen; i++){
660 if(uTarget[i] != src16[i]){
661 log_verbose("u_strFromUTF8() failed expected: \\u%04X got: \\u%04X at index: %i \n", src16[i] ,uTarget[i],i);
662 failed =true;
663 }
664 }
665 if(failed){
666 log_err("u_strToUTF8() failed \n");
667 }
668
669 free(u8Target);
670 free(uTarget);
671 }
672
673 /* test UTF-8 with single surrogates - illegal in Unicode 3.2 */
674 // Since ICU 60, each surrogate byte sequence is treated as 3 single-byte errors.
675 {
676 static const UChar
677 withLead16[]={ 0x1800, 0xd89a, 0x0061 },
678 withTrail16[]={ 0x1800, 0xdcba, 0x0061, 0 },
679 withTrail16SubFFFD[]={ 0x1800, 0xfffd, 0xfffd, 0xfffd, 0x0061, 0 }, /* sub==U+FFFD */
680 withTrail16Sub50005[]={ 0x1800, 0xd900, 0xdc05, 0xd900, 0xdc05, 0xd900, 0xdc05, 0x0061, 0 }; /* sub==U+50005 */
681 static const uint8_t
682 withLead8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xa2, 0x9a, 0x61 },
683 withTrail8[]={ 0xe1, 0xa0, 0x80, 0xed, 0xb2, 0xba, 0x61, 0 },
684 withTrail8Sub1A[]={ 0xe1, 0xa0, 0x80, 0x1a, 0x61, 0 }, /* sub==U+001A */
685 withTrail8SubFFFD[]={ 0xe1, 0xa0, 0x80, 0xef, 0xbf, 0xbd, 0x61, 0 }; /* sub==U+FFFD */
686 UChar out16[10];
687 char out8[10];
688
689 if(
690 (err=U_ZERO_ERROR, u_strToUTF8(out8, UPRV_LENGTHOF(out8), NULL, withLead16, UPRV_LENGTHOF(withLead16), &err), err!=U_INVALID_CHAR_FOUND) ||
691 (err=U_ZERO_ERROR, u_strToUTF8(out8, UPRV_LENGTHOF(out8), NULL, withTrail16, -1, &err), err!=U_INVALID_CHAR_FOUND) ||
692 (err=U_ZERO_ERROR, u_strFromUTF8(out16, UPRV_LENGTHOF(out16), NULL, (const char *)withLead8, UPRV_LENGTHOF(withLead8), &err), err!=U_INVALID_CHAR_FOUND) ||
693 (err=U_ZERO_ERROR, u_strFromUTF8(out16, UPRV_LENGTHOF(out16), NULL, (const char *)withTrail8, -1, &err), err!=U_INVALID_CHAR_FOUND)
694 ) {
695 log_err("error: u_strTo/FromUTF8(string with single surrogate) fails to report error\n");
696 }
697
698 /* test error handling with substitution characters */
699
700 /* from UTF-8 with length */
701 err=U_ZERO_ERROR;
702 numSubstitutions=-1;
703 out16[0]=0x55aa;
704 uDestLen=0;
705 u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
706 (const char *)withTrail8, (int32_t)uprv_strlen((const char *)withTrail8),
707 0x50005, &numSubstitutions,
708 &err);
709 if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16Sub50005) ||
710 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen+1) ||
711 numSubstitutions!=3) {
712 log_err("error: u_strFromUTF8WithSub(length) failed\n");
713 }
714
715 /* from UTF-8 with NUL termination */
716 err=U_ZERO_ERROR;
717 numSubstitutions=-1;
718 out16[0]=0x55aa;
719 uDestLen=0;
720 u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
721 (const char *)withTrail8, -1,
722 0xfffd, &numSubstitutions,
723 &err);
724 if(U_FAILURE(err) || uDestLen!=u_strlen(withTrail16SubFFFD) ||
725 0!=u_memcmp(withTrail16SubFFFD, out16, uDestLen+1) ||
726 numSubstitutions!=3) {
727 log_err("error: u_strFromUTF8WithSub(NUL termination) failed\n");
728 }
729
730 /* preflight from UTF-8 with NUL termination */
731 err=U_ZERO_ERROR;
732 numSubstitutions=-1;
733 out16[0]=0x55aa;
734 uDestLen=0;
735 u_strFromUTF8WithSub(out16, 1, &uDestLen,
736 (const char *)withTrail8, -1,
737 0x50005, &numSubstitutions,
738 &err);
739 if(err!=U_BUFFER_OVERFLOW_ERROR || uDestLen!=u_strlen(withTrail16Sub50005) || numSubstitutions!=3) {
740 log_err("error: u_strFromUTF8WithSub(preflight/NUL termination) failed\n");
741 }
742
743 /* to UTF-8 with length */
744 err=U_ZERO_ERROR;
745 numSubstitutions=-1;
746 out8[0]=(char)0xf5;
747 u8DestLen=0;
748 u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
749 withTrail16, u_strlen(withTrail16),
750 0xfffd, &numSubstitutions,
751 &err);
752 if(U_FAILURE(err) || u8DestLen!=(int32_t)uprv_strlen((const char *)withTrail8SubFFFD) ||
753 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen+1) ||
754 numSubstitutions!=1) {
755 log_err("error: u_strToUTF8WithSub(length) failed\n");
756 }
757
758 /* to UTF-8 with NUL termination */
759 err=U_ZERO_ERROR;
760 numSubstitutions=-1;
761 out8[0]=(char)0xf5;
762 u8DestLen=0;
763 u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
764 withTrail16, -1,
765 0x1a, &numSubstitutions,
766 &err);
767 if(U_FAILURE(err) || u8DestLen!=(int32_t)uprv_strlen((const char *)withTrail8Sub1A) ||
768 0!=uprv_memcmp((const char *)withTrail8Sub1A, out8, u8DestLen+1) ||
769 numSubstitutions!=1) {
770 log_err("error: u_strToUTF8WithSub(NUL termination) failed\n");
771 }
772
773 /* preflight to UTF-8 with NUL termination */
774 err=U_ZERO_ERROR;
775 numSubstitutions=-1;
776 out8[0]=(char)0xf5;
777 u8DestLen=0;
778 u_strToUTF8WithSub(out8, 1, &u8DestLen,
779 withTrail16, -1,
780 0xfffd, &numSubstitutions,
781 &err);
782 if(err!=U_BUFFER_OVERFLOW_ERROR || u8DestLen!=(int32_t)uprv_strlen((const char *)withTrail8SubFFFD) ||
783 numSubstitutions!=1) {
784 log_err("error: u_strToUTF8WithSub(preflight/NUL termination) failed\n");
785 }
786
787 /* test that numSubstitutions==0 if there are no substitutions */
788
789 /* from UTF-8 with length (just first 3 bytes which are valid) */
790 err=U_ZERO_ERROR;
791 numSubstitutions=-1;
792 out16[0]=0x55aa;
793 uDestLen=0;
794 u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
795 (const char *)withTrail8, 3,
796 0x50005, &numSubstitutions,
797 &err);
798 if(U_FAILURE(err) || uDestLen!=1 ||
799 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) ||
800 numSubstitutions!=0) {
801 log_err("error: u_strFromUTF8WithSub(no subs) failed\n");
802 }
803
804 /* to UTF-8 with length (just first UChar which is valid) */
805 err=U_ZERO_ERROR;
806 numSubstitutions=-1;
807 out8[0]=(char)0xf5;
808 u8DestLen=0;
809 u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
810 withTrail16, 1,
811 0xfffd, &numSubstitutions,
812 &err);
813 if(U_FAILURE(err) || u8DestLen!=3 ||
814 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) ||
815 numSubstitutions!=0) {
816 log_err("error: u_strToUTF8WithSub(no subs) failed\n");
817 }
818
819 /* test that numSubstitutions==0 if subchar==U_SENTINEL (no subchar) */
820
821 /* from UTF-8 with length (just first 3 bytes which are valid) */
822 err=U_ZERO_ERROR;
823 numSubstitutions=-1;
824 out16[0]=0x55aa;
825 uDestLen=0;
826 u_strFromUTF8WithSub(out16, UPRV_LENGTHOF(out16), &uDestLen,
827 (const char *)withTrail8, 3,
828 U_SENTINEL, &numSubstitutions,
829 &err);
830 if(U_FAILURE(err) || uDestLen!=1 ||
831 0!=u_memcmp(withTrail16Sub50005, out16, uDestLen) ||
832 numSubstitutions!=0) {
833 log_err("error: u_strFromUTF8WithSub(no subchar) failed\n");
834 }
835
836 /* to UTF-8 with length (just first UChar which is valid) */
837 err=U_ZERO_ERROR;
838 numSubstitutions=-1;
839 out8[0]=(char)0xf5;
840 u8DestLen=0;
841 u_strToUTF8WithSub(out8, UPRV_LENGTHOF(out8), &u8DestLen,
842 withTrail16, 1,
843 U_SENTINEL, &numSubstitutions,
844 &err);
845 if(U_FAILURE(err) || u8DestLen!=3 ||
846 0!=uprv_memcmp((const char *)withTrail8SubFFFD, out8, u8DestLen) ||
847 numSubstitutions!=0) {
848 log_err("error: u_strToUTF8WithSub(no subchar) failed\n");
849 }
850 }
851 {
852 /*
853 * Test with an illegal lead byte that would be followed by more than 3 trail bytes.
854 * See ticket #10371.
855 */
856 static const char src[1]={ (char)0xf8 };
857 UChar out16[10];
858 err=U_ZERO_ERROR;
859 u_strFromUTF8(out16, UPRV_LENGTHOF(out16), NULL, src, 1, &err);
860 if(err!=U_INVALID_CHAR_FOUND) {
861 log_err("error: u_strFromUTF8(5-byte lead byte) failed\n");
862 }
863 }
864 }
865
866 /* compare if two strings are equal, but match 0xfffd in the second string with anything in the first */
867 static UBool
equalAnyFFFD(const UChar * s,const UChar * t,int32_t length)868 equalAnyFFFD(const UChar *s, const UChar *t, int32_t length) {
869 UChar c1, c2;
870
871 while(length>0) {
872 c1=*s++;
873 c2=*t++;
874 if(c1!=c2 && c2!=0xfffd) {
875 return false;
876 }
877 --length;
878 }
879 return true;
880 }
881
882 /* test u_strFromUTF8Lenient() */
883 static void
Test_FromUTF8(void)884 Test_FromUTF8(void) {
885 /*
886 * Test case from icu-support list 20071130 "u_strFromUTF8() returns U_INVALID_CHAR_FOUND(10)"
887 */
888 static const uint8_t bytes[]={ 0xe0, 0xa5, 0x9c, 0 };
889 UChar dest[64];
890 UChar *destPointer;
891 int32_t destLength;
892 UErrorCode errorCode;
893
894 /* 3 bytes input, one UChar output (U+095C) */
895 errorCode=U_ZERO_ERROR;
896 destLength=-99;
897 destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, 3, &errorCode);
898 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=1) {
899 log_err("error: u_strFromUTF8(preflight srcLength=3) fails: destLength=%ld - %s\n",
900 (long)destLength, u_errorName(errorCode));
901 }
902
903 /* 4 bytes input, two UChars output (U+095C U+0000) */
904 errorCode=U_ZERO_ERROR;
905 destLength=-99;
906 destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, 4, &errorCode);
907 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=2) {
908 log_err("error: u_strFromUTF8(preflight srcLength=4) fails: destLength=%ld - %s\n",
909 (long)destLength, u_errorName(errorCode));
910 }
911
912 /* NUL-terminated 3 bytes input, one UChar output (U+095C) */
913 errorCode=U_ZERO_ERROR;
914 destLength=-99;
915 destPointer=u_strFromUTF8(NULL, 0, &destLength, (const char *)bytes, -1, &errorCode);
916 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || destPointer!=NULL || destLength!=1) {
917 log_err("error: u_strFromUTF8(preflight srcLength=-1) fails: destLength=%ld - %s\n",
918 (long)destLength, u_errorName(errorCode));
919 }
920
921 /* 3 bytes input, one UChar output (U+095C), transform not just preflight */
922 errorCode=U_ZERO_ERROR;
923 dest[0]=dest[1]=99;
924 destLength=-99;
925 destPointer=u_strFromUTF8(dest, UPRV_LENGTHOF(dest), &destLength, (const char *)bytes, 3, &errorCode);
926 if(U_FAILURE(errorCode) || destPointer!=dest || destLength!=1 || dest[0]!=0x95c || dest[1]!=0) {
927 log_err("error: u_strFromUTF8(transform srcLength=3) fails: destLength=%ld - %s\n",
928 (long)destLength, u_errorName(errorCode));
929 }
930 }
931
932 /* test u_strFromUTF8Lenient() */
933 static void
Test_FromUTF8Lenient(void)934 Test_FromUTF8Lenient(void) {
935 /*
936 * Multiple input strings, each NUL-terminated.
937 * Terminate with a string starting with 0xff.
938 */
939 static const uint8_t bytes[]={
940 /* well-formed UTF-8 */
941 0x61, 0xc3, 0x9f, 0xe0, 0xa0, 0x80, 0xf0, 0xa0, 0x80, 0x80,
942 0x62, 0xc3, 0xa0, 0xe0, 0xa0, 0x81, 0xf0, 0xa0, 0x80, 0x81, 0,
943
944 /* various malformed sequences */
945 0xc3, 0xc3, 0x9f, 0xc3, 0xa0, 0xe0, 0x80, 0x8a, 0xf0, 0x41, 0x42, 0x43, 0,
946
947 /* truncated input */
948 0xc3, 0,
949 0xe0, 0,
950 0xe0, 0xa0, 0,
951 0xf0, 0,
952 0xf0, 0x90, 0,
953 0xf0, 0x90, 0x80, 0,
954
955 /* non-ASCII characters in the last few bytes */
956 0x61, 0xc3, 0x9f, 0xe0, 0xa0, 0x80, 0,
957 0x61, 0xe0, 0xa0, 0x80, 0xc3, 0x9f, 0,
958
959 /* empty string */
960 0,
961
962 /* finish */
963 0xff, 0
964 };
965
966 /* Multiple output strings, each NUL-terminated. 0xfffd matches anything. */
967 static const UChar uchars[]={
968 0x61, 0xdf, 0x800, 0xd840, 0xdc00,
969 0x62, 0xe0, 0x801, 0xd840, 0xdc01, 0,
970
971 0xfffd, 0x9f, 0xe0, 0xa, 0xfffd, 0xfffd, 0,
972
973 0xfffd, 0,
974 0xfffd, 0,
975 0xfffd, 0,
976 0xfffd, 0,
977 0xfffd, 0,
978 0xfffd, 0,
979
980 0x61, 0xdf, 0x800, 0,
981 0x61, 0x800, 0xdf, 0,
982
983 0,
984
985 0
986 };
987
988 UChar dest[64];
989 const char *pb;
990 const UChar *pu, *pDest;
991 int32_t srcLength, destLength0, destLength;
992 int number;
993 UErrorCode errorCode;
994
995 /* verify checking for some illegal arguments */
996 dest[0]=0x1234;
997 destLength=-1;
998 errorCode=U_ZERO_ERROR;
999 pDest=u_strFromUTF8Lenient(dest, 1, &destLength, NULL, -1, &errorCode);
1000 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0x1234) {
1001 log_err("u_strFromUTF8Lenient(src=NULL) failed\n");
1002 }
1003
1004 dest[0]=0x1234;
1005 destLength=-1;
1006 errorCode=U_ZERO_ERROR;
1007 pDest=u_strFromUTF8Lenient(NULL, 1, &destLength, (const char *)bytes, -1, &errorCode);
1008 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
1009 log_err("u_strFromUTF8Lenient(dest=NULL[1]) failed\n");
1010 }
1011
1012 dest[0]=0x1234;
1013 destLength=-1;
1014 errorCode=U_MEMORY_ALLOCATION_ERROR;
1015 pDest=u_strFromUTF8Lenient(dest, 1, &destLength, (const char *)bytes, -1, &errorCode);
1016 if(errorCode!=U_MEMORY_ALLOCATION_ERROR || dest[0]!=0x1234) {
1017 log_err("u_strFromUTF8Lenient(U_MEMORY_ALLOCATION_ERROR) failed\n");
1018 }
1019
1020 /* test normal behavior */
1021 number=0; /* string number for log_err() */
1022
1023 for(pb=(const char *)bytes, pu=uchars;
1024 *pb!=(char)0xff;
1025 pb+=srcLength+1, pu+=destLength0+1, ++number
1026 ) {
1027 srcLength=(int32_t)uprv_strlen(pb);
1028 destLength0=u_strlen(pu);
1029
1030 /* preflighting with NUL-termination */
1031 dest[0]=0x1234;
1032 destLength=-1;
1033 errorCode=U_ZERO_ERROR;
1034 pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, -1, &errorCode);
1035 if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) ||
1036 pDest!=NULL || dest[0]!=0x1234 || destLength!=destLength0
1037 ) {
1038 log_err("u_strFromUTF8Lenient(%d preflighting with NUL-termination) failed\n", number);
1039 }
1040
1041 /* preflighting/some capacity with NUL-termination */
1042 if(srcLength>0) {
1043 dest[destLength0-1]=0x1234;
1044 destLength=-1;
1045 errorCode=U_ZERO_ERROR;
1046 pDest=u_strFromUTF8Lenient(dest, destLength0-1, &destLength, pb, -1, &errorCode);
1047 if (errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1048 dest[destLength0-1]!=0x1234 || destLength!=destLength0
1049 ) {
1050 log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with NUL-termination) failed\n", number);
1051 }
1052 }
1053
1054 /* conversion with NUL-termination, much capacity */
1055 dest[0]=dest[destLength0]=0x1234;
1056 destLength=-1;
1057 errorCode=U_ZERO_ERROR;
1058 pDest=u_strFromUTF8Lenient(dest, UPRV_LENGTHOF(dest), &destLength, pb, -1, &errorCode);
1059 if (errorCode!=U_ZERO_ERROR ||
1060 pDest!=dest || dest[destLength0]!=0 ||
1061 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1062 ) {
1063 log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, much capacity) failed\n", number);
1064 }
1065
1066 /* conversion with NUL-termination, exact capacity */
1067 dest[0]=dest[destLength0]=0x1234;
1068 destLength=-1;
1069 errorCode=U_ZERO_ERROR;
1070 pDest=u_strFromUTF8Lenient(dest, destLength0, &destLength, pb, -1, &errorCode);
1071 if (errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
1072 pDest!=dest || dest[destLength0]!=0x1234 ||
1073 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1074 ) {
1075 log_err("u_strFromUTF8Lenient(%d conversion with NUL-termination, exact capacity) failed\n", number);
1076 }
1077
1078 /* preflighting with length */
1079 dest[0]=0x1234;
1080 destLength=-1;
1081 errorCode=U_ZERO_ERROR;
1082 pDest=u_strFromUTF8Lenient(NULL, 0, &destLength, pb, srcLength, &errorCode);
1083 if (errorCode!= (destLength0==0 ? U_STRING_NOT_TERMINATED_WARNING : U_BUFFER_OVERFLOW_ERROR) ||
1084 pDest!=NULL || dest[0]!=0x1234 || destLength!=srcLength
1085 ) {
1086 log_err("u_strFromUTF8Lenient(%d preflighting with length) failed\n", number);
1087 }
1088
1089 /* preflighting/some capacity with length */
1090 if(srcLength>0) {
1091 dest[srcLength-1]=0x1234;
1092 destLength=-1;
1093 errorCode=U_ZERO_ERROR;
1094 pDest=u_strFromUTF8Lenient(dest, srcLength-1, &destLength, pb, srcLength, &errorCode);
1095 if (errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1096 dest[srcLength-1]!=0x1234 || destLength!=srcLength
1097 ) {
1098 log_err("u_strFromUTF8Lenient(%d preflighting/some capacity with length) failed\n", number);
1099 }
1100 }
1101
1102 /* conversion with length, much capacity */
1103 dest[0]=dest[destLength0]=0x1234;
1104 destLength=-1;
1105 errorCode=U_ZERO_ERROR;
1106 pDest=u_strFromUTF8Lenient(dest, UPRV_LENGTHOF(dest), &destLength, pb, srcLength, &errorCode);
1107 if (errorCode!=U_ZERO_ERROR ||
1108 pDest!=dest || dest[destLength0]!=0 ||
1109 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1110 ) {
1111 log_err("u_strFromUTF8Lenient(%d conversion with length, much capacity) failed\n", number);
1112 }
1113
1114 /* conversion with length, srcLength capacity */
1115 dest[0]=dest[srcLength]=dest[destLength0]=0x1234;
1116 destLength=-1;
1117 errorCode=U_ZERO_ERROR;
1118 pDest=u_strFromUTF8Lenient(dest, srcLength, &destLength, pb, srcLength, &errorCode);
1119 if(srcLength==destLength0) {
1120 if (errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
1121 pDest!=dest || dest[destLength0]!=0x1234 ||
1122 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1123 ) {
1124 log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/not terminated) failed\n", number);
1125 }
1126 } else {
1127 if (errorCode!=U_ZERO_ERROR ||
1128 pDest!=dest || dest[destLength0]!=0 ||
1129 destLength!=destLength0 || !equalAnyFFFD(dest, pu, destLength)
1130 ) {
1131 log_err("u_strFromUTF8Lenient(%d conversion with length, srcLength capacity/terminated) failed\n", number);
1132 }
1133 }
1134 }
1135 }
1136
/*
 * UTF-16 test text for the wchar_t conversion tests: four CR LF-terminated
 * rows of eight ASCII letters, followed by a terminating NUL.
 * Test only ASCII.
 */
static const uint16_t src16j[] = {
    /* "CDEFGHIJ\r\n" */
    0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
    /* "KLMNOPQR\r\n" */
    0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
    /* "STUVWXYZ\r\n" */
    0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
    /* "STUVWXYZ\r\n" (repeated) */
    0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
    /* terminating NUL */
    0x0000,
};
/*
 * UTF-16 test text with embedded NULs for the wchar_t conversion tests:
 * eight groups of five ASCII code units, each group followed by a NUL.
 * Test only ASCII. (A run of Latin-1 code units, 0x00A8..0x00E9 followed by
 * 0x0054 and a NUL, used to be listed after the ASCII groups but is disabled.)
 */
static const uint16_t src16WithNulls[] = {
    /* "CDEFG" NUL */
    0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0000,
    /* "HIJ\r\n" NUL */
    0x0048, 0x0049, 0x004A, 0x000D, 0x000A, 0x0000,
    /* "KLMNO" NUL */
    0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0000,
    /* "PQR\r\n" NUL */
    0x0050, 0x0051, 0x0052, 0x000D, 0x000A, 0x0000,
    /* "STUVW" NUL */
    0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0000,
    /* "XYZ\r\n" NUL */
    0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 0x0000,
    /* "STUVW" NUL (repeated) */
    0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0000,
    /* "XYZ\r\n" NUL (repeated) */
    0x0058, 0x0059, 0x005A, 0x000D, 0x000A, 0x0000,
};
Test_UChar_WCHART_API(void)1167 static void Test_UChar_WCHART_API(void){
1168 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1169 UErrorCode err = U_ZERO_ERROR;
1170 const UChar* uSrc = src16j;
1171 int32_t uSrcLen = sizeof(src16j)/2;
1172 wchar_t* wDest = NULL;
1173 int32_t wDestLen = 0;
1174 int32_t reqLen= 0 ;
1175 UBool failed = false;
1176 UChar* uDest = NULL;
1177 int32_t uDestLen = 0;
1178 int i =0;
1179 {
1180 /* Bad UErrorCode arguments. Make sure that the API doesn't crash, and that Purify doesn't complain. */
1181 if (u_strFromWCS(NULL,0,NULL,NULL,0,NULL) != NULL) {
1182 log_err("u_strFromWCS() should return NULL with a bad argument\n");
1183 }
1184 if (u_strToWCS(NULL,0,NULL,NULL,0,NULL) != NULL) {
1185 log_err("u_strToWCS() should return NULL with a bad argument\n");
1186 }
1187
1188 /* NULL source & destination. */
1189 err = U_ZERO_ERROR;
1190 u_strFromWCS(NULL,0,NULL,NULL,0,&err);
1191 if (err != U_STRING_NOT_TERMINATED_WARNING) {
1192 log_err("u_strFromWCS(NULL, NULL) failed. Error: %s \n", u_errorName(err));
1193 }
1194 err = U_ZERO_ERROR;
1195 u_strToWCS(NULL,0,NULL,NULL,0,&err);
1196 if (err != U_STRING_NOT_TERMINATED_WARNING) {
1197 log_err("u_strToWCS(NULL, NULL) failed. Error: %s \n", u_errorName(err));
1198 }
1199 err = U_ZERO_ERROR;
1200
1201 /* pre-flight*/
1202 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1203
1204 if(err == U_BUFFER_OVERFLOW_ERROR){
1205 err=U_ZERO_ERROR;
1206 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1207 wDestLen = reqLen+1;
1208 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1209 }
1210
1211 /* pre-flight */
1212 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1213
1214
1215 if(err == U_BUFFER_OVERFLOW_ERROR){
1216 err =U_ZERO_ERROR;
1217 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1218 uDestLen = reqLen + 1;
1219 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1220 }else if(U_FAILURE(err)){
1221
1222 log_err("u_strFromWCS() failed. Error: %s \n", u_errorName(err));
1223 return;
1224 }
1225
1226 for(i=0; i< uSrcLen; i++){
1227 if(uDest[i] != src16j[i]){
1228 log_verbose("u_str*WCS() failed for unterminated string expected: \\u%04X got: \\u%04X at index: %i \n", src16j[i] ,uDest[i],i);
1229 failed =true;
1230 }
1231 }
1232
1233 if(U_FAILURE(err)){
1234 failed = true;
1235 }
1236 if(failed){
1237 log_err("u_strToWCS() failed \n");
1238 }
1239 free(wDest);
1240 free(uDest);
1241
1242
1243 /* test with embedded nulls */
1244 uSrc = src16WithNulls;
1245 uSrcLen = sizeof(src16WithNulls)/2;
1246 wDestLen =0;
1247 uDestLen =0;
1248 wDest = NULL;
1249 uDest = NULL;
1250 /* pre-flight*/
1251 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1252
1253 if(err == U_BUFFER_OVERFLOW_ERROR){
1254 err=U_ZERO_ERROR;
1255 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1256 wDestLen = reqLen+1;
1257 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,uSrcLen-1,&err);
1258 }
1259
1260 /* pre-flight */
1261 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1262
1263 if(err == U_BUFFER_OVERFLOW_ERROR){
1264 err =U_ZERO_ERROR;
1265 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1266 uDestLen = reqLen + 1;
1267 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,reqLen,&err);
1268 }
1269
1270 if(!U_FAILURE(err)) {
1271 for(i=0; i< uSrcLen; i++){
1272 if(uDest[i] != src16WithNulls[i]){
1273 log_verbose("u_str*WCS() failed for string with nulls expected: \\u%04X got: \\u%04X at index: %i \n", src16WithNulls[i] ,uDest[i],i);
1274 failed =true;
1275 }
1276 }
1277 }
1278
1279 if(U_FAILURE(err)){
1280 failed = true;
1281 }
1282 if(failed){
1283 log_err("u_strToWCS() failed \n");
1284 }
1285 free(wDest);
1286 free(uDest);
1287
1288 }
1289
1290 {
1291
1292 uSrc = src16j;
1293 uSrcLen = sizeof(src16j)/2;
1294 wDestLen =0;
1295 uDestLen =0;
1296 wDest = NULL;
1297 uDest = NULL;
1298 wDestLen = 0;
1299 /* pre-flight*/
1300 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&err);
1301
1302 if(err == U_BUFFER_OVERFLOW_ERROR){
1303 err=U_ZERO_ERROR;
1304 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1305 wDestLen = reqLen+1;
1306 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&err);
1307 }
1308 uDestLen = 0;
1309 /* pre-flight */
1310 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&err);
1311
1312 if(err == U_BUFFER_OVERFLOW_ERROR){
1313 err =U_ZERO_ERROR;
1314 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1315 uDestLen = reqLen + 1;
1316 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&err);
1317 }
1318
1319
1320 if(!U_FAILURE(err)) {
1321 for(i=0; i< uSrcLen; i++){
1322 if(uDest[i] != src16j[i]){
1323 log_verbose("u_str*WCS() failed for null terminated string expected: \\u%04X got: \\u%04X at index: %i \n", src16j[i] ,uDest[i],i);
1324 failed =true;
1325 }
1326 }
1327 }
1328
1329 if(U_FAILURE(err)){
1330 failed = true;
1331 }
1332 if(failed){
1333 log_err("u_strToWCS() failed \n");
1334 }
1335 free(wDest);
1336 free(uDest);
1337 }
1338
1339 /*
1340 * Test u_terminateWChars().
1341 * All u_terminateXYZ() use the same implementation macro;
1342 * we test this function to improve API coverage.
1343 */
1344 {
1345 wchar_t buffer[10];
1346
1347 err=U_ZERO_ERROR;
1348 buffer[3]=0x20ac;
1349 wDestLen=u_terminateWChars(buffer, UPRV_LENGTHOF(buffer), 3, &err);
1350 if(err!=U_ZERO_ERROR || wDestLen!=3 || buffer[3]!=0) {
1351 log_err("u_terminateWChars(buffer, all, 3, zero) failed: %s length %d [3]==U+%04x\n",
1352 u_errorName(err), wDestLen, buffer[3]);
1353 }
1354
1355 err=U_ZERO_ERROR;
1356 buffer[3]=0x20ac;
1357 wDestLen=u_terminateWChars(buffer, 3, 3, &err);
1358 if(err!=U_STRING_NOT_TERMINATED_WARNING || wDestLen!=3 || buffer[3]!=0x20ac) {
1359 log_err("u_terminateWChars(buffer, 3, 3, zero) failed: %s length %d [3]==U+%04x\n",
1360 u_errorName(err), wDestLen, buffer[3]);
1361 }
1362
1363 err=U_STRING_NOT_TERMINATED_WARNING;
1364 buffer[3]=0x20ac;
1365 wDestLen=u_terminateWChars(buffer, UPRV_LENGTHOF(buffer), 3, &err);
1366 if(err!=U_ZERO_ERROR || wDestLen!=3 || buffer[3]!=0) {
1367 log_err("u_terminateWChars(buffer, all, 3, not-terminated) failed: %s length %d [3]==U+%04x\n",
1368 u_errorName(err), wDestLen, buffer[3]);
1369 }
1370
1371 err=U_ZERO_ERROR;
1372 buffer[3]=0x20ac;
1373 wDestLen=u_terminateWChars(buffer, 2, 3, &err);
1374 if(err!=U_BUFFER_OVERFLOW_ERROR || wDestLen!=3 || buffer[3]!=0x20ac) {
1375 log_err("u_terminateWChars(buffer, 2, 3, zero) failed: %s length %d [3]==U+%04x\n",
1376 u_errorName(err), wDestLen, buffer[3]);
1377 }
1378 }
1379 #else
1380 log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32");
1381 #endif
1382 }
1383
Test_widestrs()1384 static void Test_widestrs()
1385 {
1386 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1387 wchar_t ws[100];
1388 UChar rts[100];
1389 int32_t wcap = UPRV_LENGTHOF(ws);
1390 int32_t wl;
1391 int32_t rtcap = UPRV_LENGTHOF(rts);
1392 int32_t rtl;
1393 wchar_t *wcs;
1394 UChar *cp;
1395 const char *errname;
1396 UChar ustr[] = {'h', 'e', 'l', 'l', 'o', 0};
1397 int32_t ul = UPRV_LENGTHOF(ustr) -1;
1398 char astr[100];
1399
1400 UErrorCode err;
1401
1402 err = U_ZERO_ERROR;
1403 wcs = u_strToWCS(ws, wcap, &wl, ustr, ul, &err);
1404 if (U_FAILURE(err)) {
1405 errname = u_errorName(err);
1406 log_err("test_widestrs: u_strToWCS error: %s!\n",errname);
1407 }
1408 if(ul!=wl){
1409 log_err("u_strToWCS: ustr = %s, ul = %d, ws = %S, wl = %d!\n", u_austrcpy(astr, ustr), ul, ws, wl);
1410 }
1411 err = U_ZERO_ERROR;
1412 wl = (int32_t)uprv_wcslen(wcs);
1413 cp = u_strFromWCS(rts, rtcap, &rtl, wcs, wl, &err);
1414 (void)cp; /* Suppress set but not used warning. */
1415 if (U_FAILURE(err)) {
1416 errname = u_errorName(err);
1417 fprintf(stderr, "test_widestrs: ucnv_wcstombs error: %s!\n",errname);
1418 }
1419 if(wl != rtl){
1420 log_err("u_strFromWCS: wcs = %S, wl = %d,rts = %s, rtl = %d!\n", wcs, wl, u_austrcpy(astr, rts), rtl);
1421 }
1422 #else
1423 log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32");
1424 #endif
1425 }
1426
1427 static void
Test_WCHART_LongString()1428 Test_WCHART_LongString(){
1429 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
1430 UErrorCode status = U_ZERO_ERROR;
1431 const char* testdatapath=loadTestData(&status);
1432 UResourceBundle *theBundle = ures_open(testdatapath, "testtypes", &status);
1433 int32_t strLen =0;
1434 const UChar* str = ures_getStringByKey(theBundle, "testinclude",&strLen,&status);
1435 const UChar* uSrc = str;
1436 int32_t uSrcLen = strLen;
1437 int32_t wDestLen =0, reqLen=0, i=0;
1438 int32_t uDestLen =0;
1439 wchar_t* wDest = NULL;
1440 UChar* uDest = NULL;
1441 UBool failed = false;
1442
1443 log_verbose("Loaded string of %d UChars\n", uSrcLen);
1444
1445 if(U_FAILURE(status)){
1446 log_data_err("Could not get testinclude resource from testtypes bundle. Error: %s\n",u_errorName(status));
1447 return;
1448 }
1449
1450 /* pre-flight*/
1451 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&status);
1452
1453 if(status == U_BUFFER_OVERFLOW_ERROR){
1454 status=U_ZERO_ERROR;
1455 wDest =(wchar_t*) malloc(sizeof(wchar_t) * (reqLen+1));
1456 wDestLen = reqLen+1;
1457 u_strToWCS(wDest,wDestLen,&reqLen,uSrc,-1,&status);
1458 log_verbose("To %d*%d-byte wchar_ts\n", reqLen,sizeof(wchar_t));
1459 }
1460
1461 {
1462 int j;
1463 for(j=0;j>=0&&j<reqLen;j++) {
1464 if(wDest[j]!=uSrc[j]) {
1465 log_verbose("Diff %04X vs %04X @ %d\n", wDest[j],uSrc[j],j);
1466 break;
1467 }
1468 }
1469 }
1470
1471 uDestLen = 0;
1472 /* pre-flight */
1473 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&status);
1474 if(status == U_BUFFER_OVERFLOW_ERROR){
1475 status =U_ZERO_ERROR;
1476 uDest = (UChar*) malloc(sizeof(UChar) * (reqLen+1));
1477 u_memset(uDest,0xFFFF,reqLen+1);
1478 uDestLen = reqLen + 1;
1479 u_strFromWCS(uDest, uDestLen,&reqLen,wDest,-1,&status);
1480 log_verbose("Back to %d UChars\n", reqLen);
1481 }
1482 #if defined(U_WCHAR_IS_UTF16)
1483 log_verbose("U_WCHAR_IS_UTF16\n");
1484 #elif defined(U_WCHAR_IS_UTF32)
1485 log_verbose("U_WCHAR_IS_UTF32\n");
1486 #else
1487 log_verbose("U_WCHAR_IS_idunno (not UTF)\n");
1488 #endif
1489
1490 if(reqLen!=uSrcLen) {
1491 log_err("Error: dest len is %d but expected src len %d\n", reqLen, uSrcLen);
1492 }
1493
1494 for(i=0; i< uSrcLen; i++){
1495 if(uDest[i] != str[i]){
1496 log_verbose("u_str*WCS() failed for null terminated string expected: \\u%04X got: \\u%04X at index: %i \n", str[i], uDest[i],i);
1497 failed =true;
1498 }
1499 }
1500
1501 if(U_FAILURE(status)){
1502 failed = true;
1503 }
1504 if(failed){
1505 log_err("u_strToWCS() failed \n");
1506 }
1507 free(wDest);
1508 free(uDest);
1509 /* close the bundle */
1510 ures_close(theBundle);
1511 #else
1512 log_info("Not testing u_str*WCS because (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION) and wchar is neither utf16 nor utf32");
1513 #endif
1514 }
1515
Test_strToJavaModifiedUTF8()1516 static void Test_strToJavaModifiedUTF8() {
1517 static const UChar src[]={
1518 0x61, 0x62, 0x63, 0xe1, 0xe2, 0xe3,
1519 0xe01, 0xe02, 0xe03, 0xe001, 0xe002, 0xe003,
1520 0xd800, 0xdc00, 0xdc00, 0xd800, 0,
1521 0xdbff, 0xdfff,
1522 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xed, 0xe0e, 0x6f
1523 };
1524 static const uint8_t expected[]={
1525 0x61, 0x62, 0x63, 0xc3, 0xa1, 0xc3, 0xa2, 0xc3, 0xa3,
1526 0xe0, 0xb8, 0x81, 0xe0, 0xb8, 0x82, 0xe0, 0xb8, 0x83,
1527 0xee, 0x80, 0x81, 0xee, 0x80, 0x82, 0xee, 0x80, 0x83,
1528 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0xc0, 0x80,
1529 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1530 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xc3, 0xad, 0xe0, 0xb8, 0x8e, 0x6f
1531 };
1532 static const UChar shortSrc[]={
1533 0xe01, 0xe1, 0x61
1534 };
1535 static const uint8_t shortExpected[]={
1536 0xe0, 0xb8, 0x81, 0xc3, 0xa1, 0x61
1537 };
1538 static const UChar asciiNul[]={
1539 0x61, 0x62, 0x63, 0
1540 };
1541 static const uint8_t asciiNulExpected[]={
1542 0x61, 0x62, 0x63
1543 };
1544 char dest[200];
1545 char *p;
1546 int32_t length, expectedTerminatedLength;
1547 UErrorCode errorCode;
1548
1549 expectedTerminatedLength=(int32_t)(strstr((const char *)expected, "\xc0\x80")-
1550 (const char *)expected);
1551
1552 errorCode=U_ZERO_ERROR;
1553 length=-5;
1554 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1555 src, UPRV_LENGTHOF(src), &errorCode);
1556 if( U_FAILURE(errorCode) || p!=dest ||
1557 length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1558 dest[length]!=0
1559 ) {
1560 log_err("u_strToJavaModifiedUTF8(normal) failed - %s\n", u_errorName(errorCode));
1561 }
1562 memset(dest, 0xff, sizeof(dest));
1563 errorCode=U_ZERO_ERROR;
1564 length=-5;
1565 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), NULL,
1566 src, UPRV_LENGTHOF(src), &errorCode);
1567 if( U_FAILURE(errorCode) || p!=dest ||
1568 0!=memcmp(dest, expected, UPRV_LENGTHOF(expected)) ||
1569 dest[UPRV_LENGTHOF(expected)]!=0
1570 ) {
1571 log_err("u_strToJavaModifiedUTF8(normal, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1572 }
1573 memset(dest, 0xff, sizeof(dest));
1574 errorCode=U_ZERO_ERROR;
1575 length=-5;
1576 p=u_strToJavaModifiedUTF8(dest, UPRV_LENGTHOF(expected), &length,
1577 src, UPRV_LENGTHOF(src), &errorCode);
1578 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || p!=dest ||
1579 length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1580 dest[length]!=(char)0xff
1581 ) {
1582 log_err("u_strToJavaModifiedUTF8(tight) failed - %s\n", u_errorName(errorCode));
1583 }
1584 memset(dest, 0xff, sizeof(dest));
1585 errorCode=U_ZERO_ERROR;
1586 length=-5;
1587 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length, src, -1, &errorCode);
1588 if( U_FAILURE(errorCode) || p!=dest ||
1589 length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
1590 dest[length]!=0
1591 ) {
1592 log_err("u_strToJavaModifiedUTF8(NUL-terminated) failed - %s\n", u_errorName(errorCode));
1593 }
1594 memset(dest, 0xff, sizeof(dest));
1595 errorCode=U_ZERO_ERROR;
1596 length=-5;
1597 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), NULL, src, -1, &errorCode);
1598 if( U_FAILURE(errorCode) || p!=dest ||
1599 0!=memcmp(dest, expected, expectedTerminatedLength) ||
1600 dest[expectedTerminatedLength]!=0
1601 ) {
1602 log_err("u_strToJavaModifiedUTF8(NUL-terminated, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1603 }
1604 memset(dest, 0xff, sizeof(dest));
1605 errorCode=U_ZERO_ERROR;
1606 length=-5;
1607 p=u_strToJavaModifiedUTF8(dest, UPRV_LENGTHOF(expected)/2, &length,
1608 src, UPRV_LENGTHOF(src), &errorCode);
1609 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1610 length!=UPRV_LENGTHOF(expected) || dest[UPRV_LENGTHOF(expected)/2]!=(char)0xff
1611 ) {
1612 log_err("u_strToJavaModifiedUTF8(overflow) failed - %s\n", u_errorName(errorCode));
1613 }
1614 memset(dest, 0xff, sizeof(dest));
1615 errorCode=U_ZERO_ERROR;
1616 length=-5;
1617 p=u_strToJavaModifiedUTF8(NULL, 0, &length,
1618 src, UPRV_LENGTHOF(src), &errorCode);
1619 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1620 length!=UPRV_LENGTHOF(expected) || dest[0]!=(char)0xff
1621 ) {
1622 log_err("u_strToJavaModifiedUTF8(pure preflighting) failed - %s\n", u_errorName(errorCode));
1623 }
1624 memset(dest, 0xff, sizeof(dest));
1625 errorCode=U_ZERO_ERROR;
1626 length=-5;
1627 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1628 shortSrc, UPRV_LENGTHOF(shortSrc), &errorCode);
1629 if( U_FAILURE(errorCode) || p!=dest ||
1630 length!=UPRV_LENGTHOF(shortExpected) || 0!=memcmp(dest, shortExpected, length) ||
1631 dest[length]!=0
1632 ) {
1633 log_err("u_strToJavaModifiedUTF8(short) failed - %s\n", u_errorName(errorCode));
1634 }
1635 memset(dest, 0xff, sizeof(dest));
1636 errorCode=U_ZERO_ERROR;
1637 length=-5;
1638 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1639 asciiNul, -1, &errorCode);
1640 if( U_FAILURE(errorCode) || p!=dest ||
1641 length!=UPRV_LENGTHOF(asciiNulExpected) || 0!=memcmp(dest, asciiNulExpected, length) ||
1642 dest[length]!=0
1643 ) {
1644 log_err("u_strToJavaModifiedUTF8(asciiNul) failed - %s\n", u_errorName(errorCode));
1645 }
1646 memset(dest, 0xff, sizeof(dest));
1647 errorCode=U_ZERO_ERROR;
1648 length=-5;
1649 p=u_strToJavaModifiedUTF8(dest, (int32_t)sizeof(dest), &length,
1650 NULL, 0, &errorCode);
1651 if( U_FAILURE(errorCode) || p!=dest ||
1652 length!=0 || dest[0]!=0
1653 ) {
1654 log_err("u_strToJavaModifiedUTF8(empty) failed - %s\n", u_errorName(errorCode));
1655 }
1656
1657 /* illegal arguments */
1658 memset(dest, 0xff, sizeof(dest));
1659 errorCode=U_ZERO_ERROR;
1660 length=-5;
1661 p=u_strToJavaModifiedUTF8(NULL, sizeof(dest), &length,
1662 src, UPRV_LENGTHOF(src), &errorCode);
1663 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1664 log_err("u_strToJavaModifiedUTF8(dest=NULL) failed - %s\n", u_errorName(errorCode));
1665 }
1666 memset(dest, 0xff, sizeof(dest));
1667 errorCode=U_ZERO_ERROR;
1668 length=-5;
1669 p=u_strToJavaModifiedUTF8(dest, -1, &length,
1670 src, UPRV_LENGTHOF(src), &errorCode);
1671 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1672 log_err("u_strToJavaModifiedUTF8(destCapacity<0) failed - %s\n", u_errorName(errorCode));
1673 }
1674 memset(dest, 0xff, sizeof(dest));
1675 errorCode=U_ZERO_ERROR;
1676 length=-5;
1677 p=u_strToJavaModifiedUTF8(dest, sizeof(dest), &length,
1678 NULL, UPRV_LENGTHOF(src), &errorCode);
1679 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1680 log_err("u_strToJavaModifiedUTF8(src=NULL) failed - %s\n", u_errorName(errorCode));
1681 }
1682 memset(dest, 0xff, sizeof(dest));
1683 errorCode=U_ZERO_ERROR;
1684 length=-5;
1685 p=u_strToJavaModifiedUTF8(dest, sizeof(dest), &length,
1686 NULL, -1, &errorCode);
1687 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=(char)0xff) {
1688 log_err("u_strToJavaModifiedUTF8(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode));
1689 }
1690 }
1691
Test_strFromJavaModifiedUTF8()1692 static void Test_strFromJavaModifiedUTF8() {
1693 static const uint8_t src[]={
1694 0x61, 0x62, 0x63, 0xc3, 0xa1, 0xc3, 0xa2, 0xc3, 0xa3,
1695 0xe0, 0xb8, 0x81, 0xe0, 0xb8, 0x82, 0xe0, 0xb8, 0x83,
1696 0xee, 0x80, 0x81, 0xee, 0x80, 0x82, 0xee, 0x80, 0x83,
1697 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xb0, 0x80, 0xed, 0xa0, 0x80, 0,
1698 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1699 0x81, 0xc0, 0xe0, 0xb8, 0xf0, 0x90, 0x80, 0x80, /* invalid sequences */
1700 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
1701 0xe0, 0x81, 0xac, 0xe0, 0x83, 0xad, /* non-shortest forms are allowed */
1702 0xe0, 0xb8, 0x8e, 0x6f
1703 };
1704 static const UChar expected[]={
1705 0x61, 0x62, 0x63, 0xe1, 0xe2, 0xe3,
1706 0xe01, 0xe02, 0xe03, 0xe001, 0xe002, 0xe003,
1707 0xd800, 0xdc00, 0xdc00, 0xd800, 0,
1708 0xdbff, 0xdfff,
1709 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1710 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b,
1711 0x6c, 0xed,
1712 0xe0e, 0x6f
1713 };
1714 static const uint8_t shortSrc[]={
1715 0xe0, 0xb8, 0x81, 0xc3, 0xa1, 0x61
1716 };
1717 static const UChar shortExpected[]={
1718 0xe01, 0xe1, 0x61
1719 };
1720 static const uint8_t asciiNul[]={
1721 0x61, 0x62, 0x63, 0
1722 };
1723 static const UChar asciiNulExpected[]={
1724 0x61, 0x62, 0x63
1725 };
1726 static const uint8_t invalid[]={
1727 0x81, 0xc0, 0xe0, 0xb8, 0xf0, 0x90, 0x80, 0x80
1728 };
1729 static const UChar invalidExpectedFFFD[]={
1730 0xfffd, 0xfffd, 0xfffd, 0xfffd
1731 };
1732 static const UChar invalidExpected50000[]={
1733 0xd900, 0xdc00, 0xd900, 0xdc00, 0xd900, 0xdc00, 0xd900, 0xdc00
1734 };
1735 UChar dest[200];
1736 UChar *p;
1737 int32_t length, expectedTerminatedLength;
1738 int32_t numSubstitutions;
1739 UErrorCode errorCode;
1740
1741 expectedTerminatedLength=(int32_t)(u_strchr(expected, 0)-expected);
1742
1743 errorCode=U_ZERO_ERROR;
1744 length=numSubstitutions=-5;
1745 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1746 (const char *)src, UPRV_LENGTHOF(src),
1747 0xfffd, &numSubstitutions, &errorCode);
1748 if( U_FAILURE(errorCode) || p!=dest ||
1749 length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1750 dest[length]!=0 ||
1751 numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1752 ) {
1753 log_err("u_strFromJavaModifiedUTF8WithSub(normal) failed - %s\n", u_errorName(errorCode));
1754 }
1755 memset(dest, 0xff, sizeof(dest));
1756 errorCode=U_ZERO_ERROR;
1757 length=numSubstitutions=-5;
1758 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), NULL,
1759 (const char *)src, UPRV_LENGTHOF(src),
1760 0xfffd, &numSubstitutions, &errorCode);
1761 if( U_FAILURE(errorCode) || p!=dest ||
1762 0!=memcmp(dest, expected, UPRV_LENGTHOF(expected)) ||
1763 dest[UPRV_LENGTHOF(expected)]!=0 ||
1764 numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1765 ) {
1766 log_err("u_strFromJavaModifiedUTF8WithSub(normal, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1767 }
1768 memset(dest, 0xff, sizeof(dest));
1769 errorCode=U_ZERO_ERROR;
1770 length=numSubstitutions=-5;
1771 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1772 (const char *)src, UPRV_LENGTHOF(src),
1773 0xfffd, NULL, &errorCode);
1774 if( U_FAILURE(errorCode) || p!=dest ||
1775 length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1776 dest[length]!=0
1777 ) {
1778 log_err("u_strFromJavaModifiedUTF8WithSub(normal, pNumSubstitutions=NULL) failed - %s\n", u_errorName(errorCode));
1779 }
1780 memset(dest, 0xff, sizeof(dest));
1781 errorCode=U_ZERO_ERROR;
1782 length=numSubstitutions=-5;
1783 p=u_strFromJavaModifiedUTF8WithSub(dest, UPRV_LENGTHOF(expected), &length,
1784 (const char *)src, UPRV_LENGTHOF(src),
1785 0xfffd, &numSubstitutions, &errorCode);
1786 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || p!=dest ||
1787 length!=UPRV_LENGTHOF(expected) || 0!=memcmp(dest, expected, length) ||
1788 dest[length]!=0xffff ||
1789 numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1790 ) {
1791 log_err("u_strFromJavaModifiedUTF8WithSub(tight) failed - %s\n", u_errorName(errorCode));
1792 }
1793 memset(dest, 0xff, sizeof(dest));
1794 errorCode=U_ZERO_ERROR;
1795 length=numSubstitutions=-5;
1796 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1797 (const char *)src, -1,
1798 0xfffd, &numSubstitutions, &errorCode);
1799 if( U_FAILURE(errorCode) || p!=dest ||
1800 length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
1801 dest[length]!=0 ||
1802 numSubstitutions!=0
1803 ) {
1804 log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated) failed - %s\n", u_errorName(errorCode));
1805 }
1806 memset(dest, 0xff, sizeof(dest));
1807 errorCode=U_ZERO_ERROR;
1808 length=numSubstitutions=-5;
1809 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), NULL,
1810 (const char *)src, -1,
1811 0xfffd, &numSubstitutions, &errorCode);
1812 if( U_FAILURE(errorCode) || p!=dest ||
1813 0!=memcmp(dest, expected, expectedTerminatedLength) ||
1814 dest[expectedTerminatedLength]!=0 ||
1815 numSubstitutions!=0
1816 ) {
1817 log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated, pLength=NULL) failed - %s\n", u_errorName(errorCode));
1818 }
1819 memset(dest, 0xff, sizeof(dest));
1820 errorCode=U_ZERO_ERROR;
1821 length=numSubstitutions=-5;
1822 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1823 (const char *)src, -1,
1824 0xfffd, NULL, &errorCode);
1825 if( U_FAILURE(errorCode) || p!=dest ||
1826 length!=expectedTerminatedLength || 0!=memcmp(dest, expected, length) ||
1827 dest[length]!=0
1828 ) {
1829 log_err("u_strFromJavaModifiedUTF8WithSub(NUL-terminated, pNumSubstitutions=NULL) failed - %s\n", u_errorName(errorCode));
1830 }
1831 memset(dest, 0xff, sizeof(dest));
1832 errorCode=U_ZERO_ERROR;
1833 length=numSubstitutions=-5;
1834 p=u_strFromJavaModifiedUTF8WithSub(dest, UPRV_LENGTHOF(expected)/2, &length,
1835 (const char *)src, UPRV_LENGTHOF(src),
1836 0xfffd, &numSubstitutions, &errorCode);
1837 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1838 length!=UPRV_LENGTHOF(expected) || dest[UPRV_LENGTHOF(expected)/2]!=0xffff
1839 ) {
1840 log_err("u_strFromJavaModifiedUTF8WithSub(overflow) failed - %s\n", u_errorName(errorCode));
1841 }
1842 memset(dest, 0xff, sizeof(dest));
1843 errorCode=U_ZERO_ERROR;
1844 length=numSubstitutions=-5;
1845 p=u_strFromJavaModifiedUTF8WithSub(NULL, 0, &length,
1846 (const char *)src, UPRV_LENGTHOF(src),
1847 0xfffd, &numSubstitutions, &errorCode);
1848 if( errorCode!=U_BUFFER_OVERFLOW_ERROR ||
1849 length!=UPRV_LENGTHOF(expected) || dest[0]!=0xffff
1850 ) {
1851 log_err("u_strFromJavaModifiedUTF8WithSub(pure preflighting) failed - %s\n", u_errorName(errorCode));
1852 }
1853 memset(dest, 0xff, sizeof(dest));
1854 errorCode=U_ZERO_ERROR;
1855 length=numSubstitutions=-5;
1856 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1857 (const char *)shortSrc, UPRV_LENGTHOF(shortSrc),
1858 0xfffd, &numSubstitutions, &errorCode);
1859 if( U_FAILURE(errorCode) || p!=dest ||
1860 length!=UPRV_LENGTHOF(shortExpected) || 0!=memcmp(dest, shortExpected, length) ||
1861 dest[length]!=0 ||
1862 numSubstitutions!=0
1863 ) {
1864 log_err("u_strFromJavaModifiedUTF8WithSub(short) failed - %s\n", u_errorName(errorCode));
1865 }
1866 memset(dest, 0xff, sizeof(dest));
1867 errorCode=U_ZERO_ERROR;
1868 length=numSubstitutions=-5;
1869 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1870 (const char *)asciiNul, -1,
1871 0xfffd, &numSubstitutions, &errorCode);
1872 if( U_FAILURE(errorCode) || p!=dest ||
1873 length!=UPRV_LENGTHOF(asciiNulExpected) || 0!=memcmp(dest, asciiNulExpected, length) ||
1874 dest[length]!=0 ||
1875 numSubstitutions!=0
1876 ) {
1877 log_err("u_strFromJavaModifiedUTF8WithSub(asciiNul) failed - %s\n", u_errorName(errorCode));
1878 }
1879 memset(dest, 0xff, sizeof(dest));
1880 errorCode=U_ZERO_ERROR;
1881 length=numSubstitutions=-5;
1882 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1883 NULL, 0, 0xfffd, &numSubstitutions, &errorCode);
1884 if( U_FAILURE(errorCode) || p!=dest ||
1885 length!=0 || dest[0]!=0 ||
1886 numSubstitutions!=0
1887 ) {
1888 log_err("u_strFromJavaModifiedUTF8WithSub(empty) failed - %s\n", u_errorName(errorCode));
1889 }
1890 memset(dest, 0xff, sizeof(dest));
1891 errorCode=U_ZERO_ERROR;
1892 length=numSubstitutions=-5;
1893 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1894 (const char *)invalid, UPRV_LENGTHOF(invalid),
1895 0xfffd, &numSubstitutions, &errorCode);
1896 if( U_FAILURE(errorCode) || p!=dest ||
1897 length!=UPRV_LENGTHOF(invalidExpectedFFFD) || 0!=memcmp(dest, invalidExpectedFFFD, length) ||
1898 dest[length]!=0 ||
1899 numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD)
1900 ) {
1901 log_err("u_strFromJavaModifiedUTF8WithSub(invalid->fffd) failed - %s\n", u_errorName(errorCode));
1902 }
1903 memset(dest, 0xff, sizeof(dest));
1904 errorCode=U_ZERO_ERROR;
1905 length=numSubstitutions=-5;
1906 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1907 (const char *)invalid, UPRV_LENGTHOF(invalid),
1908 0x50000, &numSubstitutions, &errorCode);
1909 if( U_FAILURE(errorCode) || p!=dest ||
1910 length!=UPRV_LENGTHOF(invalidExpected50000) || 0!=memcmp(dest, invalidExpected50000, length) ||
1911 dest[length]!=0 ||
1912 numSubstitutions!=UPRV_LENGTHOF(invalidExpectedFFFD) /* not ...50000 */
1913 ) {
1914 log_err("u_strFromJavaModifiedUTF8WithSub(invalid->50000) failed - %s\n", u_errorName(errorCode));
1915 }
1916 memset(dest, 0xff, sizeof(dest));
1917 errorCode=U_ZERO_ERROR;
1918 length=numSubstitutions=-5;
1919 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1920 (const char *)invalid, UPRV_LENGTHOF(invalid),
1921 U_SENTINEL, &numSubstitutions, &errorCode);
1922 if(errorCode!=U_INVALID_CHAR_FOUND || dest[0]!=0xffff || numSubstitutions!=0) {
1923 log_err("u_strFromJavaModifiedUTF8WithSub(invalid->error) failed - %s\n", u_errorName(errorCode));
1924 }
1925 memset(dest, 0xff, sizeof(dest));
1926 errorCode=U_ZERO_ERROR;
1927 length=numSubstitutions=-5;
1928 p=u_strFromJavaModifiedUTF8WithSub(dest, (int32_t)sizeof(dest), &length,
1929 (const char *)src, UPRV_LENGTHOF(src),
1930 U_SENTINEL, &numSubstitutions, &errorCode);
1931 if( errorCode!=U_INVALID_CHAR_FOUND ||
1932 length>=UPRV_LENGTHOF(expected) || dest[UPRV_LENGTHOF(expected)-1]!=0xffff ||
1933 numSubstitutions!=0
1934 ) {
1935 log_err("u_strFromJavaModifiedUTF8WithSub(normal->error) failed - %s\n", u_errorName(errorCode));
1936 }
1937
1938 /* illegal arguments */
1939 memset(dest, 0xff, sizeof(dest));
1940 errorCode=U_ZERO_ERROR;
1941 length=numSubstitutions=-5;
1942 p=u_strFromJavaModifiedUTF8WithSub(NULL, sizeof(dest), &length,
1943 (const char *)src, UPRV_LENGTHOF(src),
1944 0xfffd, &numSubstitutions, &errorCode);
1945 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1946 log_err("u_strFromJavaModifiedUTF8WithSub(dest=NULL) failed - %s\n", u_errorName(errorCode));
1947 }
1948 memset(dest, 0xff, sizeof(dest));
1949 errorCode=U_ZERO_ERROR;
1950 length=numSubstitutions=-5;
1951 p=u_strFromJavaModifiedUTF8WithSub(dest, -1, &length,
1952 (const char *)src, UPRV_LENGTHOF(src),
1953 0xfffd, &numSubstitutions, &errorCode);
1954 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1955 log_err("u_strFromJavaModifiedUTF8WithSub(destCapacity<0) failed - %s\n", u_errorName(errorCode));
1956 }
1957 memset(dest, 0xff, sizeof(dest));
1958 errorCode=U_ZERO_ERROR;
1959 length=numSubstitutions=-5;
1960 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1961 NULL, UPRV_LENGTHOF(src),
1962 0xfffd, &numSubstitutions, &errorCode);
1963 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1964 log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL) failed - %s\n", u_errorName(errorCode));
1965 }
1966 memset(dest, 0xff, sizeof(dest));
1967 errorCode=U_ZERO_ERROR;
1968 length=numSubstitutions=-5;
1969 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1970 NULL, -1, 0xfffd, &numSubstitutions, &errorCode);
1971 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1972 log_err("u_strFromJavaModifiedUTF8WithSub(src=NULL, srcLength<0) failed - %s\n", u_errorName(errorCode));
1973 }
1974 memset(dest, 0xff, sizeof(dest));
1975 errorCode=U_ZERO_ERROR;
1976 length=numSubstitutions=-5;
1977 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1978 (const char *)src, UPRV_LENGTHOF(src),
1979 0x110000, &numSubstitutions, &errorCode);
1980 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1981 log_err("u_strFromJavaModifiedUTF8WithSub(subchar=U_SENTINEL) failed - %s\n", u_errorName(errorCode));
1982 }
1983 memset(dest, 0xff, sizeof(dest));
1984 errorCode=U_ZERO_ERROR;
1985 length=numSubstitutions=-5;
1986 p=u_strFromJavaModifiedUTF8WithSub(dest, sizeof(dest), &length,
1987 (const char *)src, UPRV_LENGTHOF(src),
1988 0xdfff, &numSubstitutions, &errorCode);
1989 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR || dest[0]!=0xffff) {
1990 log_err("u_strFromJavaModifiedUTF8WithSub(subchar is surrogate) failed - %s\n", u_errorName(errorCode));
1991 }
1992 }
1993
1994 /* test that string transformation functions permit NULL source pointer when source length==0 */
TestNullEmptySource()1995 static void TestNullEmptySource() {
1996 char dest8[4]={ 3, 3, 3, 3 };
1997 UChar dest16[4]={ 3, 3, 3, 3 };
1998 UChar32 dest32[4]={ 3, 3, 3, 3 };
1999 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
2000 wchar_t destW[4]={ 3, 3, 3, 3 };
2001 #endif
2002
2003 int32_t length;
2004 UErrorCode errorCode;
2005
2006 /* u_strFromXyz() */
2007
2008 dest16[0]=3;
2009 length=3;
2010 errorCode=U_ZERO_ERROR;
2011 u_strFromUTF8(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2012 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2013 log_err("u_strFromUTF8(source=NULL, sourceLength=0) failed\n");
2014 }
2015
2016 dest16[0]=3;
2017 length=3;
2018 errorCode=U_ZERO_ERROR;
2019 u_strFromUTF8WithSub(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2020 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2021 log_err("u_strFromUTF8WithSub(source=NULL, sourceLength=0) failed\n");
2022 }
2023
2024 dest16[0]=3;
2025 length=3;
2026 errorCode=U_ZERO_ERROR;
2027 u_strFromUTF8Lenient(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2028 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2029 log_err("u_strFromUTF8Lenient(source=NULL, sourceLength=0) failed\n");
2030 }
2031
2032 dest16[0]=3;
2033 length=3;
2034 errorCode=U_ZERO_ERROR;
2035 u_strFromUTF32(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2036 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2037 log_err("u_strFromUTF32(source=NULL, sourceLength=0) failed\n");
2038 }
2039
2040 dest16[0]=3;
2041 length=3;
2042 errorCode=U_ZERO_ERROR;
2043 u_strFromUTF32WithSub(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2044 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2045 log_err("u_strFromUTF32WithSub(source=NULL, sourceLength=0) failed\n");
2046 }
2047
2048 dest16[0]=3;
2049 length=3;
2050 errorCode=U_ZERO_ERROR;
2051 u_strFromJavaModifiedUTF8WithSub(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2052 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2053 log_err("u_strFromJavaModifiedUTF8WithSub(source=NULL, sourceLength=0) failed\n");
2054 }
2055
2056 /* u_strToXyz() */
2057
2058 dest8[0]=3;
2059 length=3;
2060 errorCode=U_ZERO_ERROR;
2061 u_strToUTF8(dest8, UPRV_LENGTHOF(dest8), &length, NULL, 0, &errorCode);
2062 if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) {
2063 log_err("u_strToUTF8(source=NULL, sourceLength=0) failed\n");
2064 }
2065
2066 dest8[0]=3;
2067 length=3;
2068 errorCode=U_ZERO_ERROR;
2069 u_strToUTF8WithSub(dest8, UPRV_LENGTHOF(dest8), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2070 if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) {
2071 log_err("u_strToUTF8(source=NULL, sourceLength=0) failed\n");
2072 }
2073
2074 dest32[0]=3;
2075 length=3;
2076 errorCode=U_ZERO_ERROR;
2077 u_strToUTF32(dest32, UPRV_LENGTHOF(dest32), &length, NULL, 0, &errorCode);
2078 if(errorCode!=U_ZERO_ERROR || length!=0 || dest32[0]!=0 || dest32[1]!=3) {
2079 log_err("u_strToUTF32(source=NULL, sourceLength=0) failed\n");
2080 }
2081
2082 dest32[0]=3;
2083 length=3;
2084 errorCode=U_ZERO_ERROR;
2085 u_strToUTF32WithSub(dest32, UPRV_LENGTHOF(dest32), &length, NULL, 0, 0xfffd, NULL, &errorCode);
2086 if(errorCode!=U_ZERO_ERROR || length!=0 || dest32[0]!=0 || dest32[1]!=3) {
2087 log_err("u_strToUTF32WithSub(source=NULL, sourceLength=0) failed\n");
2088 }
2089
2090 dest8[0]=3;
2091 length=3;
2092 errorCode=U_ZERO_ERROR;
2093 u_strToJavaModifiedUTF8(dest8, UPRV_LENGTHOF(dest8), &length, NULL, 0, &errorCode);
2094 if(errorCode!=U_ZERO_ERROR || length!=0 || dest8[0]!=0 || dest8[1]!=3) {
2095 log_err("u_strToJavaModifiedUTF8(source=NULL, sourceLength=0) failed\n");
2096 }
2097
2098 #if (defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32)) || (!UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION)
2099
2100 dest16[0]=3;
2101 length=3;
2102 errorCode=U_ZERO_ERROR;
2103 u_strFromWCS(dest16, UPRV_LENGTHOF(dest16), &length, NULL, 0, &errorCode);
2104 if(errorCode!=U_ZERO_ERROR || length!=0 || dest16[0]!=0 || dest16[1]!=3) {
2105 log_err("u_strFromWCS(source=NULL, sourceLength=0) failed\n");
2106 }
2107
2108 destW[0]=3;
2109 length=3;
2110 errorCode=U_ZERO_ERROR;
2111 u_strToWCS(destW, UPRV_LENGTHOF(destW), &length, NULL, 0, &errorCode);
2112 if(errorCode!=U_ZERO_ERROR || length!=0 || destW[0]!=0 || destW[1]!=3) {
2113 log_err("u_strToWCS(source=NULL, sourceLength=0) failed\n");
2114 }
2115
2116 #endif
2117 }
2118