1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 * Copyright (C) 2002-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 ******************************************************************************
10 * file name: custrtst.c
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2002oct09
16 * created by: Markus W. Scherer
17 *
18 * Tests of ustring.h Unicode string API functions.
19 */
20
21 #include "unicode/ustring.h"
22 #include "unicode/ucnv.h"
23 #include "unicode/uiter.h"
24 #include "cintltst.h"
25 #include "cstring.h"
26 #include "cmemory.h"
27 #include <string.h>
28
/*
 * Get the sign of an integer: -1 for negative, 0 for zero, +1 for positive.
 * The argument is converted to int32_t first (as before) and is evaluated
 * more than once, so it must not have side effects.
 * Uses comparisons instead of right-shifting a negative value, whose result
 * is implementation-defined in C.
 */
#define _SIGN(value) (((int32_t)(value) > 0) - ((int32_t)(value) < 0))
31
32 /* test setup --------------------------------------------------------------- */
33
34 static void setUpDataTable(void);
35 static void TestStringCopy(void);
36 static void TestStringFunctions(void);
37 static void TestStringSearching(void);
38 static void TestSurrogateSearching(void);
39 static void TestUnescape(void);
40 static void TestUnescapeRepeatedSurrogateLead20725(void);
41 static void TestCountChar32(void);
42 static void TestUCharIterator(void);
43
44 void addUStringTest(TestNode** root);
45
/*
 * Registers all tests of this file with the test framework.
 * Each test function is added under its "tsutil/custrtst/..." path,
 * in the order in which the entries are listed below.
 */
void addUStringTest(TestNode** root)
{
    static const struct {
        void (*test)(void);   /* test function to run */
        const char *path;     /* path under which the test is registered */
    } registrations[] = {
        { TestStringCopy,         "tsutil/custrtst/TestStringCopy" },
        { TestStringFunctions,    "tsutil/custrtst/TestStringFunctions" },
        { TestStringSearching,    "tsutil/custrtst/TestStringSearching" },
        { TestSurrogateSearching, "tsutil/custrtst/TestSurrogateSearching" },
        { TestUnescape,           "tsutil/custrtst/TestUnescape" },
        { TestUnescapeRepeatedSurrogateLead20725,
                                  "tsutil/custrtst/TestUnescapeRepeatedSurrogateLead20725" },
        { TestCountChar32,        "tsutil/custrtst/TestCountChar32" },
        { TestUCharIterator,      "tsutil/custrtst/TestUCharIterator" }
    };
    int32_t idx;

    for (idx = 0; idx < UPRV_LENGTHOF(registrations); ++idx) {
        addTest(root, registrations[idx].test, registrations[idx].path);
    }
}
58
59 /* test data for TestStringFunctions ---------------------------------------- */
60
61 UChar*** dataTable = NULL;
62
63 static const char* raw[3][4] = {
64
65 /* First String */
66 { "English_", "French_", "Croatian_", "English_"},
67 /* Second String */
68 { "United States", "France", "Croatia", "Unites States"},
69
70 /* Concatenated string */
71 { "English_United States", "French_France", "Croatian_Croatia", "English_United States"}
72 };
73
setUpDataTable()74 static void setUpDataTable()
75 {
76 int32_t i,j;
77 if(dataTable == NULL) {
78 dataTable = (UChar***)calloc(sizeof(UChar**),3);
79
80 for (i = 0; i < 3; i++) {
81 dataTable[i] = (UChar**)calloc(sizeof(UChar*),4);
82 for (j = 0; j < 4; j++){
83 dataTable[i][j] = (UChar*) malloc(sizeof(UChar)*(strlen(raw[i][j])+1));
84 u_uastrcpy(dataTable[i][j],raw[i][j]);
85 }
86 }
87 }
88 }
89
cleanUpDataTable()90 static void cleanUpDataTable()
91 {
92 int32_t i,j;
93 if(dataTable != NULL) {
94 for (i=0; i<3; i++) {
95 for(j = 0; j<4; j++) {
96 free(dataTable[i][j]);
97 }
98 free(dataTable[i]);
99 }
100 free(dataTable);
101 }
102 dataTable = NULL;
103 }
104
105 /*Tests for u_strcat(),u_strcmp(), u_strlen(), u_strcpy(),u_strncat(),u_strncmp(),u_strncpy, u_uastrcpy(),u_austrcpy(), u_uastrncpy(); */
TestStringFunctions()106 static void TestStringFunctions()
107 {
108 int32_t i,j,k;
109 UChar temp[512];
110 UChar nullTemp[512];
111 char test[512];
112 char tempOut[512];
113
114 setUpDataTable();
115
116 log_verbose("Testing u_strlen()\n");
117 if( u_strlen(dataTable[0][0])!= u_strlen(dataTable[0][3]) || u_strlen(dataTable[0][0]) == u_strlen(dataTable[0][2]))
118 log_err("There is an error in u_strlen()");
119
120 log_verbose("Testing u_memcpy() and u_memcmp()\n");
121
122 for(i=0;i<3;++i)
123 {
124 for(j=0;j<4;++j)
125 {
126 log_verbose("Testing %s\n", u_austrcpy(tempOut, dataTable[i][j]));
127 temp[0] = 0;
128 temp[7] = 0xA4; /* Mark the end */
129 u_memcpy(temp,dataTable[i][j], 7);
130
131 if(temp[7] != 0xA4)
132 log_err("an error occured in u_memcpy()\n");
133 if(u_memcmp(temp, dataTable[i][j], 7)!=0)
134 log_err("an error occured in u_memcpy() or u_memcmp()\n");
135 }
136 }
137 if(u_memcmp(dataTable[0][0], dataTable[1][1], 7)==0)
138 log_err("an error occured in u_memcmp()\n");
139
140 log_verbose("Testing u_memset()\n");
141 nullTemp[0] = 0;
142 nullTemp[7] = 0;
143 u_memset(nullTemp, 0xa4, 7);
144 for (i = 0; i < 7; i++) {
145 if(nullTemp[i] != 0xa4) {
146 log_err("an error occured in u_memset()\n");
147 }
148 }
149 if(nullTemp[7] != 0) {
150 log_err("u_memset() went too far\n");
151 }
152
153 u_memset(nullTemp, 0, 7);
154 nullTemp[7] = 0xa4;
155 temp[7] = 0;
156 u_memcpy(temp,nullTemp, 7);
157 if(u_memcmp(temp, nullTemp, 7)!=0 || temp[7]!=0)
158 log_err("an error occured in u_memcpy() or u_memcmp()\n");
159
160
161 log_verbose("Testing u_memmove()\n");
162 for (i = 0; i < 7; i++) {
163 temp[i] = (UChar)i;
164 }
165 u_memmove(temp + 1, temp, 7);
166 if(temp[0] != 0) {
167 log_err("an error occured in u_memmove()\n");
168 }
169 for (i = 1; i <= 7; i++) {
170 if(temp[i] != (i - 1)) {
171 log_err("an error occured in u_memmove()\n");
172 }
173 }
174
175 log_verbose("Testing u_strcpy() and u_strcmp()\n");
176
177 for(i=0;i<3;++i)
178 {
179 for(j=0;j<4;++j)
180 {
181 log_verbose("Testing %s\n", u_austrcpy(tempOut, dataTable[i][j]));
182 temp[0] = 0;
183 u_strcpy(temp,dataTable[i][j]);
184
185 if(u_strcmp(temp,dataTable[i][j])!=0)
186 log_err("something threw an error in u_strcpy() or u_strcmp()\n");
187 }
188 }
189 if(u_strcmp(dataTable[0][0], dataTable[1][1])==0)
190 log_err("an error occured in u_memcmp()\n");
191
192 log_verbose("testing u_strcat()\n");
193 i=0;
194 for(j=0; j<2;++j)
195 {
196 u_uastrcpy(temp, "");
197 u_strcpy(temp,dataTable[i][j]);
198 u_strcat(temp,dataTable[i+1][j]);
199 if(u_strcmp(temp,dataTable[i+2][j])!=0)
200 log_err("something threw an error in u_strcat()\n");
201
202 }
203 log_verbose("Testing u_strncmp()\n");
204 for(i=0,j=0;j<4; ++j)
205 {
206 k=u_strlen(dataTable[i][j]);
207 if(u_strncmp(dataTable[i][j],dataTable[i+2][j],k)!=0)
208 log_err("Something threw an error in u_strncmp\n");
209 }
210 if(u_strncmp(dataTable[0][0], dataTable[1][1], 7)==0)
211 log_err("an error occured in u_memcmp()\n");
212
213
214 log_verbose("Testing u_strncat\n");
215 for(i=0,j=0;j<4; ++j)
216 {
217 k=u_strlen(dataTable[i][j]);
218
219 u_uastrcpy(temp,"");
220
221 if(u_strcmp(u_strncat(temp,dataTable[i+2][j],k),dataTable[i][j])!=0)
222 log_err("something threw an error in u_strncat or u_uastrcpy()\n");
223
224 }
225
226 log_verbose("Testing u_strncpy() and u_uastrcpy()\n");
227 for(i=2,j=0;j<4; ++j)
228 {
229 k=u_strlen(dataTable[i][j]);
230 u_strncpy(temp, dataTable[i][j],k);
231 temp[k] = 0xa4;
232
233 if(u_strncmp(temp, dataTable[i][j],k)!=0)
234 log_err("something threw an error in u_strncpy()\n");
235
236 if(temp[k] != 0xa4)
237 log_err("something threw an error in u_strncpy()\n");
238
239 u_memset(temp, 0x3F, UPRV_LENGTHOF(temp) - 1);
240 u_uastrncpy(temp, raw[i][j], k-1);
241 if(u_strncmp(temp, dataTable[i][j],k-1)!=0)
242 log_err("something threw an error in u_uastrncpy(k-1)\n");
243
244 if(temp[k-1] != 0x3F)
245 log_err("something threw an error in u_uastrncpy(k-1)\n");
246
247 u_memset(temp, 0x3F, UPRV_LENGTHOF(temp) - 1);
248 u_uastrncpy(temp, raw[i][j], k+1);
249 if(u_strcmp(temp, dataTable[i][j])!=0)
250 log_err("something threw an error in u_uastrncpy(k+1)\n");
251
252 if(temp[k] != 0)
253 log_err("something threw an error in u_uastrncpy(k+1)\n");
254
255 u_memset(temp, 0x3F, UPRV_LENGTHOF(temp) - 1);
256 u_uastrncpy(temp, raw[i][j], k);
257 if(u_strncmp(temp, dataTable[i][j], k)!=0)
258 log_err("something threw an error in u_uastrncpy(k)\n");
259
260 if(temp[k] != 0x3F)
261 log_err("something threw an error in u_uastrncpy(k)\n");
262 }
263
264 log_verbose("Testing u_strchr() and u_memchr()\n");
265
266 for(i=2,j=0;j<4;j++)
267 {
268 UChar saveVal = dataTable[i][j][0];
269 UChar *findPtr = u_strchr(dataTable[i][j], 0x005F);
270 int32_t dataSize = (int32_t)(u_strlen(dataTable[i][j]) + 1);
271
272 log_verbose("%s ", u_austrcpy(tempOut, findPtr));
273
274 if (findPtr == NULL || *findPtr != 0x005F) {
275 log_err("u_strchr can't find '_' in the string\n");
276 }
277
278 findPtr = u_strchr32(dataTable[i][j], 0x005F);
279 if (findPtr == NULL || *findPtr != 0x005F) {
280 log_err("u_strchr32 can't find '_' in the string\n");
281 }
282
283 findPtr = u_strchr(dataTable[i][j], 0);
284 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
285 log_err("u_strchr can't find NULL in the string\n");
286 }
287
288 findPtr = u_strchr32(dataTable[i][j], 0);
289 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
290 log_err("u_strchr32 can't find NULL in the string\n");
291 }
292
293 findPtr = u_memchr(dataTable[i][j], 0, dataSize);
294 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
295 log_err("u_memchr can't find NULL in the string\n");
296 }
297
298 findPtr = u_memchr32(dataTable[i][j], 0, dataSize);
299 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
300 log_err("u_memchr32 can't find NULL in the string\n");
301 }
302
303 dataTable[i][j][0] = 0;
304 /* Make sure we skip over the NULL termination */
305 findPtr = u_memchr(dataTable[i][j], 0x005F, dataSize);
306 if (findPtr == NULL || *findPtr != 0x005F) {
307 log_err("u_memchr can't find '_' in the string\n");
308 }
309
310 findPtr = u_memchr32(dataTable[i][j], 0x005F, dataSize);
311 if (findPtr == NULL || *findPtr != 0x005F) {
312 log_err("u_memchr32 can't find '_' in the string\n");
313 }
314 findPtr = u_memchr32(dataTable[i][j], 0xFFFD, dataSize);
315 if (findPtr != NULL) {
316 log_err("Should have found NULL when the character is not there.\n");
317 }
318 dataTable[i][j][0] = saveVal; /* Put it back for the other tests */
319 }
320
321 /*
322 * test that u_strchr32()
323 * does not find surrogate code points when they are part of matched pairs
324 * (= part of supplementary code points)
325 * Jitterbug 1542
326 */
327 {
328 static const UChar s[]={
329 /* 0 1 2 3 4 5 6 7 8 9 */
330 0x0061, 0xd841, 0xdc02, 0xd841, 0x0062, 0xdc02, 0xd841, 0xdc02, 0x0063, 0
331 };
332
333 if(u_strchr32(s, 0xd841)!=(s+3) || u_strchr32(s, 0xdc02)!=(s+5)) {
334 log_err("error: u_strchr32(surrogate) finds a partial supplementary code point\n");
335 }
336 if(u_memchr32(s, 0xd841, 9)!=(s+3) || u_memchr32(s, 0xdc02, 9)!=(s+5)) {
337 log_err("error: u_memchr32(surrogate) finds a partial supplementary code point\n");
338 }
339 }
340
341 log_verbose("Testing u_austrcpy()");
342 u_austrcpy(test,dataTable[0][0]);
343 if(strcmp(test,raw[0][0])!=0)
344 log_err("There is an error in u_austrcpy()");
345
346
347 log_verbose("Testing u_strtok_r()");
348 {
349 const char tokString[] = " , 1 2 3 AHHHHH! 5.5 6 7 , 8\n";
350 const char *tokens[] = {",", "1", "2", "3", "AHHHHH!", "5.5", "6", "7", "8\n"};
351 UChar delimBuf[sizeof(test)];
352 UChar currTokenBuf[sizeof(tokString)];
353 UChar *state;
354 uint32_t currToken = 0;
355 UChar *ptr;
356
357 u_uastrcpy(temp, tokString);
358 u_uastrcpy(delimBuf, " ");
359
360 ptr = u_strtok_r(temp, delimBuf, &state);
361 u_uastrcpy(delimBuf, " ,");
362 while (ptr != NULL) {
363 u_uastrcpy(currTokenBuf, tokens[currToken]);
364 if (u_strcmp(ptr, currTokenBuf) != 0) {
365 log_err("u_strtok_r mismatch at %d. Got: %s, Expected: %s\n", currToken, ptr, tokens[currToken]);
366 }
367 ptr = u_strtok_r(NULL, delimBuf, &state);
368 currToken++;
369 }
370
371 if (currToken != UPRV_LENGTHOF(tokens)) {
372 log_err("Didn't get correct number of tokens\n");
373 }
374 state = delimBuf; /* Give it an "invalid" saveState */
375 u_uastrcpy(currTokenBuf, "");
376 if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) {
377 log_err("Didn't get NULL for empty string\n");
378 }
379 if (state != NULL) {
380 log_err("State should be NULL for empty string\n");
381 }
382 state = delimBuf; /* Give it an "invalid" saveState */
383 u_uastrcpy(currTokenBuf, ", ,");
384 if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) {
385 log_err("Didn't get NULL for a string of delimiters\n");
386 }
387 if (state != NULL) {
388 log_err("State should be NULL for a string of delimiters\n");
389 }
390
391 state = delimBuf; /* Give it an "invalid" saveState */
392 u_uastrcpy(currTokenBuf, "q, ,");
393 if (u_strtok_r(currTokenBuf, delimBuf, &state) == NULL) {
394 log_err("Got NULL for a string that does not begin with delimiters\n");
395 }
396 if (u_strtok_r(NULL, delimBuf, &state) != NULL) {
397 log_err("Didn't get NULL for a string that ends in delimiters\n");
398 }
399 if (state != NULL) {
400 log_err("State should be NULL for empty string\n");
401 }
402
403 state = delimBuf; /* Give it an "invalid" saveState */
404 u_uastrcpy(currTokenBuf, tokString);
405 u_uastrcpy(temp, tokString);
406 u_uastrcpy(delimBuf, "q"); /* Give it a delimiter that it can't find. */
407 ptr = u_strtok_r(currTokenBuf, delimBuf, &state);
408 if (ptr == NULL || u_strcmp(ptr, temp) != 0) {
409 log_err("Should have recieved the same string when there are no delimiters\n");
410 }
411 if (u_strtok_r(NULL, delimBuf, &state) != NULL) {
412 log_err("Should not have found another token in a one token string\n");
413 }
414 }
415
416 /* test u_strcmpCodePointOrder() */
417 {
418 /* these strings are in ascending order */
419 static const UChar strings[][4]={
420 { 0x61, 0 }, /* U+0061 */
421 { 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */
422 { 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */
423 { 0xd800, 0 }, /* U+d800 */
424 { 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */
425 { 0xdfff, 0 }, /* U+dfff */
426 { 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */
427 { 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */
428 { 0xd800, 0xdc02, 0 }, /* U+10002 */
429 { 0xd84d, 0xdc56, 0 } /* U+23456 */
430 };
431
432 UCharIterator iter1, iter2;
433 int32_t len1, len2, r1, r2;
434
435 for(i=0; i<(UPRV_LENGTHOF(strings)-1); ++i) {
436 if(u_strcmpCodePointOrder(strings[i], strings[i+1])>=0) {
437 log_err("error: u_strcmpCodePointOrder() fails for string %d and the following one\n", i);
438 }
439 if(u_strncmpCodePointOrder(strings[i], strings[i+1], 10)>=0) {
440 log_err("error: u_strncmpCodePointOrder() fails for string %d and the following one\n", i);
441 }
442
443 /* There are at least 2 UChars in each string - verify that strncmp()==memcmp(). */
444 if(u_strncmpCodePointOrder(strings[i], strings[i+1], 2)!=u_memcmpCodePointOrder(strings[i], strings[i+1], 2)) {
445 log_err("error: u_strncmpCodePointOrder(2)!=u_memcmpCodePointOrder(2) for string %d and the following one\n", i);
446 }
447
448 /* test u_strCompare(TRUE) */
449 len1=u_strlen(strings[i]);
450 len2=u_strlen(strings[i+1]);
451 if( u_strCompare(strings[i], -1, strings[i+1], -1, TRUE)>=0 ||
452 u_strCompare(strings[i], -1, strings[i+1], len2, TRUE)>=0 ||
453 u_strCompare(strings[i], len1, strings[i+1], -1, TRUE)>=0 ||
454 u_strCompare(strings[i], len1, strings[i+1], len2, TRUE)>=0
455 ) {
456 log_err("error: u_strCompare(code point order) fails for string %d and the following one\n", i);
457 }
458
459 /* test u_strCompare(FALSE) */
460 r1=u_strCompare(strings[i], -1, strings[i+1], -1, FALSE);
461 r2=u_strcmp(strings[i], strings[i+1]);
462 if(_SIGN(r1)!=_SIGN(r2)) {
463 log_err("error: u_strCompare(code unit order)!=u_strcmp() for string %d and the following one\n", i);
464 }
465
466 /* test u_strCompareIter() */
467 uiter_setString(&iter1, strings[i], len1);
468 uiter_setString(&iter2, strings[i+1], len2);
469 if(u_strCompareIter(&iter1, &iter2, TRUE)>=0) {
470 log_err("error: u_strCompareIter(code point order) fails for string %d and the following one\n", i);
471 }
472 r1=u_strCompareIter(&iter1, &iter2, FALSE);
473 if(_SIGN(r1)!=_SIGN(u_strcmp(strings[i], strings[i+1]))) {
474 log_err("error: u_strCompareIter(code unit order)!=u_strcmp() for string %d and the following one\n", i);
475 }
476 }
477 }
478
479 cleanUpDataTable();
480 }
481
TestStringSearching()482 static void TestStringSearching()
483 {
484 const UChar testString[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0064, 0x0061, 0};
485 const UChar testSurrogateString[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0x0063, 0x0064, 0x0064, 0xdbff, 0xdfff, 0xdb00, 0xdf00, 0x0061, 0};
486 const UChar surrMatchSet1[] = {0xdbff, 0xdfff, 0};
487 const UChar surrMatchSet2[] = {0x0061, 0x0062, 0xdbff, 0xdfff, 0};
488 const UChar surrMatchSet3[] = {0xdb00, 0xdf00, 0xdbff, 0xdfff, 0};
489 const UChar surrMatchSet4[] = {0x0000};
490 const UChar surrMatchSetBad[] = {0xdbff, 0x0061, 0};
491 const UChar surrMatchSetBad2[] = {0x0061, 0xdbff, 0};
492 const UChar surrMatchSetBad3[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0}; /* has partial surrogate */
493 const UChar
494 empty[] = { 0 },
495 a[] = { 0x61, 0 },
496 ab[] = { 0x61, 0x62, 0 },
497 ba[] = { 0x62, 0x61, 0 },
498 abcd[] = { 0x61, 0x62, 0x63, 0x64, 0 },
499 cd[] = { 0x63, 0x64, 0 },
500 dc[] = { 0x64, 0x63, 0 },
501 cdh[] = { 0x63, 0x64, 0x68, 0 },
502 f[] = { 0x66, 0 },
503 fg[] = { 0x66, 0x67, 0 },
504 gf[] = { 0x67, 0x66, 0 };
505
506 log_verbose("Testing u_strpbrk()");
507
508 if (u_strpbrk(testString, a) != &testString[0]) {
509 log_err("u_strpbrk couldn't find first letter a.\n");
510 }
511 if (u_strpbrk(testString, dc) != &testString[2]) {
512 log_err("u_strpbrk couldn't find d or c.\n");
513 }
514 if (u_strpbrk(testString, cd) != &testString[2]) {
515 log_err("u_strpbrk couldn't find c or d.\n");
516 }
517 if (u_strpbrk(testString, cdh) != &testString[2]) {
518 log_err("u_strpbrk couldn't find c, d or h.\n");
519 }
520 if (u_strpbrk(testString, f) != NULL) {
521 log_err("u_strpbrk didn't return NULL for \"f\".\n");
522 }
523 if (u_strpbrk(testString, fg) != NULL) {
524 log_err("u_strpbrk didn't return NULL for \"fg\".\n");
525 }
526 if (u_strpbrk(testString, gf) != NULL) {
527 log_err("u_strpbrk didn't return NULL for \"gf\".\n");
528 }
529 if (u_strpbrk(testString, empty) != NULL) {
530 log_err("u_strpbrk didn't return NULL for \"\".\n");
531 }
532
533 log_verbose("Testing u_strpbrk() with surrogates");
534
535 if (u_strpbrk(testSurrogateString, a) != &testSurrogateString[1]) {
536 log_err("u_strpbrk couldn't find first letter a.\n");
537 }
538 if (u_strpbrk(testSurrogateString, dc) != &testSurrogateString[5]) {
539 log_err("u_strpbrk couldn't find d or c.\n");
540 }
541 if (u_strpbrk(testSurrogateString, cd) != &testSurrogateString[5]) {
542 log_err("u_strpbrk couldn't find c or d.\n");
543 }
544 if (u_strpbrk(testSurrogateString, cdh) != &testSurrogateString[5]) {
545 log_err("u_strpbrk couldn't find c, d or h.\n");
546 }
547 if (u_strpbrk(testSurrogateString, f) != NULL) {
548 log_err("u_strpbrk didn't return NULL for \"f\".\n");
549 }
550 if (u_strpbrk(testSurrogateString, fg) != NULL) {
551 log_err("u_strpbrk didn't return NULL for \"fg\".\n");
552 }
553 if (u_strpbrk(testSurrogateString, gf) != NULL) {
554 log_err("u_strpbrk didn't return NULL for \"gf\".\n");
555 }
556 if (u_strpbrk(testSurrogateString, surrMatchSet1) != &testSurrogateString[3]) {
557 log_err("u_strpbrk couldn't find \"0xdbff, 0xdfff\".\n");
558 }
559 if (u_strpbrk(testSurrogateString, surrMatchSet2) != &testSurrogateString[1]) {
560 log_err("u_strpbrk couldn't find \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
561 }
562 if (u_strpbrk(testSurrogateString, surrMatchSet3) != &testSurrogateString[3]) {
563 log_err("u_strpbrk couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
564 }
565 if (u_strpbrk(testSurrogateString, surrMatchSet4) != NULL) {
566 log_err("u_strpbrk should have returned NULL for empty string.\n");
567 }
568 if (u_strpbrk(testSurrogateString, surrMatchSetBad) != &testSurrogateString[0]) {
569 log_err("u_strpbrk should have found bad surrogate.\n");
570 }
571
572 log_verbose("Testing u_strcspn()");
573
574 if (u_strcspn(testString, a) != 0) {
575 log_err("u_strcspn couldn't find first letter a.\n");
576 }
577 if (u_strcspn(testString, dc) != 2) {
578 log_err("u_strcspn couldn't find d or c.\n");
579 }
580 if (u_strcspn(testString, cd) != 2) {
581 log_err("u_strcspn couldn't find c or d.\n");
582 }
583 if (u_strcspn(testString, cdh) != 2) {
584 log_err("u_strcspn couldn't find c, d or h.\n");
585 }
586 if (u_strcspn(testString, f) != u_strlen(testString)) {
587 log_err("u_strcspn didn't return NULL for \"f\".\n");
588 }
589 if (u_strcspn(testString, fg) != u_strlen(testString)) {
590 log_err("u_strcspn didn't return NULL for \"fg\".\n");
591 }
592 if (u_strcspn(testString, gf) != u_strlen(testString)) {
593 log_err("u_strcspn didn't return NULL for \"gf\".\n");
594 }
595
596 log_verbose("Testing u_strcspn() with surrogates");
597
598 if (u_strcspn(testSurrogateString, a) != 1) {
599 log_err("u_strcspn couldn't find first letter a.\n");
600 }
601 if (u_strcspn(testSurrogateString, dc) != 5) {
602 log_err("u_strcspn couldn't find d or c.\n");
603 }
604 if (u_strcspn(testSurrogateString, cd) != 5) {
605 log_err("u_strcspn couldn't find c or d.\n");
606 }
607 if (u_strcspn(testSurrogateString, cdh) != 5) {
608 log_err("u_strcspn couldn't find c, d or h.\n");
609 }
610 if (u_strcspn(testSurrogateString, f) != u_strlen(testSurrogateString)) {
611 log_err("u_strcspn didn't return NULL for \"f\".\n");
612 }
613 if (u_strcspn(testSurrogateString, fg) != u_strlen(testSurrogateString)) {
614 log_err("u_strcspn didn't return NULL for \"fg\".\n");
615 }
616 if (u_strcspn(testSurrogateString, gf) != u_strlen(testSurrogateString)) {
617 log_err("u_strcspn didn't return NULL for \"gf\".\n");
618 }
619 if (u_strcspn(testSurrogateString, surrMatchSet1) != 3) {
620 log_err("u_strcspn couldn't find \"0xdbff, 0xdfff\".\n");
621 }
622 if (u_strcspn(testSurrogateString, surrMatchSet2) != 1) {
623 log_err("u_strcspn couldn't find \"a, b, 0xdbff, 0xdfff\".\n");
624 }
625 if (u_strcspn(testSurrogateString, surrMatchSet3) != 3) {
626 log_err("u_strcspn couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
627 }
628 if (u_strcspn(testSurrogateString, surrMatchSet4) != u_strlen(testSurrogateString)) {
629 log_err("u_strcspn should have returned strlen for empty string.\n");
630 }
631
632
633 log_verbose("Testing u_strspn()");
634
635 if (u_strspn(testString, a) != 1) {
636 log_err("u_strspn couldn't skip first letter a.\n");
637 }
638 if (u_strspn(testString, ab) != 2) {
639 log_err("u_strspn couldn't skip a or b.\n");
640 }
641 if (u_strspn(testString, ba) != 2) {
642 log_err("u_strspn couldn't skip a or b.\n");
643 }
644 if (u_strspn(testString, f) != 0) {
645 log_err("u_strspn didn't return 0 for \"f\".\n");
646 }
647 if (u_strspn(testString, dc) != 0) {
648 log_err("u_strspn couldn't find first letter a (skip d or c).\n");
649 }
650 if (u_strspn(testString, abcd) != u_strlen(testString)) {
651 log_err("u_strspn couldn't skip over the whole string.\n");
652 }
653 if (u_strspn(testString, empty) != 0) {
654 log_err("u_strspn should have returned 0 for empty string.\n");
655 }
656
657 log_verbose("Testing u_strspn() with surrogates");
658 if (u_strspn(testSurrogateString, surrMatchSetBad) != 2) {
659 log_err("u_strspn couldn't skip 0xdbff or a.\n");
660 }
661 if (u_strspn(testSurrogateString, surrMatchSetBad2) != 2) {
662 log_err("u_strspn couldn't skip 0xdbff or a.\n");
663 }
664 if (u_strspn(testSurrogateString, f) != 0) {
665 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
666 }
667 if (u_strspn(testSurrogateString, dc) != 0) {
668 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
669 }
670 if (u_strspn(testSurrogateString, cd) != 0) {
671 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
672 }
673 if (u_strspn(testSurrogateString, testSurrogateString) != u_strlen(testSurrogateString)) {
674 log_err("u_strspn couldn't skip whole string.\n");
675 }
676 if (u_strspn(testSurrogateString, surrMatchSet1) != 0) {
677 log_err("u_strspn couldn't skip \"0xdbff, 0xdfff\" (get first letter).\n");
678 }
679 if (u_strspn(testSurrogateString, surrMatchSetBad3) != 5) {
680 log_err("u_strspn couldn't skip \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
681 }
682 if (u_strspn(testSurrogateString, surrMatchSet4) != 0) {
683 log_err("u_strspn should have returned 0 for empty string.\n");
684 }
685 }
686
687 /*
688 * All binary Unicode string searches should behave the same for equivalent input.
689 * See Jitterbug 2145.
690 * There are some new functions, too - just test them all.
691 */
692 static void
TestSurrogateSearching()693 TestSurrogateSearching() {
694 static const UChar s[]={
695 /* 0 1 2 3 4 5 6 7 8 9 10 11 */
696 0x61, 0xd801, 0xdc02, 0x61, 0xdc02, 0x61, 0xd801, 0x61, 0xd801, 0xdc02, 0x61, 0
697 }, sub_a[]={
698 0x61, 0
699 }, sub_b[]={
700 0x62, 0
701 }, sub_lead[]={
702 0xd801, 0
703 }, sub_trail[]={
704 0xdc02, 0
705 }, sub_supp[]={
706 0xd801, 0xdc02, 0
707 }, sub_supp2[]={
708 0xd801, 0xdc03, 0
709 }, sub_a_lead[]={
710 0x61, 0xd801, 0
711 }, sub_trail_a[]={
712 0xdc02, 0x61, 0
713 }, sub_aba[]={
714 0x61, 0x62, 0x61, 0
715 };
716 static const UChar a=0x61, b=0x62, lead=0xd801, trail=0xdc02, nul=0;
717 static const UChar32 supp=0x10402, supp2=0x10403, ill=0x123456;
718
719 const UChar *first, *last;
720
721 /* search for NUL code point: find end of string */
722 first=s+u_strlen(s);
723
724 if(
725 first!=u_strchr(s, nul) ||
726 first!=u_strchr32(s, nul) ||
727 first!=u_memchr(s, nul, UPRV_LENGTHOF(s)) ||
728 first!=u_memchr32(s, nul, UPRV_LENGTHOF(s)) ||
729 first!=u_strrchr(s, nul) ||
730 first!=u_strrchr32(s, nul) ||
731 first!=u_memrchr(s, nul, UPRV_LENGTHOF(s)) ||
732 first!=u_memrchr32(s, nul, UPRV_LENGTHOF(s))
733 ) {
734 log_err("error: one of the u_str[|mem][r]chr[32](s, nul) does not find the terminator of s\n");
735 }
736
737 /* search for empty substring: find beginning of string */
738 if(
739 s!=u_strstr(s, &nul) ||
740 s!=u_strFindFirst(s, -1, &nul, -1) ||
741 s!=u_strFindFirst(s, -1, &nul, 0) ||
742 s!=u_strFindFirst(s, UPRV_LENGTHOF(s), &nul, -1) ||
743 s!=u_strFindFirst(s, UPRV_LENGTHOF(s), &nul, 0) ||
744 s!=u_strrstr(s, &nul) ||
745 s!=u_strFindLast(s, -1, &nul, -1) ||
746 s!=u_strFindLast(s, -1, &nul, 0) ||
747 s!=u_strFindLast(s, UPRV_LENGTHOF(s), &nul, -1) ||
748 s!=u_strFindLast(s, UPRV_LENGTHOF(s), &nul, 0)
749 ) {
750 log_err("error: one of the u_str[str etc](s, \"\") does not find s itself\n");
751 }
752
753 /* find 'a' in s[1..10[ */
754 first=s+3;
755 last=s+7;
756 if(
757 first!=u_strchr(s+1, a) ||
758 first!=u_strchr32(s+1, a) ||
759 first!=u_memchr(s+1, a, 9) ||
760 first!=u_memchr32(s+1, a, 9) ||
761 first!=u_strstr(s+1, sub_a) ||
762 first!=u_strFindFirst(s+1, -1, sub_a, -1) ||
763 first!=u_strFindFirst(s+1, -1, &a, 1) ||
764 first!=u_strFindFirst(s+1, 9, sub_a, -1) ||
765 first!=u_strFindFirst(s+1, 9, &a, 1) ||
766 (s+10)!=u_strrchr(s+1, a) ||
767 (s+10)!=u_strrchr32(s+1, a) ||
768 last!=u_memrchr(s+1, a, 9) ||
769 last!=u_memrchr32(s+1, a, 9) ||
770 (s+10)!=u_strrstr(s+1, sub_a) ||
771 (s+10)!=u_strFindLast(s+1, -1, sub_a, -1) ||
772 (s+10)!=u_strFindLast(s+1, -1, &a, 1) ||
773 last!=u_strFindLast(s+1, 9, sub_a, -1) ||
774 last!=u_strFindLast(s+1, 9, &a, 1)
775 ) {
776 log_err("error: one of the u_str[chr etc]('a') does not find the correct place\n");
777 }
778
779 /* do not find 'b' in s[1..10[ */
780 if(
781 NULL!=u_strchr(s+1, b) ||
782 NULL!=u_strchr32(s+1, b) ||
783 NULL!=u_memchr(s+1, b, 9) ||
784 NULL!=u_memchr32(s+1, b, 9) ||
785 NULL!=u_strstr(s+1, sub_b) ||
786 NULL!=u_strFindFirst(s+1, -1, sub_b, -1) ||
787 NULL!=u_strFindFirst(s+1, -1, &b, 1) ||
788 NULL!=u_strFindFirst(s+1, 9, sub_b, -1) ||
789 NULL!=u_strFindFirst(s+1, 9, &b, 1) ||
790 NULL!=u_strrchr(s+1, b) ||
791 NULL!=u_strrchr32(s+1, b) ||
792 NULL!=u_memrchr(s+1, b, 9) ||
793 NULL!=u_memrchr32(s+1, b, 9) ||
794 NULL!=u_strrstr(s+1, sub_b) ||
795 NULL!=u_strFindLast(s+1, -1, sub_b, -1) ||
796 NULL!=u_strFindLast(s+1, -1, &b, 1) ||
797 NULL!=u_strFindLast(s+1, 9, sub_b, -1) ||
798 NULL!=u_strFindLast(s+1, 9, &b, 1)
799 ) {
800 log_err("error: one of the u_str[chr etc]('b') incorrectly finds something\n");
801 }
802
803 /* do not find a non-code point in s[1..10[ */
804 if(
805 NULL!=u_strchr32(s+1, ill) ||
806 NULL!=u_memchr32(s+1, ill, 9) ||
807 NULL!=u_strrchr32(s+1, ill) ||
808 NULL!=u_memrchr32(s+1, ill, 9)
809 ) {
810 log_err("error: one of the u_str[chr etc](illegal code point) incorrectly finds something\n");
811 }
812
813 /* find U+d801 in s[1..10[ */
814 first=s+6;
815 if(
816 first!=u_strchr(s+1, lead) ||
817 first!=u_strchr32(s+1, lead) ||
818 first!=u_memchr(s+1, lead, 9) ||
819 first!=u_memchr32(s+1, lead, 9) ||
820 first!=u_strstr(s+1, sub_lead) ||
821 first!=u_strFindFirst(s+1, -1, sub_lead, -1) ||
822 first!=u_strFindFirst(s+1, -1, &lead, 1) ||
823 first!=u_strFindFirst(s+1, 9, sub_lead, -1) ||
824 first!=u_strFindFirst(s+1, 9, &lead, 1) ||
825 first!=u_strrchr(s+1, lead) ||
826 first!=u_strrchr32(s+1, lead) ||
827 first!=u_memrchr(s+1, lead, 9) ||
828 first!=u_memrchr32(s+1, lead, 9) ||
829 first!=u_strrstr(s+1, sub_lead) ||
830 first!=u_strFindLast(s+1, -1, sub_lead, -1) ||
831 first!=u_strFindLast(s+1, -1, &lead, 1) ||
832 first!=u_strFindLast(s+1, 9, sub_lead, -1) ||
833 first!=u_strFindLast(s+1, 9, &lead, 1)
834 ) {
835 log_err("error: one of the u_str[chr etc](U+d801) does not find the correct place\n");
836 }
837
838 /* find U+dc02 in s[1..10[ */
839 first=s+4;
840 if(
841 first!=u_strchr(s+1, trail) ||
842 first!=u_strchr32(s+1, trail) ||
843 first!=u_memchr(s+1, trail, 9) ||
844 first!=u_memchr32(s+1, trail, 9) ||
845 first!=u_strstr(s+1, sub_trail) ||
846 first!=u_strFindFirst(s+1, -1, sub_trail, -1) ||
847 first!=u_strFindFirst(s+1, -1, &trail, 1) ||
848 first!=u_strFindFirst(s+1, 9, sub_trail, -1) ||
849 first!=u_strFindFirst(s+1, 9, &trail, 1) ||
850 first!=u_strrchr(s+1, trail) ||
851 first!=u_strrchr32(s+1, trail) ||
852 first!=u_memrchr(s+1, trail, 9) ||
853 first!=u_memrchr32(s+1, trail, 9) ||
854 first!=u_strrstr(s+1, sub_trail) ||
855 first!=u_strFindLast(s+1, -1, sub_trail, -1) ||
856 first!=u_strFindLast(s+1, -1, &trail, 1) ||
857 first!=u_strFindLast(s+1, 9, sub_trail, -1) ||
858 first!=u_strFindLast(s+1, 9, &trail, 1)
859 ) {
860 log_err("error: one of the u_str[chr etc](U+dc02) does not find the correct place\n");
861 }
862
863 /* find U+10402 in s[1..10[ */
864 first=s+1;
865 last=s+8;
866 if(
867 first!=u_strchr32(s+1, supp) ||
868 first!=u_memchr32(s+1, supp, 9) ||
869 first!=u_strstr(s+1, sub_supp) ||
870 first!=u_strFindFirst(s+1, -1, sub_supp, -1) ||
871 first!=u_strFindFirst(s+1, -1, sub_supp, 2) ||
872 first!=u_strFindFirst(s+1, 9, sub_supp, -1) ||
873 first!=u_strFindFirst(s+1, 9, sub_supp, 2) ||
874 last!=u_strrchr32(s+1, supp) ||
875 last!=u_memrchr32(s+1, supp, 9) ||
876 last!=u_strrstr(s+1, sub_supp) ||
877 last!=u_strFindLast(s+1, -1, sub_supp, -1) ||
878 last!=u_strFindLast(s+1, -1, sub_supp, 2) ||
879 last!=u_strFindLast(s+1, 9, sub_supp, -1) ||
880 last!=u_strFindLast(s+1, 9, sub_supp, 2)
881 ) {
882 log_err("error: one of the u_str[chr etc](U+10402) does not find the correct place\n");
883 }
884
885 /* do not find U+10402 in a single UChar */
886 if(
887 NULL!=u_memchr32(s+1, supp, 1) ||
888 NULL!=u_strFindFirst(s+1, 1, sub_supp, -1) ||
889 NULL!=u_strFindFirst(s+1, 1, sub_supp, 2) ||
890 NULL!=u_memrchr32(s+1, supp, 1) ||
891 NULL!=u_strFindLast(s+1, 1, sub_supp, -1) ||
892 NULL!=u_strFindLast(s+1, 1, sub_supp, 2) ||
893 NULL!=u_memrchr32(s+2, supp, 1) ||
894 NULL!=u_strFindLast(s+2, 1, sub_supp, -1) ||
895 NULL!=u_strFindLast(s+2, 1, sub_supp, 2)
896 ) {
897 log_err("error: one of the u_str[chr etc](U+10402) incorrectly finds a supplementary c.p. in a single UChar\n");
898 }
899
900 /* do not find U+10403 in s[1..10[ */
901 if(
902 NULL!=u_strchr32(s+1, supp2) ||
903 NULL!=u_memchr32(s+1, supp2, 9) ||
904 NULL!=u_strstr(s+1, sub_supp2) ||
905 NULL!=u_strFindFirst(s+1, -1, sub_supp2, -1) ||
906 NULL!=u_strFindFirst(s+1, -1, sub_supp2, 2) ||
907 NULL!=u_strFindFirst(s+1, 9, sub_supp2, -1) ||
908 NULL!=u_strFindFirst(s+1, 9, sub_supp2, 2) ||
909 NULL!=u_strrchr32(s+1, supp2) ||
910 NULL!=u_memrchr32(s+1, supp2, 9) ||
911 NULL!=u_strrstr(s+1, sub_supp2) ||
912 NULL!=u_strFindLast(s+1, -1, sub_supp2, -1) ||
913 NULL!=u_strFindLast(s+1, -1, sub_supp2, 2) ||
914 NULL!=u_strFindLast(s+1, 9, sub_supp2, -1) ||
915 NULL!=u_strFindLast(s+1, 9, sub_supp2, 2)
916 ) {
917 log_err("error: one of the u_str[chr etc](U+10403) incorrectly finds something\n");
918 }
919
920 /* find <0061 d801> in s[1..10[ */
921 first=s+5;
922 if(
923 first!=u_strstr(s+1, sub_a_lead) ||
924 first!=u_strFindFirst(s+1, -1, sub_a_lead, -1) ||
925 first!=u_strFindFirst(s+1, -1, sub_a_lead, 2) ||
926 first!=u_strFindFirst(s+1, 9, sub_a_lead, -1) ||
927 first!=u_strFindFirst(s+1, 9, sub_a_lead, 2) ||
928 first!=u_strrstr(s+1, sub_a_lead) ||
929 first!=u_strFindLast(s+1, -1, sub_a_lead, -1) ||
930 first!=u_strFindLast(s+1, -1, sub_a_lead, 2) ||
931 first!=u_strFindLast(s+1, 9, sub_a_lead, -1) ||
932 first!=u_strFindLast(s+1, 9, sub_a_lead, 2)
933 ) {
934 log_err("error: one of the u_str[str etc](<0061 d801>) does not find the correct place\n");
935 }
936
937 /* find <dc02 0061> in s[1..10[ */
938 first=s+4;
939 if(
940 first!=u_strstr(s+1, sub_trail_a) ||
941 first!=u_strFindFirst(s+1, -1, sub_trail_a, -1) ||
942 first!=u_strFindFirst(s+1, -1, sub_trail_a, 2) ||
943 first!=u_strFindFirst(s+1, 9, sub_trail_a, -1) ||
944 first!=u_strFindFirst(s+1, 9, sub_trail_a, 2) ||
945 first!=u_strrstr(s+1, sub_trail_a) ||
946 first!=u_strFindLast(s+1, -1, sub_trail_a, -1) ||
947 first!=u_strFindLast(s+1, -1, sub_trail_a, 2) ||
948 first!=u_strFindLast(s+1, 9, sub_trail_a, -1) ||
949 first!=u_strFindLast(s+1, 9, sub_trail_a, 2)
950 ) {
951 log_err("error: one of the u_str[str etc](<dc02 0061>) does not find the correct place\n");
952 }
953
954 /* do not find "aba" in s[1..10[ */
955 if(
956 NULL!=u_strstr(s+1, sub_aba) ||
957 NULL!=u_strFindFirst(s+1, -1, sub_aba, -1) ||
958 NULL!=u_strFindFirst(s+1, -1, sub_aba, 3) ||
959 NULL!=u_strFindFirst(s+1, 9, sub_aba, -1) ||
960 NULL!=u_strFindFirst(s+1, 9, sub_aba, 3) ||
961 NULL!=u_strrstr(s+1, sub_aba) ||
962 NULL!=u_strFindLast(s+1, -1, sub_aba, -1) ||
963 NULL!=u_strFindLast(s+1, -1, sub_aba, 3) ||
964 NULL!=u_strFindLast(s+1, 9, sub_aba, -1) ||
965 NULL!=u_strFindLast(s+1, 9, sub_aba, 3)
966 ) {
967 log_err("error: one of the u_str[str etc](\"aba\") incorrectly finds something\n");
968 }
    /* Regression test for ICU-20684 Use-of-uninitialized-value in isMatchAtCPBoundary
     * Condition: search a string within itself when its first char is not a
     * surrogate and its last char is a lead surrogate.
     */
973 {
974 static const UChar s[]={ 0x0020, 0xD9C1 };
975 if (u_strFindFirst(s, 2, s, 2) != s) {
976 log_err("error: ending with a partial supplementary code point should match\n");
977 }
978 }
979 }
980
TestStringCopy()981 static void TestStringCopy()
982 {
983 UChar temp[40];
984 UChar *result=0;
985 UChar subString[5];
986 UChar uchars[]={0x61, 0x62, 0x63, 0x00};
987 char charOut[40];
988 char chars[]="abc"; /* needs default codepage */
989
990 log_verbose("Testing u_uastrncpy() and u_uastrcpy()");
991
992 u_uastrcpy(temp, "abc");
993 if(u_strcmp(temp, uchars) != 0) {
994 log_err("There is an error in u_uastrcpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
995 }
996
997 temp[0] = 0xFB; /* load garbage into it */
998 temp[1] = 0xFB;
999 temp[2] = 0xFB;
1000 temp[3] = 0xFB;
1001
1002 u_uastrncpy(temp, "abcabcabc", 3);
1003 if(u_strncmp(uchars, temp, 3) != 0){
1004 log_err("There is an error in u_uastrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
1005 }
1006 if(temp[3] != 0xFB) {
1007 log_err("u_uastrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
1008 }
1009
1010 charOut[0] = (char)0x7B; /* load garbage into it */
1011 charOut[1] = (char)0x7B;
1012 charOut[2] = (char)0x7B;
1013 charOut[3] = (char)0x7B;
1014
1015 temp[0] = 0x0061;
1016 temp[1] = 0x0062;
1017 temp[2] = 0x0063;
1018 temp[3] = 0x0061;
1019 temp[4] = 0x0062;
1020 temp[5] = 0x0063;
1021 temp[6] = 0x0000;
1022
1023 u_austrncpy(charOut, temp, 3);
1024 if(strncmp(chars, charOut, 3) != 0){
1025 log_err("There is an error in u_austrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
1026 }
1027 if(charOut[3] != (char)0x7B) {
1028 log_err("u_austrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
1029 }
1030
1031 /*Testing u_strchr()*/
1032 log_verbose("Testing u_strchr\n");
1033 temp[0]=0x42;
1034 temp[1]=0x62;
1035 temp[2]=0x62;
1036 temp[3]=0x63;
1037 temp[4]=0xd841;
1038 temp[5]=0xd841;
1039 temp[6]=0xdc02;
1040 temp[7]=0;
1041 result=u_strchr(temp, (UChar)0x62);
1042 if(result != temp+1){
1043 log_err("There is an error in u_strchr() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result);
1044 }
1045 /*Testing u_strstr()*/
1046 log_verbose("Testing u_strstr\n");
1047 subString[0]=0x62;
1048 subString[1]=0x63;
1049 subString[2]=0;
1050 result=u_strstr(temp, subString);
1051 if(result != temp+2){
1052 log_err("There is an error in u_strstr() Expected match at position 2 Got %ld (pointer 0x%lx)\n", result-temp, result);
1053 }
1054 result=u_strstr(temp, subString+2); /* subString+2 is an empty string */
1055 if(result != temp){
1056 log_err("There is an error in u_strstr() Expected match at position 0 Got %ld (pointer 0x%lx)\n", result-temp, result);
1057 }
1058 result=u_strstr(subString, temp);
1059 if(result != NULL){
1060 log_err("There is an error in u_strstr() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
1061 }
1062
1063 /*Testing u_strchr32*/
1064 log_verbose("Testing u_strchr32\n");
1065 result=u_strchr32(temp, (UChar32)0x62);
1066 if(result != temp+1){
1067 log_err("There is an error in u_strchr32() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result);
1068 }
1069 result=u_strchr32(temp, (UChar32)0xfb);
1070 if(result != NULL){
1071 log_err("There is an error in u_strchr32() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
1072 }
1073 result=u_strchr32(temp, (UChar32)0x20402);
1074 if(result != temp+5){
1075 log_err("There is an error in u_strchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result);
1076 }
1077
1078 temp[7]=0xfc00;
1079 result=u_memchr32(temp, (UChar32)0x20402, 7);
1080 if(result != temp+5){
1081 log_err("There is an error in u_memchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result);
1082 }
1083 result=u_memchr32(temp, (UChar32)0x20402, 6);
1084 if(result != NULL){
1085 log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result);
1086 }
1087 result=u_memchr32(temp, (UChar32)0x20402, 1);
1088 if(result != NULL){
1089 log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result);
1090 }
1091 result=u_memchr32(temp, (UChar32)0xfc00, 8);
1092 if(result != temp+7){
1093 log_err("There is an error in u_memchr32() Expected match at position 7 Got %ld (pointer 0x%lx)\n", result-temp, result);
1094 }
1095 }
1096
1097 /* test u_unescape() and u_unescapeAt() ------------------------------------- */
1098
1099 static void
TestUnescape()1100 TestUnescape() {
1101 static UChar buffer[200];
1102
1103 static const char* input =
1104 "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\e\\cC\\n \\x1b\\x{263a}";
1105
1106 static const UChar expect[]={
1107 0x53, 0x63, 0x68, 0xf6, 0x6e, 0x65, 0x73, 0x20, 0x41, 0x75, 0x74, 0x6f, 0x3a, 0x20,
1108 0x20ac, 0x20, 0x31, 0x31, 0x32, 0x34, 0x30, 0x2e, 0x0c,
1109 0x50, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x73, 0x20,
1110 0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x1b, 0x03, 0x0a, 0x20, 0x1b, 0x263A, 0
1111 };
1112 static const int32_t explength = UPRV_LENGTHOF(expect)-1;
1113 int32_t length;
1114
1115 /* test u_unescape() */
1116 length=u_unescape(input, buffer, UPRV_LENGTHOF(buffer));
1117 if(length!=explength || u_strcmp(buffer, expect)!=0) {
1118 log_err("failure in u_unescape(): length %d!=%d and/or incorrect result string\n", length,
1119 explength);
1120 }
1121
1122 /* try preflighting */
1123 length=u_unescape(input, NULL, UPRV_LENGTHOF(buffer));
1124 if(length!=explength || u_strcmp(buffer, expect)!=0) {
1125 log_err("failure in u_unescape(preflighting): length %d!=%d\n", length, explength);
1126 }
1127
1128 /* ### TODO: test u_unescapeAt() */
1129 }
1130
1131 static void
TestUnescapeRepeatedSurrogateLead20725()1132 TestUnescapeRepeatedSurrogateLead20725() {
1133 const int32_t repeat = 20000;
1134 const int32_t srclen = repeat * 6 + 1;
1135 char *src = (char*)malloc(srclen);
1136 UChar *dest = (UChar*) malloc(sizeof(UChar) * (repeat + 1));
1137 if (src == NULL || dest == NULL) {
1138 log_err("memory allocation error");
1139 }
1140 for (int32_t i = 0; i < repeat; i++) {
1141 uprv_strcpy(src + (i * 6), "\\ud841");
1142 }
1143 int32_t len = u_unescape(src, dest, repeat);
1144 if (len != repeat) {
1145 log_err("failure in u_unescape()");
1146 }
1147 for (int32_t i = 0; i < repeat; i++) {
1148 if (dest[i] != 0xd841) {
1149 log_err("failure in u_unescape() return value");
1150 }
1151 }
1152 free(src);
1153
1154 // A few simple test cases to make sure that the code recovers properly
1155 u_unescape("\\ud841\\x5A", dest, repeat);
1156 const UChar expected1[] = {0xd841, 'Z', 0};
1157 if (u_strcmp(dest, expected1)!=0) {
1158 log_err("u_unescape() should return u\"\\ud841Z\" but got %s", dest);
1159 }
1160
1161 u_unescape("\\ud841\\U00050005", dest, repeat);
1162 const UChar expected2[] = {0xd841, 0xd900, 0xdc05, 0};
1163 if (u_strcmp(dest, expected2)!=0) {
1164 log_err("u_unescape() should return u\"\\ud841\\ud900\\udc05\" "
1165 "but got %s", dest);
1166 }
1167
1168 // \\xXX is ill-formed. The documentation states:
1169 // If an escape sequence is ill-formed, this method returns an empty string.
1170 u_unescape("\\ud841\\xXX", dest, repeat);
1171 const UChar expected3[] = { 0 };
1172 if (u_strcmp(dest, expected3)!=0) {
1173 log_err("u_unescape() should return empty string");
1174 }
1175
1176 free(dest);
1177
1178 }
1179
1180 /* test code point counting functions --------------------------------------- */
1181
1182 /* reference implementation of u_strHasMoreChar32Than() */
1183 static int32_t
_refStrHasMoreChar32Than(const UChar * s,int32_t length,int32_t number)1184 _refStrHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
1185 int32_t count=u_countChar32(s, length);
1186 return count>number;
1187 }
1188
1189 /* compare the real function against the reference */
1190 static void
_testStrHasMoreChar32Than(const UChar * s,int32_t i,int32_t length,int32_t number)1191 _testStrHasMoreChar32Than(const UChar *s, int32_t i, int32_t length, int32_t number) {
1192 if(u_strHasMoreChar32Than(s, length, number)!=_refStrHasMoreChar32Than(s, length, number)) {
1193 log_err("u_strHasMoreChar32Than(s+%d, %d, %d)=%hd is wrong\n",
1194 i, length, number, u_strHasMoreChar32Than(s, length, number));
1195 }
1196 }
1197
1198 static void
TestCountChar32()1199 TestCountChar32() {
1200 static const UChar string[]={
1201 0x61, 0x62, 0xd800, 0xdc00,
1202 0xd801, 0xdc01, 0x63, 0xd802,
1203 0x64, 0xdc03, 0x65, 0x66,
1204 0xd804, 0xdc04, 0xd805, 0xdc05,
1205 0x67
1206 };
1207 UChar buffer[100];
1208 int32_t i, length, number;
1209
1210 /* test u_strHasMoreChar32Than() with length>=0 */
1211 length=UPRV_LENGTHOF(string);
1212 while(length>=0) {
1213 for(i=0; i<=length; ++i) {
1214 for(number=-1; number<=((length-i)+2); ++number) {
1215 _testStrHasMoreChar32Than(string+i, i, length-i, number);
1216 }
1217 }
1218 --length;
1219 }
1220
1221 /* test u_strHasMoreChar32Than() with NUL-termination (length=-1) */
1222 length=UPRV_LENGTHOF(string);
1223 u_memcpy(buffer, string, length);
1224 while(length>=0) {
1225 buffer[length]=0;
1226 for(i=0; i<=length; ++i) {
1227 for(number=-1; number<=((length-i)+2); ++number) {
1228 _testStrHasMoreChar32Than(buffer+i, i, -1, number);
1229 }
1230 }
1231 --length;
1232 }
1233
1234 /* test u_strHasMoreChar32Than() with NULL string (bad input) */
1235 for(length=-1; length<=1; ++length) {
1236 for(i=0; i<=length; ++i) {
1237 for(number=-2; number<=2; ++number) {
1238 _testStrHasMoreChar32Than(NULL, 0, length, number);
1239 }
1240 }
1241 }
1242 }
1243
1244 /* UCharIterator ------------------------------------------------------------ */
1245
1246 /*
1247 * Compare results from two iterators, should be same.
1248 * Assume that the text is not empty and that
1249 * iteration start==0 and iteration limit==length.
1250 */
1251 static void
compareIterators(UCharIterator * iter1,const char * n1,UCharIterator * iter2,const char * n2)1252 compareIterators(UCharIterator *iter1, const char *n1,
1253 UCharIterator *iter2, const char *n2) {
1254 int32_t i, pos1, pos2, middle, length;
1255 UChar32 c1, c2;
1256
1257 /* compare lengths */
1258 length=iter1->getIndex(iter1, UITER_LENGTH);
1259 pos2=iter2->getIndex(iter2, UITER_LENGTH);
1260 if(length!=pos2) {
1261 log_err("%s->getIndex(length)=%d != %d=%s->getIndex(length)\n", n1, length, pos2, n2);
1262 return;
1263 }
1264
1265 /* set into the middle */
1266 middle=length/2;
1267
1268 pos1=iter1->move(iter1, middle, UITER_ZERO);
1269 if(pos1!=middle) {
1270 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n1, middle, pos1);
1271 return;
1272 }
1273
1274 pos2=iter2->move(iter2, middle, UITER_ZERO);
1275 if(pos2!=middle) {
1276 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n2, middle, pos2);
1277 return;
1278 }
1279
1280 /* test current() */
1281 c1=iter1->current(iter1);
1282 c2=iter2->current(iter2);
1283 if(c1!=c2) {
1284 log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n1, c1, c2, n2, middle);
1285 return;
1286 }
1287
1288 /* move forward 3 UChars */
1289 for(i=0; i<3; ++i) {
1290 c1=iter1->next(iter1);
1291 c2=iter2->next(iter2);
1292 if(c1!=c2) {
1293 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1294 return;
1295 }
1296 }
1297
1298 /* move backward 5 UChars */
1299 for(i=0; i<5; ++i) {
1300 c1=iter1->previous(iter1);
1301 c2=iter2->previous(iter2);
1302 if(c1!=c2) {
1303 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1304 return;
1305 }
1306 }
1307
1308 /* iterate forward from the beginning */
1309 pos1=iter1->move(iter1, 0, UITER_START);
1310 if(pos1<0) {
1311 log_err("%s->move(start) failed\n", n1);
1312 return;
1313 }
1314 if(!iter1->hasNext(iter1)) {
1315 log_err("%s->hasNext() at the start returns FALSE\n", n1);
1316 return;
1317 }
1318
1319 pos2=iter2->move(iter2, 0, UITER_START);
1320 if(pos2<0) {
1321 log_err("%s->move(start) failed\n", n2);
1322 return;
1323 }
1324 if(!iter2->hasNext(iter2)) {
1325 log_err("%s->hasNext() at the start returns FALSE\n", n2);
1326 return;
1327 }
1328
1329 do {
1330 c1=iter1->next(iter1);
1331 c2=iter2->next(iter2);
1332 if(c1!=c2) {
1333 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1334 return;
1335 }
1336 } while(c1>=0);
1337
1338 if(iter1->hasNext(iter1)) {
1339 log_err("%s->hasNext() at the end returns TRUE\n", n1);
1340 return;
1341 }
1342 if(iter2->hasNext(iter2)) {
1343 log_err("%s->hasNext() at the end returns TRUE\n", n2);
1344 return;
1345 }
1346
1347 /* back to the middle */
1348 pos1=iter1->move(iter1, middle, UITER_ZERO);
1349 if(pos1!=middle) {
1350 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n1, middle, pos1);
1351 return;
1352 }
1353
1354 pos2=iter2->move(iter2, middle, UITER_ZERO);
1355 if(pos2!=middle) {
1356 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n2, middle, pos2);
1357 return;
1358 }
1359
1360 /* move to index 1 */
1361 pos1=iter1->move(iter1, 1, UITER_ZERO);
1362 if(pos1!=1) {
1363 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n1, middle, pos1);
1364 return;
1365 }
1366
1367 pos2=iter2->move(iter2, 1, UITER_ZERO);
1368 if(pos2!=1) {
1369 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n2, middle, pos2);
1370 return;
1371 }
1372
1373 /* iterate backward from the end */
1374 pos1=iter1->move(iter1, 0, UITER_LIMIT);
1375 if(pos1<0) {
1376 log_err("%s->move(limit) failed\n", n1);
1377 return;
1378 }
1379 if(!iter1->hasPrevious(iter1)) {
1380 log_err("%s->hasPrevious() at the end returns FALSE\n", n1);
1381 return;
1382 }
1383
1384 pos2=iter2->move(iter2, 0, UITER_LIMIT);
1385 if(pos2<0) {
1386 log_err("%s->move(limit) failed\n", n2);
1387 return;
1388 }
1389 if(!iter2->hasPrevious(iter2)) {
1390 log_err("%s->hasPrevious() at the end returns FALSE\n", n2);
1391 return;
1392 }
1393
1394 do {
1395 c1=iter1->previous(iter1);
1396 c2=iter2->previous(iter2);
1397 if(c1!=c2) {
1398 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1399 return;
1400 }
1401 } while(c1>=0);
1402
1403 if(iter1->hasPrevious(iter1)) {
1404 log_err("%s->hasPrevious() at the start returns TRUE\n", n1);
1405 return;
1406 }
1407 if(iter2->hasPrevious(iter2)) {
1408 log_err("%s->hasPrevious() at the start returns TRUE\n", n2);
1409 return;
1410 }
1411 }
1412
1413 /*
1414 * Test the iterator's getState() and setState() functions.
1415 * iter1 and iter2 must be set up for the same iterator type and the same string
1416 * but may be physically different structs (different addresses).
1417 *
1418 * Assume that the text is not empty and that
1419 * iteration start==0 and iteration limit==length.
1420 * It must be 2<=middle<=length-2.
1421 */
1422 static void
testIteratorState(UCharIterator * iter1,UCharIterator * iter2,const char * n,int32_t middle)1423 testIteratorState(UCharIterator *iter1, UCharIterator *iter2, const char *n, int32_t middle) {
1424 UChar32 u[4];
1425
1426 UErrorCode errorCode;
1427 UChar32 c;
1428 uint32_t state;
1429 int32_t i, j;
1430
1431 /* get four UChars from the middle of the string */
1432 iter1->move(iter1, middle-2, UITER_ZERO);
1433 for(i=0; i<4; ++i) {
1434 c=iter1->next(iter1);
1435 if(c<0) {
1436 /* the test violates the assumptions, see comment above */
1437 log_err("test error: %s[%d]=%d\n", n, middle-2+i, c);
1438 return;
1439 }
1440 u[i]=c;
1441 }
1442
1443 /* move to the middle and get the state */
1444 iter1->move(iter1, -2, UITER_CURRENT);
1445 state=uiter_getState(iter1);
1446
1447 /* set the state into the second iterator and compare the results */
1448 errorCode=U_ZERO_ERROR;
1449 uiter_setState(iter2, state, &errorCode);
1450 if(U_FAILURE(errorCode)) {
1451 log_err("%s->setState(0x%x) failed: %s\n", n, state, u_errorName(errorCode));
1452 return;
1453 }
1454
1455 c=iter2->current(iter2);
1456 if(c!=u[2]) {
1457 log_err("%s->current(at %d)=U+%04x!=U+%04x\n", n, middle, c, u[2]);
1458 }
1459
1460 c=iter2->previous(iter2);
1461 if(c!=u[1]) {
1462 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-1, c, u[1]);
1463 }
1464
1465 iter2->move(iter2, 2, UITER_CURRENT);
1466 c=iter2->next(iter2);
1467 if(c!=u[3]) {
1468 log_err("%s->next(at %d)=U+%04x!=U+%04x\n", n, middle+1, c, u[3]);
1469 }
1470
1471 iter2->move(iter2, -3, UITER_CURRENT);
1472 c=iter2->previous(iter2);
1473 if(c!=u[0]) {
1474 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-2, c, u[0]);
1475 }
1476
1477 /* move the second iterator back to the middle */
1478 iter2->move(iter2, 1, UITER_CURRENT);
1479 iter2->next(iter2);
1480
1481 /* check that both are in the middle */
1482 i=iter1->getIndex(iter1, UITER_CURRENT);
1483 j=iter2->getIndex(iter2, UITER_CURRENT);
1484 if(i!=middle) {
1485 log_err("%s->getIndex(current)=%d!=%d as expected\n", n, i, middle);
1486 }
1487 if(i!=j) {
1488 log_err("%s->getIndex(current)=%d!=%d after setState()\n", n, j, i);
1489 }
1490
1491 /* compare lengths */
1492 i=iter1->getIndex(iter1, UITER_LENGTH);
1493 j=iter2->getIndex(iter2, UITER_LENGTH);
1494 if(i!=j) {
1495 log_err("%s->getIndex(length)=%d!=%d before/after setState()\n", n, i, j);
1496 }
1497 }
1498
1499 static void
TestUCharIterator()1500 TestUCharIterator() {
1501 static const UChar text[]={
1502 0x61, 0x62, 0x63, 0xd801, 0xdffd, 0x78, 0x79, 0x7a, 0
1503 };
1504 char bytes[40];
1505
1506 UCharIterator iter, iter1, iter2;
1507 UConverter *cnv;
1508 UErrorCode errorCode;
1509 int32_t length;
1510
1511 /* simple API/code coverage - test NOOP UCharIterator */
1512 uiter_setString(&iter, NULL, 0);
1513 if( iter.current(&iter)!=-1 || iter.next(&iter)!=-1 || iter.previous(&iter)!=-1 ||
1514 iter.move(&iter, 1, UITER_CURRENT) || iter.getIndex(&iter, UITER_CURRENT)!=0 ||
1515 iter.hasNext(&iter) || iter.hasPrevious(&iter)
1516 ) {
1517 log_err("NOOP UCharIterator behaves unexpectedly\n");
1518 }
1519
1520 /* test get/set state */
1521 length=UPRV_LENGTHOF(text)-1;
1522 uiter_setString(&iter1, text, -1);
1523 uiter_setString(&iter2, text, length);
1524 testIteratorState(&iter1, &iter2, "UTF16IteratorState", length/2);
1525 testIteratorState(&iter1, &iter2, "UTF16IteratorStatePlus1", length/2+1);
1526
1527 /* compare the same string between UTF-16 and UTF-8 UCharIterators ------ */
1528 errorCode=U_ZERO_ERROR;
1529 u_strToUTF8(bytes, sizeof(bytes), &length, text, -1, &errorCode);
1530 if(U_FAILURE(errorCode)) {
1531 log_err("u_strToUTF8() failed, %s\n", u_errorName(errorCode));
1532 return;
1533 }
1534
1535 uiter_setString(&iter1, text, -1);
1536 uiter_setUTF8(&iter2, bytes, length);
1537 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator");
1538
1539 /* try again with length=-1 */
1540 uiter_setUTF8(&iter2, bytes, -1);
1541 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator_1");
1542
1543 /* test get/set state */
1544 length=UPRV_LENGTHOF(text)-1;
1545 uiter_setUTF8(&iter1, bytes, -1);
1546 testIteratorState(&iter1, &iter2, "UTF8IteratorState", length/2);
1547 testIteratorState(&iter1, &iter2, "UTF8IteratorStatePlus1", length/2+1);
1548
1549 /* compare the same string between UTF-16 and UTF-16BE UCharIterators --- */
1550 errorCode=U_ZERO_ERROR;
1551 cnv=ucnv_open("UTF-16BE", &errorCode);
1552 length=ucnv_fromUChars(cnv, bytes, sizeof(bytes), text, -1, &errorCode);
1553 ucnv_close(cnv);
1554 if(U_FAILURE(errorCode)) {
1555 log_err("ucnv_fromUChars(UTF-16BE) failed, %s\n", u_errorName(errorCode));
1556 return;
1557 }
1558
1559 /* terminate with a _pair_ of 0 bytes - a UChar NUL in UTF-16BE (length is known to be ok) */
1560 bytes[length]=bytes[length+1]=0;
1561
1562 uiter_setString(&iter1, text, -1);
1563 uiter_setUTF16BE(&iter2, bytes, length);
1564 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator");
1565
1566 /* try again with length=-1 */
1567 uiter_setUTF16BE(&iter2, bytes, -1);
1568 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator_1");
1569
1570 /* try again after moving the bytes up one, and with length=-1 */
1571 memmove(bytes+1, bytes, length+2);
1572 uiter_setUTF16BE(&iter2, bytes+1, -1);
1573 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIteratorMoved1");
1574
1575 /* ### TODO test other iterators: CharacterIterator, Replaceable */
1576 }
1577