1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 * Copyright (C) 2002-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 ******************************************************************************
10 * file name: custrtst.c
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2002oct09
16 * created by: Markus W. Scherer
17 *
18 * Tests of ustring.h Unicode string API functions.
19 */
20
21 #include "unicode/ustring.h"
22 #include "unicode/ucnv.h"
23 #include "unicode/uiter.h"
24 #include "cintltst.h"
25 #include "cstring.h"
26 #include "cmemory.h"
27 #include <stdbool.h>
28 #include <string.h>
29
30 /* get the sign of an integer */
31 #define _SIGN(value) ((value)==0 ? 0 : ((int32_t)(value)>>31)|1)
32
33 /* test setup --------------------------------------------------------------- */
34
35 static void setUpDataTable(void);
36 static void TestStringCopy(void);
37 static void TestStringFunctions(void);
38 static void TestStringSearching(void);
39 static void TestSurrogateSearching(void);
40 static void TestUnescape(void);
41 static void TestUnescapeRepeatedSurrogateLead20725(void);
42 static void TestCountChar32(void);
43 static void TestUCharIterator(void);
44
45 void addUStringTest(TestNode** root);
46
/*
 * Hands every test function of this file to addTest(), together with its
 * path below "tsutil/custrtst/", using the test tree root passed in.
 * The tests are added in the order in which they appear in the table.
 */
void addUStringTest(TestNode** root)
{
    static const struct {
        void (*function)(void);
        const char *path;
    } tests[] = {
        { TestStringCopy,         "tsutil/custrtst/TestStringCopy" },
        { TestStringFunctions,    "tsutil/custrtst/TestStringFunctions" },
        { TestStringSearching,    "tsutil/custrtst/TestStringSearching" },
        { TestSurrogateSearching, "tsutil/custrtst/TestSurrogateSearching" },
        { TestUnescape,           "tsutil/custrtst/TestUnescape" },
        { TestUnescapeRepeatedSurrogateLead20725,
                                  "tsutil/custrtst/TestUnescapeRepeatedSurrogateLead20725" },
        { TestCountChar32,        "tsutil/custrtst/TestCountChar32" },
        { TestUCharIterator,      "tsutil/custrtst/TestUCharIterator" }
    };
    int32_t index;

    for (index = 0; index < (int32_t)(sizeof(tests) / sizeof(tests[0])); ++index) {
        addTest(root, tests[index].function, tests[index].path);
    }
}
59
60 /* test data for TestStringFunctions ---------------------------------------- */
61
62 UChar*** dataTable = NULL;
63
64 static const char* raw[3][4] = {
65
66 /* First String */
67 { "English_", "French_", "Croatian_", "English_"},
68 /* Second String */
69 { "United States", "France", "Croatia", "Unites States"},
70
71 /* Concatenated string */
72 { "English_United States", "French_France", "Croatian_Croatia", "English_United States"}
73 };
74
setUpDataTable(void)75 static void setUpDataTable(void)
76 {
77 int32_t i,j;
78 if(dataTable == NULL) {
79 dataTable = (UChar***)calloc(sizeof(UChar**),3);
80
81 for (i = 0; i < 3; i++) {
82 dataTable[i] = (UChar**)calloc(sizeof(UChar*),4);
83 for (j = 0; j < 4; j++){
84 dataTable[i][j] = (UChar*) malloc(sizeof(UChar)*(strlen(raw[i][j])+1));
85 u_uastrcpy(dataTable[i][j],raw[i][j]);
86 }
87 }
88 }
89 }
90
cleanUpDataTable(void)91 static void cleanUpDataTable(void)
92 {
93 int32_t i,j;
94 if(dataTable != NULL) {
95 for (i=0; i<3; i++) {
96 for(j = 0; j<4; j++) {
97 free(dataTable[i][j]);
98 }
99 free(dataTable[i]);
100 }
101 free(dataTable);
102 }
103 dataTable = NULL;
104 }
105
106 /*Tests for u_strcat(),u_strcmp(), u_strlen(), u_strcpy(),u_strncat(),u_strncmp(),u_strncpy, u_uastrcpy(),u_austrcpy(), u_uastrncpy(); */
TestStringFunctions(void)107 static void TestStringFunctions(void)
108 {
109 int32_t i,j,k;
110 UChar temp[512];
111 UChar nullTemp[512];
112 char test[512];
113 char tempOut[512];
114
115 setUpDataTable();
116
117 log_verbose("Testing u_strlen()\n");
118 if( u_strlen(dataTable[0][0])!= u_strlen(dataTable[0][3]) || u_strlen(dataTable[0][0]) == u_strlen(dataTable[0][2]))
119 log_err("There is an error in u_strlen()");
120
121 log_verbose("Testing u_memcpy() and u_memcmp()\n");
122
123 for(i=0;i<3;++i)
124 {
125 for(j=0;j<4;++j)
126 {
127 log_verbose("Testing %s\n", u_austrcpy(tempOut, dataTable[i][j]));
128 temp[0] = 0;
129 temp[7] = 0xA4; /* Mark the end */
130 u_memcpy(temp,dataTable[i][j], 7);
131
132 if(temp[7] != 0xA4)
133 log_err("an error occurred in u_memcpy()\n");
134 if(u_memcmp(temp, dataTable[i][j], 7)!=0)
135 log_err("an error occurred in u_memcpy() or u_memcmp()\n");
136 }
137 }
138 if(u_memcmp(dataTable[0][0], dataTable[1][1], 7)==0)
139 log_err("an error occurred in u_memcmp()\n");
140
141 log_verbose("Testing u_memset()\n");
142 nullTemp[0] = 0;
143 nullTemp[7] = 0;
144 u_memset(nullTemp, 0xa4, 7);
145 for (i = 0; i < 7; i++) {
146 if(nullTemp[i] != 0xa4) {
147 log_err("an error occurred in u_memset()\n");
148 }
149 }
150 if(nullTemp[7] != 0) {
151 log_err("u_memset() went too far\n");
152 }
153
154 u_memset(nullTemp, 0, 7);
155 nullTemp[7] = 0xa4;
156 temp[7] = 0;
157 u_memcpy(temp,nullTemp, 7);
158 if(u_memcmp(temp, nullTemp, 7)!=0 || temp[7]!=0)
159 log_err("an error occurred in u_memcpy() or u_memcmp()\n");
160
161
162 log_verbose("Testing u_memmove()\n");
163 for (i = 0; i < 7; i++) {
164 temp[i] = (UChar)i;
165 }
166 u_memmove(temp + 1, temp, 7);
167 if(temp[0] != 0) {
168 log_err("an error occurred in u_memmove()\n");
169 }
170 for (i = 1; i <= 7; i++) {
171 if(temp[i] != (i - 1)) {
172 log_err("an error occurred in u_memmove()\n");
173 }
174 }
175
176 log_verbose("Testing u_strcpy() and u_strcmp()\n");
177
178 for(i=0;i<3;++i)
179 {
180 for(j=0;j<4;++j)
181 {
182 log_verbose("Testing %s\n", u_austrcpy(tempOut, dataTable[i][j]));
183 temp[0] = 0;
184 u_strcpy(temp,dataTable[i][j]);
185
186 if(u_strcmp(temp,dataTable[i][j])!=0)
187 log_err("something threw an error in u_strcpy() or u_strcmp()\n");
188 }
189 }
190 if(u_strcmp(dataTable[0][0], dataTable[1][1])==0)
191 log_err("an error occurred in u_memcmp()\n");
192
193 log_verbose("testing u_strcat()\n");
194 i=0;
195 for(j=0; j<2;++j)
196 {
197 u_uastrcpy(temp, "");
198 u_strcpy(temp,dataTable[i][j]);
199 u_strcat(temp,dataTable[i+1][j]);
200 if(u_strcmp(temp,dataTable[i+2][j])!=0)
201 log_err("something threw an error in u_strcat()\n");
202
203 }
204 log_verbose("Testing u_strncmp()\n");
205 for(i=0,j=0;j<4; ++j)
206 {
207 k=u_strlen(dataTable[i][j]);
208 if(u_strncmp(dataTable[i][j],dataTable[i+2][j],k)!=0)
209 log_err("Something threw an error in u_strncmp\n");
210 }
211 if(u_strncmp(dataTable[0][0], dataTable[1][1], 7)==0)
212 log_err("an error occurred in u_memcmp()\n");
213
214
215 log_verbose("Testing u_strncat\n");
216 for(i=0,j=0;j<4; ++j)
217 {
218 k=u_strlen(dataTable[i][j]);
219
220 u_uastrcpy(temp,"");
221
222 if(u_strcmp(u_strncat(temp,dataTable[i+2][j],k),dataTable[i][j])!=0)
223 log_err("something threw an error in u_strncat or u_uastrcpy()\n");
224
225 }
226
227 log_verbose("Testing u_strncpy() and u_uastrcpy()\n");
228 for(i=2,j=0;j<4; ++j)
229 {
230 k=u_strlen(dataTable[i][j]);
231 u_strncpy(temp, dataTable[i][j],k);
232 temp[k] = 0xa4;
233
234 if(u_strncmp(temp, dataTable[i][j],k)!=0)
235 log_err("something threw an error in u_strncpy()\n");
236
237 if(temp[k] != 0xa4)
238 log_err("something threw an error in u_strncpy()\n");
239
240 u_memset(temp, 0x3F, UPRV_LENGTHOF(temp) - 1);
241 u_uastrncpy(temp, raw[i][j], k-1);
242 if(u_strncmp(temp, dataTable[i][j],k-1)!=0)
243 log_err("something threw an error in u_uastrncpy(k-1)\n");
244
245 if(temp[k-1] != 0x3F)
246 log_err("something threw an error in u_uastrncpy(k-1)\n");
247
248 u_memset(temp, 0x3F, UPRV_LENGTHOF(temp) - 1);
249 u_uastrncpy(temp, raw[i][j], k+1);
250 if(u_strcmp(temp, dataTable[i][j])!=0)
251 log_err("something threw an error in u_uastrncpy(k+1)\n");
252
253 if(temp[k] != 0)
254 log_err("something threw an error in u_uastrncpy(k+1)\n");
255
256 u_memset(temp, 0x3F, UPRV_LENGTHOF(temp) - 1);
257 u_uastrncpy(temp, raw[i][j], k);
258 if(u_strncmp(temp, dataTable[i][j], k)!=0)
259 log_err("something threw an error in u_uastrncpy(k)\n");
260
261 if(temp[k] != 0x3F)
262 log_err("something threw an error in u_uastrncpy(k)\n");
263 }
264
265 log_verbose("Testing u_strchr() and u_memchr()\n");
266
267 for(i=2,j=0;j<4;j++)
268 {
269 UChar saveVal = dataTable[i][j][0];
270 UChar *findPtr = u_strchr(dataTable[i][j], 0x005F);
271 int32_t dataSize = (int32_t)(u_strlen(dataTable[i][j]) + 1);
272
273 log_verbose("%s ", u_austrcpy(tempOut, findPtr));
274
275 if (findPtr == NULL || *findPtr != 0x005F) {
276 log_err("u_strchr can't find '_' in the string\n");
277 }
278
279 findPtr = u_strchr32(dataTable[i][j], 0x005F);
280 if (findPtr == NULL || *findPtr != 0x005F) {
281 log_err("u_strchr32 can't find '_' in the string\n");
282 }
283
284 findPtr = u_strchr(dataTable[i][j], 0);
285 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
286 log_err("u_strchr can't find NULL in the string\n");
287 }
288
289 findPtr = u_strchr32(dataTable[i][j], 0);
290 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
291 log_err("u_strchr32 can't find NULL in the string\n");
292 }
293
294 findPtr = u_memchr(dataTable[i][j], 0, dataSize);
295 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
296 log_err("u_memchr can't find NULL in the string\n");
297 }
298
299 findPtr = u_memchr32(dataTable[i][j], 0, dataSize);
300 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
301 log_err("u_memchr32 can't find NULL in the string\n");
302 }
303
304 dataTable[i][j][0] = 0;
305 /* Make sure we skip over the NULL termination */
306 findPtr = u_memchr(dataTable[i][j], 0x005F, dataSize);
307 if (findPtr == NULL || *findPtr != 0x005F) {
308 log_err("u_memchr can't find '_' in the string\n");
309 }
310
311 findPtr = u_memchr32(dataTable[i][j], 0x005F, dataSize);
312 if (findPtr == NULL || *findPtr != 0x005F) {
313 log_err("u_memchr32 can't find '_' in the string\n");
314 }
315 findPtr = u_memchr32(dataTable[i][j], 0xFFFD, dataSize);
316 if (findPtr != NULL) {
317 log_err("Should have found NULL when the character is not there.\n");
318 }
319 dataTable[i][j][0] = saveVal; /* Put it back for the other tests */
320 }
321
322 /*
323 * test that u_strchr32()
324 * does not find surrogate code points when they are part of matched pairs
325 * (= part of supplementary code points)
326 * Jitterbug 1542
327 */
328 {
329 static const UChar s[]={
330 /* 0 1 2 3 4 5 6 7 8 9 */
331 0x0061, 0xd841, 0xdc02, 0xd841, 0x0062, 0xdc02, 0xd841, 0xdc02, 0x0063, 0
332 };
333
334 if(u_strchr32(s, 0xd841)!=(s+3) || u_strchr32(s, 0xdc02)!=(s+5)) {
335 log_err("error: u_strchr32(surrogate) finds a partial supplementary code point\n");
336 }
337 if(u_memchr32(s, 0xd841, 9)!=(s+3) || u_memchr32(s, 0xdc02, 9)!=(s+5)) {
338 log_err("error: u_memchr32(surrogate) finds a partial supplementary code point\n");
339 }
340 }
341
342 log_verbose("Testing u_austrcpy()");
343 u_austrcpy(test,dataTable[0][0]);
344 if(strcmp(test,raw[0][0])!=0)
345 log_err("There is an error in u_austrcpy()");
346
347
348 log_verbose("Testing u_strtok_r()");
349 {
350 const char tokString[] = " , 1 2 3 AHHHHH! 5.5 6 7 , 8\n";
351 const char *tokens[] = {",", "1", "2", "3", "AHHHHH!", "5.5", "6", "7", "8\n"};
352 UChar delimBuf[sizeof(test)];
353 UChar currTokenBuf[sizeof(tokString)];
354 UChar *state;
355 uint32_t currToken = 0;
356 UChar *ptr;
357
358 u_uastrcpy(temp, tokString);
359 u_uastrcpy(delimBuf, " ");
360
361 ptr = u_strtok_r(temp, delimBuf, &state);
362 u_uastrcpy(delimBuf, " ,");
363 while (ptr != NULL) {
364 u_uastrcpy(currTokenBuf, tokens[currToken]);
365 if (u_strcmp(ptr, currTokenBuf) != 0) {
366 log_err("u_strtok_r mismatch at %d. Got: %s, Expected: %s\n", currToken, ptr, tokens[currToken]);
367 }
368 ptr = u_strtok_r(NULL, delimBuf, &state);
369 currToken++;
370 }
371
372 if (currToken != UPRV_LENGTHOF(tokens)) {
373 log_err("Didn't get correct number of tokens\n");
374 }
375 state = delimBuf; /* Give it an "invalid" saveState */
376 u_uastrcpy(currTokenBuf, "");
377 if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) {
378 log_err("Didn't get NULL for empty string\n");
379 }
380 if (state != NULL) {
381 log_err("State should be NULL for empty string\n");
382 }
383 state = delimBuf; /* Give it an "invalid" saveState */
384 u_uastrcpy(currTokenBuf, ", ,");
385 if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) {
386 log_err("Didn't get NULL for a string of delimiters\n");
387 }
388 if (state != NULL) {
389 log_err("State should be NULL for a string of delimiters\n");
390 }
391
392 state = delimBuf; /* Give it an "invalid" saveState */
393 u_uastrcpy(currTokenBuf, "q, ,");
394 if (u_strtok_r(currTokenBuf, delimBuf, &state) == NULL) {
395 log_err("Got NULL for a string that does not begin with delimiters\n");
396 }
397 if (u_strtok_r(NULL, delimBuf, &state) != NULL) {
398 log_err("Didn't get NULL for a string that ends in delimiters\n");
399 }
400 if (state != NULL) {
401 log_err("State should be NULL for empty string\n");
402 }
403
404 state = delimBuf; /* Give it an "invalid" saveState */
405 u_uastrcpy(currTokenBuf, tokString);
406 u_uastrcpy(temp, tokString);
407 u_uastrcpy(delimBuf, "q"); /* Give it a delimiter that it can't find. */
408 ptr = u_strtok_r(currTokenBuf, delimBuf, &state);
409 if (ptr == NULL || u_strcmp(ptr, temp) != 0) {
410 log_err("Should have received the same string when there are no delimiters\n");
411 }
412 if (u_strtok_r(NULL, delimBuf, &state) != NULL) {
413 log_err("Should not have found another token in a one token string\n");
414 }
415 }
416
417 /* test u_strcmpCodePointOrder() */
418 {
419 /* these strings are in ascending order */
420 static const UChar strings[][4]={
421 { 0x61, 0 }, /* U+0061 */
422 { 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */
423 { 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */
424 { 0xd800, 0 }, /* U+d800 */
425 { 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */
426 { 0xdfff, 0 }, /* U+dfff */
427 { 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */
428 { 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */
429 { 0xd800, 0xdc02, 0 }, /* U+10002 */
430 { 0xd84d, 0xdc56, 0 } /* U+23456 */
431 };
432
433 UCharIterator iter1, iter2;
434 int32_t len1, len2, r1, r2;
435
436 for(i=0; i<(UPRV_LENGTHOF(strings)-1); ++i) {
437 if(u_strcmpCodePointOrder(strings[i], strings[i+1])>=0) {
438 log_err("error: u_strcmpCodePointOrder() fails for string %d and the following one\n", i);
439 }
440 if(u_strncmpCodePointOrder(strings[i], strings[i+1], 10)>=0) {
441 log_err("error: u_strncmpCodePointOrder() fails for string %d and the following one\n", i);
442 }
443
444 /* There are at least 2 UChars in each string - verify that strncmp()==memcmp(). */
445 if(u_strncmpCodePointOrder(strings[i], strings[i+1], 2)!=u_memcmpCodePointOrder(strings[i], strings[i+1], 2)) {
446 log_err("error: u_strncmpCodePointOrder(2)!=u_memcmpCodePointOrder(2) for string %d and the following one\n", i);
447 }
448
449 /* test u_strCompare(true) */
450 len1=u_strlen(strings[i]);
451 len2=u_strlen(strings[i+1]);
452 if( u_strCompare(strings[i], -1, strings[i+1], -1, true)>=0 ||
453 u_strCompare(strings[i], -1, strings[i+1], len2, true)>=0 ||
454 u_strCompare(strings[i], len1, strings[i+1], -1, true)>=0 ||
455 u_strCompare(strings[i], len1, strings[i+1], len2, true)>=0
456 ) {
457 log_err("error: u_strCompare(code point order) fails for string %d and the following one\n", i);
458 }
459
460 /* test u_strCompare(false) */
461 r1=u_strCompare(strings[i], -1, strings[i+1], -1, false);
462 r2=u_strcmp(strings[i], strings[i+1]);
463 if(_SIGN(r1)!=_SIGN(r2)) {
464 log_err("error: u_strCompare(code unit order)!=u_strcmp() for string %d and the following one\n", i);
465 }
466
467 /* test u_strCompareIter() */
468 uiter_setString(&iter1, strings[i], len1);
469 uiter_setString(&iter2, strings[i+1], len2);
470 if(u_strCompareIter(&iter1, &iter2, true)>=0) {
471 log_err("error: u_strCompareIter(code point order) fails for string %d and the following one\n", i);
472 }
473 r1=u_strCompareIter(&iter1, &iter2, false);
474 if(_SIGN(r1)!=_SIGN(u_strcmp(strings[i], strings[i+1]))) {
475 log_err("error: u_strCompareIter(code unit order)!=u_strcmp() for string %d and the following one\n", i);
476 }
477 }
478 }
479
480 cleanUpDataTable();
481 }
482
TestStringSearching(void)483 static void TestStringSearching(void)
484 {
485 const UChar testString[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0064, 0x0061, 0};
486 const UChar testSurrogateString[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0x0063, 0x0064, 0x0064, 0xdbff, 0xdfff, 0xdb00, 0xdf00, 0x0061, 0};
487 const UChar surrMatchSet1[] = {0xdbff, 0xdfff, 0};
488 const UChar surrMatchSet2[] = {0x0061, 0x0062, 0xdbff, 0xdfff, 0};
489 const UChar surrMatchSet3[] = {0xdb00, 0xdf00, 0xdbff, 0xdfff, 0};
490 const UChar surrMatchSet4[] = {0x0000};
491 const UChar surrMatchSetBad[] = {0xdbff, 0x0061, 0};
492 const UChar surrMatchSetBad2[] = {0x0061, 0xdbff, 0};
493 const UChar surrMatchSetBad3[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0}; /* has partial surrogate */
494 const UChar
495 empty[] = { 0 },
496 a[] = { 0x61, 0 },
497 ab[] = { 0x61, 0x62, 0 },
498 ba[] = { 0x62, 0x61, 0 },
499 abcd[] = { 0x61, 0x62, 0x63, 0x64, 0 },
500 cd[] = { 0x63, 0x64, 0 },
501 dc[] = { 0x64, 0x63, 0 },
502 cdh[] = { 0x63, 0x64, 0x68, 0 },
503 f[] = { 0x66, 0 },
504 fg[] = { 0x66, 0x67, 0 },
505 gf[] = { 0x67, 0x66, 0 };
506
507 log_verbose("Testing u_strpbrk()");
508
509 if (u_strpbrk(testString, a) != &testString[0]) {
510 log_err("u_strpbrk couldn't find first letter a.\n");
511 }
512 if (u_strpbrk(testString, dc) != &testString[2]) {
513 log_err("u_strpbrk couldn't find d or c.\n");
514 }
515 if (u_strpbrk(testString, cd) != &testString[2]) {
516 log_err("u_strpbrk couldn't find c or d.\n");
517 }
518 if (u_strpbrk(testString, cdh) != &testString[2]) {
519 log_err("u_strpbrk couldn't find c, d or h.\n");
520 }
521 if (u_strpbrk(testString, f) != NULL) {
522 log_err("u_strpbrk didn't return NULL for \"f\".\n");
523 }
524 if (u_strpbrk(testString, fg) != NULL) {
525 log_err("u_strpbrk didn't return NULL for \"fg\".\n");
526 }
527 if (u_strpbrk(testString, gf) != NULL) {
528 log_err("u_strpbrk didn't return NULL for \"gf\".\n");
529 }
530 if (u_strpbrk(testString, empty) != NULL) {
531 log_err("u_strpbrk didn't return NULL for \"\".\n");
532 }
533
534 log_verbose("Testing u_strpbrk() with surrogates");
535
536 if (u_strpbrk(testSurrogateString, a) != &testSurrogateString[1]) {
537 log_err("u_strpbrk couldn't find first letter a.\n");
538 }
539 if (u_strpbrk(testSurrogateString, dc) != &testSurrogateString[5]) {
540 log_err("u_strpbrk couldn't find d or c.\n");
541 }
542 if (u_strpbrk(testSurrogateString, cd) != &testSurrogateString[5]) {
543 log_err("u_strpbrk couldn't find c or d.\n");
544 }
545 if (u_strpbrk(testSurrogateString, cdh) != &testSurrogateString[5]) {
546 log_err("u_strpbrk couldn't find c, d or h.\n");
547 }
548 if (u_strpbrk(testSurrogateString, f) != NULL) {
549 log_err("u_strpbrk didn't return NULL for \"f\".\n");
550 }
551 if (u_strpbrk(testSurrogateString, fg) != NULL) {
552 log_err("u_strpbrk didn't return NULL for \"fg\".\n");
553 }
554 if (u_strpbrk(testSurrogateString, gf) != NULL) {
555 log_err("u_strpbrk didn't return NULL for \"gf\".\n");
556 }
557 if (u_strpbrk(testSurrogateString, surrMatchSet1) != &testSurrogateString[3]) {
558 log_err("u_strpbrk couldn't find \"0xdbff, 0xdfff\".\n");
559 }
560 if (u_strpbrk(testSurrogateString, surrMatchSet2) != &testSurrogateString[1]) {
561 log_err("u_strpbrk couldn't find \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
562 }
563 if (u_strpbrk(testSurrogateString, surrMatchSet3) != &testSurrogateString[3]) {
564 log_err("u_strpbrk couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
565 }
566 if (u_strpbrk(testSurrogateString, surrMatchSet4) != NULL) {
567 log_err("u_strpbrk should have returned NULL for empty string.\n");
568 }
569 if (u_strpbrk(testSurrogateString, surrMatchSetBad) != &testSurrogateString[0]) {
570 log_err("u_strpbrk should have found bad surrogate.\n");
571 }
572
573 log_verbose("Testing u_strcspn()");
574
575 if (u_strcspn(testString, a) != 0) {
576 log_err("u_strcspn couldn't find first letter a.\n");
577 }
578 if (u_strcspn(testString, dc) != 2) {
579 log_err("u_strcspn couldn't find d or c.\n");
580 }
581 if (u_strcspn(testString, cd) != 2) {
582 log_err("u_strcspn couldn't find c or d.\n");
583 }
584 if (u_strcspn(testString, cdh) != 2) {
585 log_err("u_strcspn couldn't find c, d or h.\n");
586 }
587 if (u_strcspn(testString, f) != u_strlen(testString)) {
588 log_err("u_strcspn didn't return NULL for \"f\".\n");
589 }
590 if (u_strcspn(testString, fg) != u_strlen(testString)) {
591 log_err("u_strcspn didn't return NULL for \"fg\".\n");
592 }
593 if (u_strcspn(testString, gf) != u_strlen(testString)) {
594 log_err("u_strcspn didn't return NULL for \"gf\".\n");
595 }
596
597 log_verbose("Testing u_strcspn() with surrogates");
598
599 if (u_strcspn(testSurrogateString, a) != 1) {
600 log_err("u_strcspn couldn't find first letter a.\n");
601 }
602 if (u_strcspn(testSurrogateString, dc) != 5) {
603 log_err("u_strcspn couldn't find d or c.\n");
604 }
605 if (u_strcspn(testSurrogateString, cd) != 5) {
606 log_err("u_strcspn couldn't find c or d.\n");
607 }
608 if (u_strcspn(testSurrogateString, cdh) != 5) {
609 log_err("u_strcspn couldn't find c, d or h.\n");
610 }
611 if (u_strcspn(testSurrogateString, f) != u_strlen(testSurrogateString)) {
612 log_err("u_strcspn didn't return NULL for \"f\".\n");
613 }
614 if (u_strcspn(testSurrogateString, fg) != u_strlen(testSurrogateString)) {
615 log_err("u_strcspn didn't return NULL for \"fg\".\n");
616 }
617 if (u_strcspn(testSurrogateString, gf) != u_strlen(testSurrogateString)) {
618 log_err("u_strcspn didn't return NULL for \"gf\".\n");
619 }
620 if (u_strcspn(testSurrogateString, surrMatchSet1) != 3) {
621 log_err("u_strcspn couldn't find \"0xdbff, 0xdfff\".\n");
622 }
623 if (u_strcspn(testSurrogateString, surrMatchSet2) != 1) {
624 log_err("u_strcspn couldn't find \"a, b, 0xdbff, 0xdfff\".\n");
625 }
626 if (u_strcspn(testSurrogateString, surrMatchSet3) != 3) {
627 log_err("u_strcspn couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
628 }
629 if (u_strcspn(testSurrogateString, surrMatchSet4) != u_strlen(testSurrogateString)) {
630 log_err("u_strcspn should have returned strlen for empty string.\n");
631 }
632
633
634 log_verbose("Testing u_strspn()");
635
636 if (u_strspn(testString, a) != 1) {
637 log_err("u_strspn couldn't skip first letter a.\n");
638 }
639 if (u_strspn(testString, ab) != 2) {
640 log_err("u_strspn couldn't skip a or b.\n");
641 }
642 if (u_strspn(testString, ba) != 2) {
643 log_err("u_strspn couldn't skip a or b.\n");
644 }
645 if (u_strspn(testString, f) != 0) {
646 log_err("u_strspn didn't return 0 for \"f\".\n");
647 }
648 if (u_strspn(testString, dc) != 0) {
649 log_err("u_strspn couldn't find first letter a (skip d or c).\n");
650 }
651 if (u_strspn(testString, abcd) != u_strlen(testString)) {
652 log_err("u_strspn couldn't skip over the whole string.\n");
653 }
654 if (u_strspn(testString, empty) != 0) {
655 log_err("u_strspn should have returned 0 for empty string.\n");
656 }
657
658 log_verbose("Testing u_strspn() with surrogates");
659 if (u_strspn(testSurrogateString, surrMatchSetBad) != 2) {
660 log_err("u_strspn couldn't skip 0xdbff or a.\n");
661 }
662 if (u_strspn(testSurrogateString, surrMatchSetBad2) != 2) {
663 log_err("u_strspn couldn't skip 0xdbff or a.\n");
664 }
665 if (u_strspn(testSurrogateString, f) != 0) {
666 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
667 }
668 if (u_strspn(testSurrogateString, dc) != 0) {
669 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
670 }
671 if (u_strspn(testSurrogateString, cd) != 0) {
672 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
673 }
674 if (u_strspn(testSurrogateString, testSurrogateString) != u_strlen(testSurrogateString)) {
675 log_err("u_strspn couldn't skip whole string.\n");
676 }
677 if (u_strspn(testSurrogateString, surrMatchSet1) != 0) {
678 log_err("u_strspn couldn't skip \"0xdbff, 0xdfff\" (get first letter).\n");
679 }
680 if (u_strspn(testSurrogateString, surrMatchSetBad3) != 5) {
681 log_err("u_strspn couldn't skip \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
682 }
683 if (u_strspn(testSurrogateString, surrMatchSet4) != 0) {
684 log_err("u_strspn should have returned 0 for empty string.\n");
685 }
686 }
687
688 /*
689 * All binary Unicode string searches should behave the same for equivalent input.
690 * See Jitterbug 2145.
691 * There are some new functions, too - just test them all.
692 */
693 static void
TestSurrogateSearching(void)694 TestSurrogateSearching(void) {
695 static const UChar s[]={
696 /* 0 1 2 3 4 5 6 7 8 9 10 11 */
697 0x61, 0xd801, 0xdc02, 0x61, 0xdc02, 0x61, 0xd801, 0x61, 0xd801, 0xdc02, 0x61, 0
698 }, sub_a[]={
699 0x61, 0
700 }, sub_b[]={
701 0x62, 0
702 }, sub_lead[]={
703 0xd801, 0
704 }, sub_trail[]={
705 0xdc02, 0
706 }, sub_supp[]={
707 0xd801, 0xdc02, 0
708 }, sub_supp2[]={
709 0xd801, 0xdc03, 0
710 }, sub_a_lead[]={
711 0x61, 0xd801, 0
712 }, sub_trail_a[]={
713 0xdc02, 0x61, 0
714 }, sub_aba[]={
715 0x61, 0x62, 0x61, 0
716 };
717 static const UChar a=0x61, b=0x62, lead=0xd801, trail=0xdc02, nul=0;
718 static const UChar32 supp=0x10402, supp2=0x10403, ill=0x123456;
719
720 const UChar *first, *last;
721
722 /* search for NUL code point: find end of string */
723 first=s+u_strlen(s);
724
725 if(
726 first!=u_strchr(s, nul) ||
727 first!=u_strchr32(s, nul) ||
728 first!=u_memchr(s, nul, UPRV_LENGTHOF(s)) ||
729 first!=u_memchr32(s, nul, UPRV_LENGTHOF(s)) ||
730 first!=u_strrchr(s, nul) ||
731 first!=u_strrchr32(s, nul) ||
732 first!=u_memrchr(s, nul, UPRV_LENGTHOF(s)) ||
733 first!=u_memrchr32(s, nul, UPRV_LENGTHOF(s))
734 ) {
735 log_err("error: one of the u_str[|mem][r]chr[32](s, nul) does not find the terminator of s\n");
736 }
737
738 /* search for empty substring: find beginning of string */
739 if(
740 s!=u_strstr(s, &nul) ||
741 s!=u_strFindFirst(s, -1, &nul, -1) ||
742 s!=u_strFindFirst(s, -1, &nul, 0) ||
743 s!=u_strFindFirst(s, UPRV_LENGTHOF(s), &nul, -1) ||
744 s!=u_strFindFirst(s, UPRV_LENGTHOF(s), &nul, 0) ||
745 s!=u_strrstr(s, &nul) ||
746 s!=u_strFindLast(s, -1, &nul, -1) ||
747 s!=u_strFindLast(s, -1, &nul, 0) ||
748 s!=u_strFindLast(s, UPRV_LENGTHOF(s), &nul, -1) ||
749 s!=u_strFindLast(s, UPRV_LENGTHOF(s), &nul, 0)
750 ) {
751 log_err("error: one of the u_str[str etc](s, \"\") does not find s itself\n");
752 }
753
754 /* find 'a' in s[1..10[ */
755 first=s+3;
756 last=s+7;
757 if(
758 first!=u_strchr(s+1, a) ||
759 first!=u_strchr32(s+1, a) ||
760 first!=u_memchr(s+1, a, 9) ||
761 first!=u_memchr32(s+1, a, 9) ||
762 first!=u_strstr(s+1, sub_a) ||
763 first!=u_strFindFirst(s+1, -1, sub_a, -1) ||
764 first!=u_strFindFirst(s+1, -1, &a, 1) ||
765 first!=u_strFindFirst(s+1, 9, sub_a, -1) ||
766 first!=u_strFindFirst(s+1, 9, &a, 1) ||
767 (s+10)!=u_strrchr(s+1, a) ||
768 (s+10)!=u_strrchr32(s+1, a) ||
769 last!=u_memrchr(s+1, a, 9) ||
770 last!=u_memrchr32(s+1, a, 9) ||
771 (s+10)!=u_strrstr(s+1, sub_a) ||
772 (s+10)!=u_strFindLast(s+1, -1, sub_a, -1) ||
773 (s+10)!=u_strFindLast(s+1, -1, &a, 1) ||
774 last!=u_strFindLast(s+1, 9, sub_a, -1) ||
775 last!=u_strFindLast(s+1, 9, &a, 1)
776 ) {
777 log_err("error: one of the u_str[chr etc]('a') does not find the correct place\n");
778 }
779
780 /* do not find 'b' in s[1..10[ */
781 if(
782 NULL!=u_strchr(s+1, b) ||
783 NULL!=u_strchr32(s+1, b) ||
784 NULL!=u_memchr(s+1, b, 9) ||
785 NULL!=u_memchr32(s+1, b, 9) ||
786 NULL!=u_strstr(s+1, sub_b) ||
787 NULL!=u_strFindFirst(s+1, -1, sub_b, -1) ||
788 NULL!=u_strFindFirst(s+1, -1, &b, 1) ||
789 NULL!=u_strFindFirst(s+1, 9, sub_b, -1) ||
790 NULL!=u_strFindFirst(s+1, 9, &b, 1) ||
791 NULL!=u_strrchr(s+1, b) ||
792 NULL!=u_strrchr32(s+1, b) ||
793 NULL!=u_memrchr(s+1, b, 9) ||
794 NULL!=u_memrchr32(s+1, b, 9) ||
795 NULL!=u_strrstr(s+1, sub_b) ||
796 NULL!=u_strFindLast(s+1, -1, sub_b, -1) ||
797 NULL!=u_strFindLast(s+1, -1, &b, 1) ||
798 NULL!=u_strFindLast(s+1, 9, sub_b, -1) ||
799 NULL!=u_strFindLast(s+1, 9, &b, 1)
800 ) {
801 log_err("error: one of the u_str[chr etc]('b') incorrectly finds something\n");
802 }
803
804 /* do not find a non-code point in s[1..10[ */
805 if(
806 NULL!=u_strchr32(s+1, ill) ||
807 NULL!=u_memchr32(s+1, ill, 9) ||
808 NULL!=u_strrchr32(s+1, ill) ||
809 NULL!=u_memrchr32(s+1, ill, 9)
810 ) {
811 log_err("error: one of the u_str[chr etc](illegal code point) incorrectly finds something\n");
812 }
813
814 /* find U+d801 in s[1..10[ */
815 first=s+6;
816 if(
817 first!=u_strchr(s+1, lead) ||
818 first!=u_strchr32(s+1, lead) ||
819 first!=u_memchr(s+1, lead, 9) ||
820 first!=u_memchr32(s+1, lead, 9) ||
821 first!=u_strstr(s+1, sub_lead) ||
822 first!=u_strFindFirst(s+1, -1, sub_lead, -1) ||
823 first!=u_strFindFirst(s+1, -1, &lead, 1) ||
824 first!=u_strFindFirst(s+1, 9, sub_lead, -1) ||
825 first!=u_strFindFirst(s+1, 9, &lead, 1) ||
826 first!=u_strrchr(s+1, lead) ||
827 first!=u_strrchr32(s+1, lead) ||
828 first!=u_memrchr(s+1, lead, 9) ||
829 first!=u_memrchr32(s+1, lead, 9) ||
830 first!=u_strrstr(s+1, sub_lead) ||
831 first!=u_strFindLast(s+1, -1, sub_lead, -1) ||
832 first!=u_strFindLast(s+1, -1, &lead, 1) ||
833 first!=u_strFindLast(s+1, 9, sub_lead, -1) ||
834 first!=u_strFindLast(s+1, 9, &lead, 1)
835 ) {
836 log_err("error: one of the u_str[chr etc](U+d801) does not find the correct place\n");
837 }
838
839 /* find U+dc02 in s[1..10[ */
840 first=s+4;
841 if(
842 first!=u_strchr(s+1, trail) ||
843 first!=u_strchr32(s+1, trail) ||
844 first!=u_memchr(s+1, trail, 9) ||
845 first!=u_memchr32(s+1, trail, 9) ||
846 first!=u_strstr(s+1, sub_trail) ||
847 first!=u_strFindFirst(s+1, -1, sub_trail, -1) ||
848 first!=u_strFindFirst(s+1, -1, &trail, 1) ||
849 first!=u_strFindFirst(s+1, 9, sub_trail, -1) ||
850 first!=u_strFindFirst(s+1, 9, &trail, 1) ||
851 first!=u_strrchr(s+1, trail) ||
852 first!=u_strrchr32(s+1, trail) ||
853 first!=u_memrchr(s+1, trail, 9) ||
854 first!=u_memrchr32(s+1, trail, 9) ||
855 first!=u_strrstr(s+1, sub_trail) ||
856 first!=u_strFindLast(s+1, -1, sub_trail, -1) ||
857 first!=u_strFindLast(s+1, -1, &trail, 1) ||
858 first!=u_strFindLast(s+1, 9, sub_trail, -1) ||
859 first!=u_strFindLast(s+1, 9, &trail, 1)
860 ) {
861 log_err("error: one of the u_str[chr etc](U+dc02) does not find the correct place\n");
862 }
863
864 /* find U+10402 in s[1..10[ */
865 first=s+1;
866 last=s+8;
867 if(
868 first!=u_strchr32(s+1, supp) ||
869 first!=u_memchr32(s+1, supp, 9) ||
870 first!=u_strstr(s+1, sub_supp) ||
871 first!=u_strFindFirst(s+1, -1, sub_supp, -1) ||
872 first!=u_strFindFirst(s+1, -1, sub_supp, 2) ||
873 first!=u_strFindFirst(s+1, 9, sub_supp, -1) ||
874 first!=u_strFindFirst(s+1, 9, sub_supp, 2) ||
875 last!=u_strrchr32(s+1, supp) ||
876 last!=u_memrchr32(s+1, supp, 9) ||
877 last!=u_strrstr(s+1, sub_supp) ||
878 last!=u_strFindLast(s+1, -1, sub_supp, -1) ||
879 last!=u_strFindLast(s+1, -1, sub_supp, 2) ||
880 last!=u_strFindLast(s+1, 9, sub_supp, -1) ||
881 last!=u_strFindLast(s+1, 9, sub_supp, 2)
882 ) {
883 log_err("error: one of the u_str[chr etc](U+10402) does not find the correct place\n");
884 }
885
886 /* do not find U+10402 in a single UChar */
887 if(
888 NULL!=u_memchr32(s+1, supp, 1) ||
889 NULL!=u_strFindFirst(s+1, 1, sub_supp, -1) ||
890 NULL!=u_strFindFirst(s+1, 1, sub_supp, 2) ||
891 NULL!=u_memrchr32(s+1, supp, 1) ||
892 NULL!=u_strFindLast(s+1, 1, sub_supp, -1) ||
893 NULL!=u_strFindLast(s+1, 1, sub_supp, 2) ||
894 NULL!=u_memrchr32(s+2, supp, 1) ||
895 NULL!=u_strFindLast(s+2, 1, sub_supp, -1) ||
896 NULL!=u_strFindLast(s+2, 1, sub_supp, 2)
897 ) {
898 log_err("error: one of the u_str[chr etc](U+10402) incorrectly finds a supplementary c.p. in a single UChar\n");
899 }
900
901 /* do not find U+10403 in s[1..10[ */
902 if(
903 NULL!=u_strchr32(s+1, supp2) ||
904 NULL!=u_memchr32(s+1, supp2, 9) ||
905 NULL!=u_strstr(s+1, sub_supp2) ||
906 NULL!=u_strFindFirst(s+1, -1, sub_supp2, -1) ||
907 NULL!=u_strFindFirst(s+1, -1, sub_supp2, 2) ||
908 NULL!=u_strFindFirst(s+1, 9, sub_supp2, -1) ||
909 NULL!=u_strFindFirst(s+1, 9, sub_supp2, 2) ||
910 NULL!=u_strrchr32(s+1, supp2) ||
911 NULL!=u_memrchr32(s+1, supp2, 9) ||
912 NULL!=u_strrstr(s+1, sub_supp2) ||
913 NULL!=u_strFindLast(s+1, -1, sub_supp2, -1) ||
914 NULL!=u_strFindLast(s+1, -1, sub_supp2, 2) ||
915 NULL!=u_strFindLast(s+1, 9, sub_supp2, -1) ||
916 NULL!=u_strFindLast(s+1, 9, sub_supp2, 2)
917 ) {
918 log_err("error: one of the u_str[chr etc](U+10403) incorrectly finds something\n");
919 }
920
921 /* find <0061 d801> in s[1..10[ */
922 first=s+5;
923 if(
924 first!=u_strstr(s+1, sub_a_lead) ||
925 first!=u_strFindFirst(s+1, -1, sub_a_lead, -1) ||
926 first!=u_strFindFirst(s+1, -1, sub_a_lead, 2) ||
927 first!=u_strFindFirst(s+1, 9, sub_a_lead, -1) ||
928 first!=u_strFindFirst(s+1, 9, sub_a_lead, 2) ||
929 first!=u_strrstr(s+1, sub_a_lead) ||
930 first!=u_strFindLast(s+1, -1, sub_a_lead, -1) ||
931 first!=u_strFindLast(s+1, -1, sub_a_lead, 2) ||
932 first!=u_strFindLast(s+1, 9, sub_a_lead, -1) ||
933 first!=u_strFindLast(s+1, 9, sub_a_lead, 2)
934 ) {
935 log_err("error: one of the u_str[str etc](<0061 d801>) does not find the correct place\n");
936 }
937
938 /* find <dc02 0061> in s[1..10[ */
939 first=s+4;
940 if(
941 first!=u_strstr(s+1, sub_trail_a) ||
942 first!=u_strFindFirst(s+1, -1, sub_trail_a, -1) ||
943 first!=u_strFindFirst(s+1, -1, sub_trail_a, 2) ||
944 first!=u_strFindFirst(s+1, 9, sub_trail_a, -1) ||
945 first!=u_strFindFirst(s+1, 9, sub_trail_a, 2) ||
946 first!=u_strrstr(s+1, sub_trail_a) ||
947 first!=u_strFindLast(s+1, -1, sub_trail_a, -1) ||
948 first!=u_strFindLast(s+1, -1, sub_trail_a, 2) ||
949 first!=u_strFindLast(s+1, 9, sub_trail_a, -1) ||
950 first!=u_strFindLast(s+1, 9, sub_trail_a, 2)
951 ) {
952 log_err("error: one of the u_str[str etc](<dc02 0061>) does not find the correct place\n");
953 }
954
955 /* do not find "aba" in s[1..10[ */
956 if(
957 NULL!=u_strstr(s+1, sub_aba) ||
958 NULL!=u_strFindFirst(s+1, -1, sub_aba, -1) ||
959 NULL!=u_strFindFirst(s+1, -1, sub_aba, 3) ||
960 NULL!=u_strFindFirst(s+1, 9, sub_aba, -1) ||
961 NULL!=u_strFindFirst(s+1, 9, sub_aba, 3) ||
962 NULL!=u_strrstr(s+1, sub_aba) ||
963 NULL!=u_strFindLast(s+1, -1, sub_aba, -1) ||
964 NULL!=u_strFindLast(s+1, -1, sub_aba, 3) ||
965 NULL!=u_strFindLast(s+1, 9, sub_aba, -1) ||
966 NULL!=u_strFindLast(s+1, 9, sub_aba, 3)
967 ) {
968 log_err("error: one of the u_str[str etc](\"aba\") incorrectly finds something\n");
969 }
    /* Regression test for ICU-20684 Use-of-uninitialized-value in isMatchAtCPBoundary
     * Condition: search the same string while the first char is not a
     * surrogate and the last char is a leading surrogate.
     */
974 static const UChar s2[]={ 0x0020, 0xD9C1 };
975 if (u_strFindFirst(s2, 2, s2, 2) != s2) {
976 log_err("error: ending with a partial supplementary code point should match\n");
977 }
978 }
979
TestStringCopy(void)980 static void TestStringCopy(void)
981 {
982 UChar temp[40];
983 UChar *result=0;
984 UChar subString[5];
985 UChar uchars[]={0x61, 0x62, 0x63, 0x00};
986 char charOut[40];
987 char chars[]="abc"; /* needs default codepage */
988
989 log_verbose("Testing u_uastrncpy() and u_uastrcpy()");
990
991 u_uastrcpy(temp, "abc");
992 if(u_strcmp(temp, uchars) != 0) {
993 log_err("There is an error in u_uastrcpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
994 }
995
996 temp[0] = 0xFB; /* load garbage into it */
997 temp[1] = 0xFB;
998 temp[2] = 0xFB;
999 temp[3] = 0xFB;
1000
1001 u_uastrncpy(temp, "abcabcabc", 3);
1002 if(u_strncmp(uchars, temp, 3) != 0){
1003 log_err("There is an error in u_uastrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
1004 }
1005 if(temp[3] != 0xFB) {
1006 log_err("u_uastrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
1007 }
1008
1009 charOut[0] = (char)0x7B; /* load garbage into it */
1010 charOut[1] = (char)0x7B;
1011 charOut[2] = (char)0x7B;
1012 charOut[3] = (char)0x7B;
1013
1014 temp[0] = 0x0061;
1015 temp[1] = 0x0062;
1016 temp[2] = 0x0063;
1017 temp[3] = 0x0061;
1018 temp[4] = 0x0062;
1019 temp[5] = 0x0063;
1020 temp[6] = 0x0000;
1021
1022 u_austrncpy(charOut, temp, 3);
1023 if(strncmp(chars, charOut, 3) != 0){
1024 log_err("There is an error in u_austrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
1025 }
1026 if(charOut[3] != (char)0x7B) {
1027 log_err("u_austrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
1028 }
1029
1030 /*Testing u_strchr()*/
1031 log_verbose("Testing u_strchr\n");
1032 temp[0]=0x42;
1033 temp[1]=0x62;
1034 temp[2]=0x62;
1035 temp[3]=0x63;
1036 temp[4]=0xd841;
1037 temp[5]=0xd841;
1038 temp[6]=0xdc02;
1039 temp[7]=0;
1040 result=u_strchr(temp, (UChar)0x62);
1041 if(result != temp+1){
1042 log_err("There is an error in u_strchr() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result);
1043 }
1044 /*Testing u_strstr()*/
1045 log_verbose("Testing u_strstr\n");
1046 subString[0]=0x62;
1047 subString[1]=0x63;
1048 subString[2]=0;
1049 result=u_strstr(temp, subString);
1050 if(result != temp+2){
1051 log_err("There is an error in u_strstr() Expected match at position 2 Got %ld (pointer 0x%lx)\n", result-temp, result);
1052 }
1053 result=u_strstr(temp, subString+2); /* subString+2 is an empty string */
1054 if(result != temp){
1055 log_err("There is an error in u_strstr() Expected match at position 0 Got %ld (pointer 0x%lx)\n", result-temp, result);
1056 }
1057 result=u_strstr(subString, temp);
1058 if(result != NULL){
1059 log_err("There is an error in u_strstr() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
1060 }
1061
1062 /*Testing u_strchr32*/
1063 log_verbose("Testing u_strchr32\n");
1064 result=u_strchr32(temp, (UChar32)0x62);
1065 if(result != temp+1){
1066 log_err("There is an error in u_strchr32() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result);
1067 }
1068 result=u_strchr32(temp, (UChar32)0xfb);
1069 if(result != NULL){
1070 log_err("There is an error in u_strchr32() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
1071 }
1072 result=u_strchr32(temp, (UChar32)0x20402);
1073 if(result != temp+5){
1074 log_err("There is an error in u_strchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result);
1075 }
1076
1077 temp[7]=0xfc00;
1078 result=u_memchr32(temp, (UChar32)0x20402, 7);
1079 if(result != temp+5){
1080 log_err("There is an error in u_memchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result);
1081 }
1082 result=u_memchr32(temp, (UChar32)0x20402, 6);
1083 if(result != NULL){
1084 log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result);
1085 }
1086 result=u_memchr32(temp, (UChar32)0x20402, 1);
1087 if(result != NULL){
1088 log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result);
1089 }
1090 result=u_memchr32(temp, (UChar32)0xfc00, 8);
1091 if(result != temp+7){
1092 log_err("There is an error in u_memchr32() Expected match at position 7 Got %ld (pointer 0x%lx)\n", result-temp, result);
1093 }
1094 }
1095
1096 /* test u_unescape() and u_unescapeAt() ------------------------------------- */
1097
1098 static void
TestUnescape(void)1099 TestUnescape(void) {
1100 static UChar buffer[200];
1101
1102 static const char* input =
1103 "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\e\\cC\\n \\x1b\\x{263a}";
1104
1105 static const UChar expect[]={
1106 0x53, 0x63, 0x68, 0xf6, 0x6e, 0x65, 0x73, 0x20, 0x41, 0x75, 0x74, 0x6f, 0x3a, 0x20,
1107 0x20ac, 0x20, 0x31, 0x31, 0x32, 0x34, 0x30, 0x2e, 0x0c,
1108 0x50, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x73, 0x20,
1109 0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x1b, 0x03, 0x0a, 0x20, 0x1b, 0x263A, 0
1110 };
1111 static const int32_t explength = UPRV_LENGTHOF(expect)-1;
1112 int32_t length;
1113
1114 /* test u_unescape() */
1115 length=u_unescape(input, buffer, UPRV_LENGTHOF(buffer));
1116 if(length!=explength || u_strcmp(buffer, expect)!=0) {
1117 log_err("failure in u_unescape(): length %d!=%d and/or incorrect result string\n", length,
1118 explength);
1119 }
1120
1121 /* try preflighting */
1122 length=u_unescape(input, NULL, UPRV_LENGTHOF(buffer));
1123 if(length!=explength || u_strcmp(buffer, expect)!=0) {
1124 log_err("failure in u_unescape(preflighting): length %d!=%d\n", length, explength);
1125 }
1126
1127 /* ### TODO: test u_unescapeAt() */
1128 }
1129
1130 static void
TestUnescapeRepeatedSurrogateLead20725(void)1131 TestUnescapeRepeatedSurrogateLead20725(void) {
1132 const int32_t repeat = 20000;
1133 const int32_t srclen = repeat * 6 + 1;
1134 char *src = (char*)malloc(srclen);
1135 UChar *dest = (UChar*) malloc(sizeof(UChar) * (repeat + 1));
1136 if (src == NULL || dest == NULL) {
1137 log_err("memory allocation error");
1138 }
1139 for (int32_t i = 0; i < repeat; i++) {
1140 uprv_strcpy(src + (i * 6), "\\ud841");
1141 }
1142 int32_t len = u_unescape(src, dest, repeat);
1143 if (len != repeat) {
1144 log_err("failure in u_unescape()");
1145 }
1146 for (int32_t i = 0; i < repeat; i++) {
1147 if (dest[i] != 0xd841) {
1148 log_err("failure in u_unescape() return value");
1149 }
1150 }
1151 free(src);
1152
1153 // A few simple test cases to make sure that the code recovers properly
1154 u_unescape("\\ud841\\x5A", dest, repeat);
1155 const UChar expected1[] = {0xd841, 'Z', 0};
1156 if (u_strcmp(dest, expected1)!=0) {
1157 log_err("u_unescape() should return u\"\\ud841Z\" but got %s", dest);
1158 }
1159
1160 u_unescape("\\ud841\\U00050005", dest, repeat);
1161 const UChar expected2[] = {0xd841, 0xd900, 0xdc05, 0};
1162 if (u_strcmp(dest, expected2)!=0) {
1163 log_err("u_unescape() should return u\"\\ud841\\ud900\\udc05\" "
1164 "but got %s", dest);
1165 }
1166
1167 // \\xXX is ill-formed. The documentation states:
1168 // If an escape sequence is ill-formed, this method returns an empty string.
1169 u_unescape("\\ud841\\xXX", dest, repeat);
1170 const UChar expected3[] = { 0 };
1171 if (u_strcmp(dest, expected3)!=0) {
1172 log_err("u_unescape() should return empty string");
1173 }
1174
1175 free(dest);
1176
1177 }
1178
1179 /* test code point counting functions --------------------------------------- */
1180
1181 /* reference implementation of u_strHasMoreChar32Than() */
1182 static int32_t
_refStrHasMoreChar32Than(const UChar * s,int32_t length,int32_t number)1183 _refStrHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
1184 int32_t count=u_countChar32(s, length);
1185 return count>number;
1186 }
1187
1188 /* compare the real function against the reference */
1189 static void
_testStrHasMoreChar32Than(const UChar * s,int32_t i,int32_t length,int32_t number)1190 _testStrHasMoreChar32Than(const UChar *s, int32_t i, int32_t length, int32_t number) {
1191 if(u_strHasMoreChar32Than(s, length, number)!=_refStrHasMoreChar32Than(s, length, number)) {
1192 log_err("u_strHasMoreChar32Than(s+%d, %d, %d)=%hd is wrong\n",
1193 i, length, number, u_strHasMoreChar32Than(s, length, number));
1194 }
1195 }
1196
1197 static void
TestCountChar32(void)1198 TestCountChar32(void) {
1199 static const UChar string[]={
1200 0x61, 0x62, 0xd800, 0xdc00,
1201 0xd801, 0xdc01, 0x63, 0xd802,
1202 0x64, 0xdc03, 0x65, 0x66,
1203 0xd804, 0xdc04, 0xd805, 0xdc05,
1204 0x67
1205 };
1206 UChar buffer[100];
1207 int32_t i, length, number;
1208
1209 /* test u_strHasMoreChar32Than() with length>=0 */
1210 length=UPRV_LENGTHOF(string);
1211 while(length>=0) {
1212 for(i=0; i<=length; ++i) {
1213 for(number=-1; number<=((length-i)+2); ++number) {
1214 _testStrHasMoreChar32Than(string+i, i, length-i, number);
1215 }
1216 }
1217 --length;
1218 }
1219
1220 /* test u_strHasMoreChar32Than() with NUL-termination (length=-1) */
1221 length=UPRV_LENGTHOF(string);
1222 u_memcpy(buffer, string, length);
1223 while(length>=0) {
1224 buffer[length]=0;
1225 for(i=0; i<=length; ++i) {
1226 for(number=-1; number<=((length-i)+2); ++number) {
1227 _testStrHasMoreChar32Than(buffer+i, i, -1, number);
1228 }
1229 }
1230 --length;
1231 }
1232
1233 /* test u_strHasMoreChar32Than() with NULL string (bad input) */
1234 for(length=-1; length<=1; ++length) {
1235 for(i=0; i<=length; ++i) {
1236 for(number=-2; number<=2; ++number) {
1237 _testStrHasMoreChar32Than(NULL, 0, length, number);
1238 }
1239 }
1240 }
1241 }
1242
1243 /* UCharIterator ------------------------------------------------------------ */
1244
1245 /*
1246 * Compare results from two iterators, should be same.
1247 * Assume that the text is not empty and that
1248 * iteration start==0 and iteration limit==length.
1249 */
1250 static void
compareIterators(UCharIterator * iter1,const char * n1,UCharIterator * iter2,const char * n2)1251 compareIterators(UCharIterator *iter1, const char *n1,
1252 UCharIterator *iter2, const char *n2) {
1253 int32_t i, pos1, pos2, middle, length;
1254 UChar32 c1, c2;
1255
1256 /* compare lengths */
1257 length=iter1->getIndex(iter1, UITER_LENGTH);
1258 pos2=iter2->getIndex(iter2, UITER_LENGTH);
1259 if(length!=pos2) {
1260 log_err("%s->getIndex(length)=%d != %d=%s->getIndex(length)\n", n1, length, pos2, n2);
1261 return;
1262 }
1263
1264 /* set into the middle */
1265 middle=length/2;
1266
1267 pos1=iter1->move(iter1, middle, UITER_ZERO);
1268 if(pos1!=middle) {
1269 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n1, middle, pos1);
1270 return;
1271 }
1272
1273 pos2=iter2->move(iter2, middle, UITER_ZERO);
1274 if(pos2!=middle) {
1275 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n2, middle, pos2);
1276 return;
1277 }
1278
1279 /* test current() */
1280 c1=iter1->current(iter1);
1281 c2=iter2->current(iter2);
1282 if(c1!=c2) {
1283 log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n1, c1, c2, n2, middle);
1284 return;
1285 }
1286
1287 /* move forward 3 UChars */
1288 for(i=0; i<3; ++i) {
1289 c1=iter1->next(iter1);
1290 c2=iter2->next(iter2);
1291 if(c1!=c2) {
1292 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1293 return;
1294 }
1295 }
1296
1297 /* move backward 5 UChars */
1298 for(i=0; i<5; ++i) {
1299 c1=iter1->previous(iter1);
1300 c2=iter2->previous(iter2);
1301 if(c1!=c2) {
1302 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1303 return;
1304 }
1305 }
1306
1307 /* iterate forward from the beginning */
1308 pos1=iter1->move(iter1, 0, UITER_START);
1309 if(pos1<0) {
1310 log_err("%s->move(start) failed\n", n1);
1311 return;
1312 }
1313 if(!iter1->hasNext(iter1)) {
1314 log_err("%s->hasNext() at the start returns false\n", n1);
1315 return;
1316 }
1317
1318 pos2=iter2->move(iter2, 0, UITER_START);
1319 if(pos2<0) {
1320 log_err("%s->move(start) failed\n", n2);
1321 return;
1322 }
1323 if(!iter2->hasNext(iter2)) {
1324 log_err("%s->hasNext() at the start returns false\n", n2);
1325 return;
1326 }
1327
1328 do {
1329 c1=iter1->next(iter1);
1330 c2=iter2->next(iter2);
1331 if(c1!=c2) {
1332 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1333 return;
1334 }
1335 } while(c1>=0);
1336
1337 if(iter1->hasNext(iter1)) {
1338 log_err("%s->hasNext() at the end returns true\n", n1);
1339 return;
1340 }
1341 if(iter2->hasNext(iter2)) {
1342 log_err("%s->hasNext() at the end returns true\n", n2);
1343 return;
1344 }
1345
1346 /* back to the middle */
1347 pos1=iter1->move(iter1, middle, UITER_ZERO);
1348 if(pos1!=middle) {
1349 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n1, middle, pos1);
1350 return;
1351 }
1352
1353 pos2=iter2->move(iter2, middle, UITER_ZERO);
1354 if(pos2!=middle) {
1355 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n2, middle, pos2);
1356 return;
1357 }
1358
1359 /* move to index 1 */
1360 pos1=iter1->move(iter1, 1, UITER_ZERO);
1361 if(pos1!=1) {
1362 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n1, middle, pos1);
1363 return;
1364 }
1365
1366 pos2=iter2->move(iter2, 1, UITER_ZERO);
1367 if(pos2!=1) {
1368 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n2, middle, pos2);
1369 return;
1370 }
1371
1372 /* iterate backward from the end */
1373 pos1=iter1->move(iter1, 0, UITER_LIMIT);
1374 if(pos1<0) {
1375 log_err("%s->move(limit) failed\n", n1);
1376 return;
1377 }
1378 if(!iter1->hasPrevious(iter1)) {
1379 log_err("%s->hasPrevious() at the end returns false\n", n1);
1380 return;
1381 }
1382
1383 pos2=iter2->move(iter2, 0, UITER_LIMIT);
1384 if(pos2<0) {
1385 log_err("%s->move(limit) failed\n", n2);
1386 return;
1387 }
1388 if(!iter2->hasPrevious(iter2)) {
1389 log_err("%s->hasPrevious() at the end returns false\n", n2);
1390 return;
1391 }
1392
1393 do {
1394 c1=iter1->previous(iter1);
1395 c2=iter2->previous(iter2);
1396 if(c1!=c2) {
1397 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1398 return;
1399 }
1400 } while(c1>=0);
1401
1402 if(iter1->hasPrevious(iter1)) {
1403 log_err("%s->hasPrevious() at the start returns true\n", n1);
1404 return;
1405 }
1406 if(iter2->hasPrevious(iter2)) {
1407 log_err("%s->hasPrevious() at the start returns true\n", n2);
1408 return;
1409 }
1410 }
1411
1412 /*
1413 * Test the iterator's getState() and setState() functions.
1414 * iter1 and iter2 must be set up for the same iterator type and the same string
1415 * but may be physically different structs (different addresses).
1416 *
1417 * Assume that the text is not empty and that
1418 * iteration start==0 and iteration limit==length.
1419 * It must be 2<=middle<=length-2.
1420 */
1421 static void
testIteratorState(UCharIterator * iter1,UCharIterator * iter2,const char * n,int32_t middle)1422 testIteratorState(UCharIterator *iter1, UCharIterator *iter2, const char *n, int32_t middle) {
1423 UChar32 u[4];
1424
1425 UErrorCode errorCode;
1426 UChar32 c;
1427 uint32_t state;
1428 int32_t i, j;
1429
1430 /* get four UChars from the middle of the string */
1431 iter1->move(iter1, middle-2, UITER_ZERO);
1432 for(i=0; i<4; ++i) {
1433 c=iter1->next(iter1);
1434 if(c<0) {
1435 /* the test violates the assumptions, see comment above */
1436 log_err("test error: %s[%d]=%d\n", n, middle-2+i, c);
1437 return;
1438 }
1439 u[i]=c;
1440 }
1441
1442 /* move to the middle and get the state */
1443 iter1->move(iter1, -2, UITER_CURRENT);
1444 state=uiter_getState(iter1);
1445
1446 /* set the state into the second iterator and compare the results */
1447 errorCode=U_ZERO_ERROR;
1448 uiter_setState(iter2, state, &errorCode);
1449 if(U_FAILURE(errorCode)) {
1450 log_err("%s->setState(0x%x) failed: %s\n", n, state, u_errorName(errorCode));
1451 return;
1452 }
1453
1454 c=iter2->current(iter2);
1455 if(c!=u[2]) {
1456 log_err("%s->current(at %d)=U+%04x!=U+%04x\n", n, middle, c, u[2]);
1457 }
1458
1459 c=iter2->previous(iter2);
1460 if(c!=u[1]) {
1461 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-1, c, u[1]);
1462 }
1463
1464 iter2->move(iter2, 2, UITER_CURRENT);
1465 c=iter2->next(iter2);
1466 if(c!=u[3]) {
1467 log_err("%s->next(at %d)=U+%04x!=U+%04x\n", n, middle+1, c, u[3]);
1468 }
1469
1470 iter2->move(iter2, -3, UITER_CURRENT);
1471 c=iter2->previous(iter2);
1472 if(c!=u[0]) {
1473 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-2, c, u[0]);
1474 }
1475
1476 /* move the second iterator back to the middle */
1477 iter2->move(iter2, 1, UITER_CURRENT);
1478 iter2->next(iter2);
1479
1480 /* check that both are in the middle */
1481 i=iter1->getIndex(iter1, UITER_CURRENT);
1482 j=iter2->getIndex(iter2, UITER_CURRENT);
1483 if(i!=middle) {
1484 log_err("%s->getIndex(current)=%d!=%d as expected\n", n, i, middle);
1485 }
1486 if(i!=j) {
1487 log_err("%s->getIndex(current)=%d!=%d after setState()\n", n, j, i);
1488 }
1489
1490 /* compare lengths */
1491 i=iter1->getIndex(iter1, UITER_LENGTH);
1492 j=iter2->getIndex(iter2, UITER_LENGTH);
1493 if(i!=j) {
1494 log_err("%s->getIndex(length)=%d!=%d before/after setState()\n", n, i, j);
1495 }
1496 }
1497
1498 static void
TestUCharIterator(void)1499 TestUCharIterator(void) {
1500 static const UChar text[]={
1501 0x61, 0x62, 0x63, 0xd801, 0xdffd, 0x78, 0x79, 0x7a, 0
1502 };
1503 char bytes[40];
1504
1505 UCharIterator iter, iter1, iter2;
1506 UConverter *cnv;
1507 UErrorCode errorCode;
1508 int32_t length;
1509
1510 /* simple API/code coverage - test NOOP UCharIterator */
1511 uiter_setString(&iter, NULL, 0);
1512 if( iter.current(&iter)!=-1 || iter.next(&iter)!=-1 || iter.previous(&iter)!=-1 ||
1513 iter.move(&iter, 1, UITER_CURRENT) || iter.getIndex(&iter, UITER_CURRENT)!=0 ||
1514 iter.hasNext(&iter) || iter.hasPrevious(&iter)
1515 ) {
1516 log_err("NOOP UCharIterator behaves unexpectedly\n");
1517 }
1518
1519 /* test get/set state */
1520 length=UPRV_LENGTHOF(text)-1;
1521 uiter_setString(&iter1, text, -1);
1522 uiter_setString(&iter2, text, length);
1523 testIteratorState(&iter1, &iter2, "UTF16IteratorState", length/2);
1524 testIteratorState(&iter1, &iter2, "UTF16IteratorStatePlus1", length/2+1);
1525
1526 /* compare the same string between UTF-16 and UTF-8 UCharIterators ------ */
1527 errorCode=U_ZERO_ERROR;
1528 u_strToUTF8(bytes, sizeof(bytes), &length, text, -1, &errorCode);
1529 if(U_FAILURE(errorCode)) {
1530 log_err("u_strToUTF8() failed, %s\n", u_errorName(errorCode));
1531 return;
1532 }
1533
1534 uiter_setString(&iter1, text, -1);
1535 uiter_setUTF8(&iter2, bytes, length);
1536 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator");
1537
1538 /* try again with length=-1 */
1539 uiter_setUTF8(&iter2, bytes, -1);
1540 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator_1");
1541
1542 /* test get/set state */
1543 length=UPRV_LENGTHOF(text)-1;
1544 uiter_setUTF8(&iter1, bytes, -1);
1545 testIteratorState(&iter1, &iter2, "UTF8IteratorState", length/2);
1546 testIteratorState(&iter1, &iter2, "UTF8IteratorStatePlus1", length/2+1);
1547
1548 /* compare the same string between UTF-16 and UTF-16BE UCharIterators --- */
1549 errorCode=U_ZERO_ERROR;
1550 cnv=ucnv_open("UTF-16BE", &errorCode);
1551 length=ucnv_fromUChars(cnv, bytes, sizeof(bytes), text, -1, &errorCode);
1552 ucnv_close(cnv);
1553 if(U_FAILURE(errorCode)) {
1554 log_err("ucnv_fromUChars(UTF-16BE) failed, %s\n", u_errorName(errorCode));
1555 return;
1556 }
1557
1558 /* terminate with a _pair_ of 0 bytes - a UChar NUL in UTF-16BE (length is known to be ok) */
1559 bytes[length]=bytes[length+1]=0;
1560
1561 uiter_setString(&iter1, text, -1);
1562 uiter_setUTF16BE(&iter2, bytes, length);
1563 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator");
1564
1565 /* try again with length=-1 */
1566 uiter_setUTF16BE(&iter2, bytes, -1);
1567 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator_1");
1568
1569 /* try again after moving the bytes up one, and with length=-1 */
1570 memmove(bytes+1, bytes, length+2);
1571 uiter_setUTF16BE(&iter2, bytes+1, -1);
1572 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIteratorMoved1");
1573
1574 /* ### TODO test other iterators: CharacterIterator, Replaceable */
1575 }
1576