1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 * Copyright (C) 2002-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 ******************************************************************************
10 * file name: custrtst.c
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2002oct09
16 * created by: Markus W. Scherer
17 *
18 * Tests of ustring.h Unicode string API functions.
19 */
20
21 #include "unicode/ustring.h"
22 #include "unicode/ucnv.h"
23 #include "unicode/uiter.h"
24 #include "cintltst.h"
25 #include "cmemory.h"
26 #include <string.h>
27
/*
 * Get the sign of an integer: -1 for negative values, 0 for zero, +1 for positive values.
 *
 * Uses comparisons only, so the result does not depend on the
 * implementation-defined outcome of right-shifting a negative value,
 * and arguments wider than 32 bits are not truncated first.
 * The argument is evaluated twice; do not pass expressions with side effects.
 */
#define _SIGN(value) (((value)>0)-((value)<0))
30
31 /* test setup --------------------------------------------------------------- */
32
33 static void setUpDataTable(void);
34 static void TestStringCopy(void);
35 static void TestStringFunctions(void);
36 static void TestStringSearching(void);
37 static void TestSurrogateSearching(void);
38 static void TestUnescape(void);
39 static void TestCountChar32(void);
40 static void TestUCharIterator(void);
41
42 void addUStringTest(TestNode** root);
43
/*
 * Registers every test of this file with the test framework.
 * The tests are added in table order, each under its "tsutil/custrtst/..." path.
 */
void addUStringTest(TestNode** root)
{
    static const struct {
        void (*function)(void);
        const char *path;
    } registrations[] = {
        { &TestStringCopy,         "tsutil/custrtst/TestStringCopy" },
        { &TestStringFunctions,    "tsutil/custrtst/TestStringFunctions" },
        { &TestStringSearching,    "tsutil/custrtst/TestStringSearching" },
        { &TestSurrogateSearching, "tsutil/custrtst/TestSurrogateSearching" },
        { &TestUnescape,           "tsutil/custrtst/TestUnescape" },
        { &TestCountChar32,        "tsutil/custrtst/TestCountChar32" },
        { &TestUCharIterator,      "tsutil/custrtst/TestUCharIterator" }
    };
    int32_t index;

    for(index=0; index<UPRV_LENGTHOF(registrations); ++index) {
        addTest(root, registrations[index].function, registrations[index].path);
    }
}
54
55 /* test data for TestStringFunctions ---------------------------------------- */
56
57 UChar*** dataTable = NULL;
58
/*
 * Source strings for dataTable, indexed as raw[row][column]:
 * row 0 holds a prefix, row 1 a suffix, and row 2 the concatenation of the
 * prefix and the suffix from the same column.
 */
static const char* raw[3][4] = {

    /* First String */
    { "English_", "French_", "Croatian_", "English_" },
    /* Second String */
    { "United States", "France", "Croatia", "United States" },

    /* Concatenated string */
    { "English_United States", "French_France", "Croatian_Croatia", "English_United States" }
};
69
setUpDataTable()70 static void setUpDataTable()
71 {
72 int32_t i,j;
73 if(dataTable == NULL) {
74 dataTable = (UChar***)calloc(sizeof(UChar**),3);
75
76 for (i = 0; i < 3; i++) {
77 dataTable[i] = (UChar**)calloc(sizeof(UChar*),4);
78 for (j = 0; j < 4; j++){
79 dataTable[i][j] = (UChar*) malloc(sizeof(UChar)*(strlen(raw[i][j])+1));
80 u_uastrcpy(dataTable[i][j],raw[i][j]);
81 }
82 }
83 }
84 }
85
cleanUpDataTable()86 static void cleanUpDataTable()
87 {
88 int32_t i,j;
89 if(dataTable != NULL) {
90 for (i=0; i<3; i++) {
91 for(j = 0; j<4; j++) {
92 free(dataTable[i][j]);
93 }
94 free(dataTable[i]);
95 }
96 free(dataTable);
97 }
98 dataTable = NULL;
99 }
100
101 /*Tests for u_strcat(),u_strcmp(), u_strlen(), u_strcpy(),u_strncat(),u_strncmp(),u_strncpy, u_uastrcpy(),u_austrcpy(), u_uastrncpy(); */
TestStringFunctions()102 static void TestStringFunctions()
103 {
104 int32_t i,j,k;
105 UChar temp[512];
106 UChar nullTemp[512];
107 char test[512];
108 char tempOut[512];
109
110 setUpDataTable();
111
112 log_verbose("Testing u_strlen()\n");
113 if( u_strlen(dataTable[0][0])!= u_strlen(dataTable[0][3]) || u_strlen(dataTable[0][0]) == u_strlen(dataTable[0][2]))
114 log_err("There is an error in u_strlen()");
115
116 log_verbose("Testing u_memcpy() and u_memcmp()\n");
117
118 for(i=0;i<3;++i)
119 {
120 for(j=0;j<4;++j)
121 {
122 log_verbose("Testing %s\n", u_austrcpy(tempOut, dataTable[i][j]));
123 temp[0] = 0;
124 temp[7] = 0xA4; /* Mark the end */
125 u_memcpy(temp,dataTable[i][j], 7);
126
127 if(temp[7] != 0xA4)
128 log_err("an error occured in u_memcpy()\n");
129 if(u_memcmp(temp, dataTable[i][j], 7)!=0)
130 log_err("an error occured in u_memcpy() or u_memcmp()\n");
131 }
132 }
133 if(u_memcmp(dataTable[0][0], dataTable[1][1], 7)==0)
134 log_err("an error occured in u_memcmp()\n");
135
136 log_verbose("Testing u_memset()\n");
137 nullTemp[0] = 0;
138 nullTemp[7] = 0;
139 u_memset(nullTemp, 0xa4, 7);
140 for (i = 0; i < 7; i++) {
141 if(nullTemp[i] != 0xa4) {
142 log_err("an error occured in u_memset()\n");
143 }
144 }
145 if(nullTemp[7] != 0) {
146 log_err("u_memset() went too far\n");
147 }
148
149 u_memset(nullTemp, 0, 7);
150 nullTemp[7] = 0xa4;
151 temp[7] = 0;
152 u_memcpy(temp,nullTemp, 7);
153 if(u_memcmp(temp, nullTemp, 7)!=0 || temp[7]!=0)
154 log_err("an error occured in u_memcpy() or u_memcmp()\n");
155
156
157 log_verbose("Testing u_memmove()\n");
158 for (i = 0; i < 7; i++) {
159 temp[i] = (UChar)i;
160 }
161 u_memmove(temp + 1, temp, 7);
162 if(temp[0] != 0) {
163 log_err("an error occured in u_memmove()\n");
164 }
165 for (i = 1; i <= 7; i++) {
166 if(temp[i] != (i - 1)) {
167 log_err("an error occured in u_memmove()\n");
168 }
169 }
170
171 log_verbose("Testing u_strcpy() and u_strcmp()\n");
172
173 for(i=0;i<3;++i)
174 {
175 for(j=0;j<4;++j)
176 {
177 log_verbose("Testing %s\n", u_austrcpy(tempOut, dataTable[i][j]));
178 temp[0] = 0;
179 u_strcpy(temp,dataTable[i][j]);
180
181 if(u_strcmp(temp,dataTable[i][j])!=0)
182 log_err("something threw an error in u_strcpy() or u_strcmp()\n");
183 }
184 }
185 if(u_strcmp(dataTable[0][0], dataTable[1][1])==0)
186 log_err("an error occured in u_memcmp()\n");
187
188 log_verbose("testing u_strcat()\n");
189 i=0;
190 for(j=0; j<2;++j)
191 {
192 u_uastrcpy(temp, "");
193 u_strcpy(temp,dataTable[i][j]);
194 u_strcat(temp,dataTable[i+1][j]);
195 if(u_strcmp(temp,dataTable[i+2][j])!=0)
196 log_err("something threw an error in u_strcat()\n");
197
198 }
199 log_verbose("Testing u_strncmp()\n");
200 for(i=0,j=0;j<4; ++j)
201 {
202 k=u_strlen(dataTable[i][j]);
203 if(u_strncmp(dataTable[i][j],dataTable[i+2][j],k)!=0)
204 log_err("Something threw an error in u_strncmp\n");
205 }
206 if(u_strncmp(dataTable[0][0], dataTable[1][1], 7)==0)
207 log_err("an error occured in u_memcmp()\n");
208
209
210 log_verbose("Testing u_strncat\n");
211 for(i=0,j=0;j<4; ++j)
212 {
213 k=u_strlen(dataTable[i][j]);
214
215 u_uastrcpy(temp,"");
216
217 if(u_strcmp(u_strncat(temp,dataTable[i+2][j],k),dataTable[i][j])!=0)
218 log_err("something threw an error in u_strncat or u_uastrcpy()\n");
219
220 }
221
222 log_verbose("Testing u_strncpy() and u_uastrcpy()\n");
223 for(i=2,j=0;j<4; ++j)
224 {
225 k=u_strlen(dataTable[i][j]);
226 u_strncpy(temp, dataTable[i][j],k);
227 temp[k] = 0xa4;
228
229 if(u_strncmp(temp, dataTable[i][j],k)!=0)
230 log_err("something threw an error in u_strncpy()\n");
231
232 if(temp[k] != 0xa4)
233 log_err("something threw an error in u_strncpy()\n");
234
235 u_memset(temp, 0x3F, UPRV_LENGTHOF(temp) - 1);
236 u_uastrncpy(temp, raw[i][j], k-1);
237 if(u_strncmp(temp, dataTable[i][j],k-1)!=0)
238 log_err("something threw an error in u_uastrncpy(k-1)\n");
239
240 if(temp[k-1] != 0x3F)
241 log_err("something threw an error in u_uastrncpy(k-1)\n");
242
243 u_memset(temp, 0x3F, UPRV_LENGTHOF(temp) - 1);
244 u_uastrncpy(temp, raw[i][j], k+1);
245 if(u_strcmp(temp, dataTable[i][j])!=0)
246 log_err("something threw an error in u_uastrncpy(k+1)\n");
247
248 if(temp[k] != 0)
249 log_err("something threw an error in u_uastrncpy(k+1)\n");
250
251 u_memset(temp, 0x3F, UPRV_LENGTHOF(temp) - 1);
252 u_uastrncpy(temp, raw[i][j], k);
253 if(u_strncmp(temp, dataTable[i][j], k)!=0)
254 log_err("something threw an error in u_uastrncpy(k)\n");
255
256 if(temp[k] != 0x3F)
257 log_err("something threw an error in u_uastrncpy(k)\n");
258 }
259
260 log_verbose("Testing u_strchr() and u_memchr()\n");
261
262 for(i=2,j=0;j<4;j++)
263 {
264 UChar saveVal = dataTable[i][j][0];
265 UChar *findPtr = u_strchr(dataTable[i][j], 0x005F);
266 int32_t dataSize = (int32_t)(u_strlen(dataTable[i][j]) + 1);
267
268 log_verbose("%s ", u_austrcpy(tempOut, findPtr));
269
270 if (findPtr == NULL || *findPtr != 0x005F) {
271 log_err("u_strchr can't find '_' in the string\n");
272 }
273
274 findPtr = u_strchr32(dataTable[i][j], 0x005F);
275 if (findPtr == NULL || *findPtr != 0x005F) {
276 log_err("u_strchr32 can't find '_' in the string\n");
277 }
278
279 findPtr = u_strchr(dataTable[i][j], 0);
280 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
281 log_err("u_strchr can't find NULL in the string\n");
282 }
283
284 findPtr = u_strchr32(dataTable[i][j], 0);
285 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
286 log_err("u_strchr32 can't find NULL in the string\n");
287 }
288
289 findPtr = u_memchr(dataTable[i][j], 0, dataSize);
290 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
291 log_err("u_memchr can't find NULL in the string\n");
292 }
293
294 findPtr = u_memchr32(dataTable[i][j], 0, dataSize);
295 if (findPtr != (&(dataTable[i][j][dataSize - 1]))) {
296 log_err("u_memchr32 can't find NULL in the string\n");
297 }
298
299 dataTable[i][j][0] = 0;
300 /* Make sure we skip over the NULL termination */
301 findPtr = u_memchr(dataTable[i][j], 0x005F, dataSize);
302 if (findPtr == NULL || *findPtr != 0x005F) {
303 log_err("u_memchr can't find '_' in the string\n");
304 }
305
306 findPtr = u_memchr32(dataTable[i][j], 0x005F, dataSize);
307 if (findPtr == NULL || *findPtr != 0x005F) {
308 log_err("u_memchr32 can't find '_' in the string\n");
309 }
310 findPtr = u_memchr32(dataTable[i][j], 0xFFFD, dataSize);
311 if (findPtr != NULL) {
312 log_err("Should have found NULL when the character is not there.\n");
313 }
314 dataTable[i][j][0] = saveVal; /* Put it back for the other tests */
315 }
316
317 /*
318 * test that u_strchr32()
319 * does not find surrogate code points when they are part of matched pairs
320 * (= part of supplementary code points)
321 * Jitterbug 1542
322 */
323 {
324 static const UChar s[]={
325 /* 0 1 2 3 4 5 6 7 8 9 */
326 0x0061, 0xd841, 0xdc02, 0xd841, 0x0062, 0xdc02, 0xd841, 0xdc02, 0x0063, 0
327 };
328
329 if(u_strchr32(s, 0xd841)!=(s+3) || u_strchr32(s, 0xdc02)!=(s+5)) {
330 log_err("error: u_strchr32(surrogate) finds a partial supplementary code point\n");
331 }
332 if(u_memchr32(s, 0xd841, 9)!=(s+3) || u_memchr32(s, 0xdc02, 9)!=(s+5)) {
333 log_err("error: u_memchr32(surrogate) finds a partial supplementary code point\n");
334 }
335 }
336
337 log_verbose("Testing u_austrcpy()");
338 u_austrcpy(test,dataTable[0][0]);
339 if(strcmp(test,raw[0][0])!=0)
340 log_err("There is an error in u_austrcpy()");
341
342
343 log_verbose("Testing u_strtok_r()");
344 {
345 const char tokString[] = " , 1 2 3 AHHHHH! 5.5 6 7 , 8\n";
346 const char *tokens[] = {",", "1", "2", "3", "AHHHHH!", "5.5", "6", "7", "8\n"};
347 UChar delimBuf[sizeof(test)];
348 UChar currTokenBuf[sizeof(tokString)];
349 UChar *state;
350 uint32_t currToken = 0;
351 UChar *ptr;
352
353 u_uastrcpy(temp, tokString);
354 u_uastrcpy(delimBuf, " ");
355
356 ptr = u_strtok_r(temp, delimBuf, &state);
357 u_uastrcpy(delimBuf, " ,");
358 while (ptr != NULL) {
359 u_uastrcpy(currTokenBuf, tokens[currToken]);
360 if (u_strcmp(ptr, currTokenBuf) != 0) {
361 log_err("u_strtok_r mismatch at %d. Got: %s, Expected: %s\n", currToken, ptr, tokens[currToken]);
362 }
363 ptr = u_strtok_r(NULL, delimBuf, &state);
364 currToken++;
365 }
366
367 if (currToken != UPRV_LENGTHOF(tokens)) {
368 log_err("Didn't get correct number of tokens\n");
369 }
370 state = delimBuf; /* Give it an "invalid" saveState */
371 u_uastrcpy(currTokenBuf, "");
372 if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) {
373 log_err("Didn't get NULL for empty string\n");
374 }
375 if (state != NULL) {
376 log_err("State should be NULL for empty string\n");
377 }
378 state = delimBuf; /* Give it an "invalid" saveState */
379 u_uastrcpy(currTokenBuf, ", ,");
380 if (u_strtok_r(currTokenBuf, delimBuf, &state) != NULL) {
381 log_err("Didn't get NULL for a string of delimiters\n");
382 }
383 if (state != NULL) {
384 log_err("State should be NULL for a string of delimiters\n");
385 }
386
387 state = delimBuf; /* Give it an "invalid" saveState */
388 u_uastrcpy(currTokenBuf, "q, ,");
389 if (u_strtok_r(currTokenBuf, delimBuf, &state) == NULL) {
390 log_err("Got NULL for a string that does not begin with delimiters\n");
391 }
392 if (u_strtok_r(NULL, delimBuf, &state) != NULL) {
393 log_err("Didn't get NULL for a string that ends in delimiters\n");
394 }
395 if (state != NULL) {
396 log_err("State should be NULL for empty string\n");
397 }
398
399 state = delimBuf; /* Give it an "invalid" saveState */
400 u_uastrcpy(currTokenBuf, tokString);
401 u_uastrcpy(temp, tokString);
402 u_uastrcpy(delimBuf, "q"); /* Give it a delimiter that it can't find. */
403 ptr = u_strtok_r(currTokenBuf, delimBuf, &state);
404 if (ptr == NULL || u_strcmp(ptr, temp) != 0) {
405 log_err("Should have recieved the same string when there are no delimiters\n");
406 }
407 if (u_strtok_r(NULL, delimBuf, &state) != NULL) {
408 log_err("Should not have found another token in a one token string\n");
409 }
410 }
411
412 /* test u_strcmpCodePointOrder() */
413 {
414 /* these strings are in ascending order */
415 static const UChar strings[][4]={
416 { 0x61, 0 }, /* U+0061 */
417 { 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */
418 { 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */
419 { 0xd800, 0 }, /* U+d800 */
420 { 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */
421 { 0xdfff, 0 }, /* U+dfff */
422 { 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */
423 { 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */
424 { 0xd800, 0xdc02, 0 }, /* U+10002 */
425 { 0xd84d, 0xdc56, 0 } /* U+23456 */
426 };
427
428 UCharIterator iter1, iter2;
429 int32_t len1, len2, r1, r2;
430
431 for(i=0; i<(UPRV_LENGTHOF(strings)-1); ++i) {
432 if(u_strcmpCodePointOrder(strings[i], strings[i+1])>=0) {
433 log_err("error: u_strcmpCodePointOrder() fails for string %d and the following one\n", i);
434 }
435 if(u_strncmpCodePointOrder(strings[i], strings[i+1], 10)>=0) {
436 log_err("error: u_strncmpCodePointOrder() fails for string %d and the following one\n", i);
437 }
438
439 /* There are at least 2 UChars in each string - verify that strncmp()==memcmp(). */
440 if(u_strncmpCodePointOrder(strings[i], strings[i+1], 2)!=u_memcmpCodePointOrder(strings[i], strings[i+1], 2)) {
441 log_err("error: u_strncmpCodePointOrder(2)!=u_memcmpCodePointOrder(2) for string %d and the following one\n", i);
442 }
443
444 /* test u_strCompare(TRUE) */
445 len1=u_strlen(strings[i]);
446 len2=u_strlen(strings[i+1]);
447 if( u_strCompare(strings[i], -1, strings[i+1], -1, TRUE)>=0 ||
448 u_strCompare(strings[i], -1, strings[i+1], len2, TRUE)>=0 ||
449 u_strCompare(strings[i], len1, strings[i+1], -1, TRUE)>=0 ||
450 u_strCompare(strings[i], len1, strings[i+1], len2, TRUE)>=0
451 ) {
452 log_err("error: u_strCompare(code point order) fails for string %d and the following one\n", i);
453 }
454
455 /* test u_strCompare(FALSE) */
456 r1=u_strCompare(strings[i], -1, strings[i+1], -1, FALSE);
457 r2=u_strcmp(strings[i], strings[i+1]);
458 if(_SIGN(r1)!=_SIGN(r2)) {
459 log_err("error: u_strCompare(code unit order)!=u_strcmp() for string %d and the following one\n", i);
460 }
461
462 /* test u_strCompareIter() */
463 uiter_setString(&iter1, strings[i], len1);
464 uiter_setString(&iter2, strings[i+1], len2);
465 if(u_strCompareIter(&iter1, &iter2, TRUE)>=0) {
466 log_err("error: u_strCompareIter(code point order) fails for string %d and the following one\n", i);
467 }
468 r1=u_strCompareIter(&iter1, &iter2, FALSE);
469 if(_SIGN(r1)!=_SIGN(u_strcmp(strings[i], strings[i+1]))) {
470 log_err("error: u_strCompareIter(code unit order)!=u_strcmp() for string %d and the following one\n", i);
471 }
472 }
473 }
474
475 cleanUpDataTable();
476 }
477
TestStringSearching()478 static void TestStringSearching()
479 {
480 const UChar testString[] = {0x0061, 0x0062, 0x0063, 0x0064, 0x0064, 0x0061, 0};
481 const UChar testSurrogateString[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0x0063, 0x0064, 0x0064, 0xdbff, 0xdfff, 0xdb00, 0xdf00, 0x0061, 0};
482 const UChar surrMatchSet1[] = {0xdbff, 0xdfff, 0};
483 const UChar surrMatchSet2[] = {0x0061, 0x0062, 0xdbff, 0xdfff, 0};
484 const UChar surrMatchSet3[] = {0xdb00, 0xdf00, 0xdbff, 0xdfff, 0};
485 const UChar surrMatchSet4[] = {0x0000};
486 const UChar surrMatchSetBad[] = {0xdbff, 0x0061, 0};
487 const UChar surrMatchSetBad2[] = {0x0061, 0xdbff, 0};
488 const UChar surrMatchSetBad3[] = {0xdbff, 0x0061, 0x0062, 0xdbff, 0xdfff, 0}; /* has partial surrogate */
489 const UChar
490 empty[] = { 0 },
491 a[] = { 0x61, 0 },
492 ab[] = { 0x61, 0x62, 0 },
493 ba[] = { 0x62, 0x61, 0 },
494 abcd[] = { 0x61, 0x62, 0x63, 0x64, 0 },
495 cd[] = { 0x63, 0x64, 0 },
496 dc[] = { 0x64, 0x63, 0 },
497 cdh[] = { 0x63, 0x64, 0x68, 0 },
498 f[] = { 0x66, 0 },
499 fg[] = { 0x66, 0x67, 0 },
500 gf[] = { 0x67, 0x66, 0 };
501
502 log_verbose("Testing u_strpbrk()");
503
504 if (u_strpbrk(testString, a) != &testString[0]) {
505 log_err("u_strpbrk couldn't find first letter a.\n");
506 }
507 if (u_strpbrk(testString, dc) != &testString[2]) {
508 log_err("u_strpbrk couldn't find d or c.\n");
509 }
510 if (u_strpbrk(testString, cd) != &testString[2]) {
511 log_err("u_strpbrk couldn't find c or d.\n");
512 }
513 if (u_strpbrk(testString, cdh) != &testString[2]) {
514 log_err("u_strpbrk couldn't find c, d or h.\n");
515 }
516 if (u_strpbrk(testString, f) != NULL) {
517 log_err("u_strpbrk didn't return NULL for \"f\".\n");
518 }
519 if (u_strpbrk(testString, fg) != NULL) {
520 log_err("u_strpbrk didn't return NULL for \"fg\".\n");
521 }
522 if (u_strpbrk(testString, gf) != NULL) {
523 log_err("u_strpbrk didn't return NULL for \"gf\".\n");
524 }
525 if (u_strpbrk(testString, empty) != NULL) {
526 log_err("u_strpbrk didn't return NULL for \"\".\n");
527 }
528
529 log_verbose("Testing u_strpbrk() with surrogates");
530
531 if (u_strpbrk(testSurrogateString, a) != &testSurrogateString[1]) {
532 log_err("u_strpbrk couldn't find first letter a.\n");
533 }
534 if (u_strpbrk(testSurrogateString, dc) != &testSurrogateString[5]) {
535 log_err("u_strpbrk couldn't find d or c.\n");
536 }
537 if (u_strpbrk(testSurrogateString, cd) != &testSurrogateString[5]) {
538 log_err("u_strpbrk couldn't find c or d.\n");
539 }
540 if (u_strpbrk(testSurrogateString, cdh) != &testSurrogateString[5]) {
541 log_err("u_strpbrk couldn't find c, d or h.\n");
542 }
543 if (u_strpbrk(testSurrogateString, f) != NULL) {
544 log_err("u_strpbrk didn't return NULL for \"f\".\n");
545 }
546 if (u_strpbrk(testSurrogateString, fg) != NULL) {
547 log_err("u_strpbrk didn't return NULL for \"fg\".\n");
548 }
549 if (u_strpbrk(testSurrogateString, gf) != NULL) {
550 log_err("u_strpbrk didn't return NULL for \"gf\".\n");
551 }
552 if (u_strpbrk(testSurrogateString, surrMatchSet1) != &testSurrogateString[3]) {
553 log_err("u_strpbrk couldn't find \"0xdbff, 0xdfff\".\n");
554 }
555 if (u_strpbrk(testSurrogateString, surrMatchSet2) != &testSurrogateString[1]) {
556 log_err("u_strpbrk couldn't find \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
557 }
558 if (u_strpbrk(testSurrogateString, surrMatchSet3) != &testSurrogateString[3]) {
559 log_err("u_strpbrk couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
560 }
561 if (u_strpbrk(testSurrogateString, surrMatchSet4) != NULL) {
562 log_err("u_strpbrk should have returned NULL for empty string.\n");
563 }
564 if (u_strpbrk(testSurrogateString, surrMatchSetBad) != &testSurrogateString[0]) {
565 log_err("u_strpbrk should have found bad surrogate.\n");
566 }
567
568 log_verbose("Testing u_strcspn()");
569
570 if (u_strcspn(testString, a) != 0) {
571 log_err("u_strcspn couldn't find first letter a.\n");
572 }
573 if (u_strcspn(testString, dc) != 2) {
574 log_err("u_strcspn couldn't find d or c.\n");
575 }
576 if (u_strcspn(testString, cd) != 2) {
577 log_err("u_strcspn couldn't find c or d.\n");
578 }
579 if (u_strcspn(testString, cdh) != 2) {
580 log_err("u_strcspn couldn't find c, d or h.\n");
581 }
582 if (u_strcspn(testString, f) != u_strlen(testString)) {
583 log_err("u_strcspn didn't return NULL for \"f\".\n");
584 }
585 if (u_strcspn(testString, fg) != u_strlen(testString)) {
586 log_err("u_strcspn didn't return NULL for \"fg\".\n");
587 }
588 if (u_strcspn(testString, gf) != u_strlen(testString)) {
589 log_err("u_strcspn didn't return NULL for \"gf\".\n");
590 }
591
592 log_verbose("Testing u_strcspn() with surrogates");
593
594 if (u_strcspn(testSurrogateString, a) != 1) {
595 log_err("u_strcspn couldn't find first letter a.\n");
596 }
597 if (u_strcspn(testSurrogateString, dc) != 5) {
598 log_err("u_strcspn couldn't find d or c.\n");
599 }
600 if (u_strcspn(testSurrogateString, cd) != 5) {
601 log_err("u_strcspn couldn't find c or d.\n");
602 }
603 if (u_strcspn(testSurrogateString, cdh) != 5) {
604 log_err("u_strcspn couldn't find c, d or h.\n");
605 }
606 if (u_strcspn(testSurrogateString, f) != u_strlen(testSurrogateString)) {
607 log_err("u_strcspn didn't return NULL for \"f\".\n");
608 }
609 if (u_strcspn(testSurrogateString, fg) != u_strlen(testSurrogateString)) {
610 log_err("u_strcspn didn't return NULL for \"fg\".\n");
611 }
612 if (u_strcspn(testSurrogateString, gf) != u_strlen(testSurrogateString)) {
613 log_err("u_strcspn didn't return NULL for \"gf\".\n");
614 }
615 if (u_strcspn(testSurrogateString, surrMatchSet1) != 3) {
616 log_err("u_strcspn couldn't find \"0xdbff, 0xdfff\".\n");
617 }
618 if (u_strcspn(testSurrogateString, surrMatchSet2) != 1) {
619 log_err("u_strcspn couldn't find \"a, b, 0xdbff, 0xdfff\".\n");
620 }
621 if (u_strcspn(testSurrogateString, surrMatchSet3) != 3) {
622 log_err("u_strcspn couldn't find \"0xdb00, 0xdf00, 0xdbff, 0xdfff\".\n");
623 }
624 if (u_strcspn(testSurrogateString, surrMatchSet4) != u_strlen(testSurrogateString)) {
625 log_err("u_strcspn should have returned strlen for empty string.\n");
626 }
627
628
629 log_verbose("Testing u_strspn()");
630
631 if (u_strspn(testString, a) != 1) {
632 log_err("u_strspn couldn't skip first letter a.\n");
633 }
634 if (u_strspn(testString, ab) != 2) {
635 log_err("u_strspn couldn't skip a or b.\n");
636 }
637 if (u_strspn(testString, ba) != 2) {
638 log_err("u_strspn couldn't skip a or b.\n");
639 }
640 if (u_strspn(testString, f) != 0) {
641 log_err("u_strspn didn't return 0 for \"f\".\n");
642 }
643 if (u_strspn(testString, dc) != 0) {
644 log_err("u_strspn couldn't find first letter a (skip d or c).\n");
645 }
646 if (u_strspn(testString, abcd) != u_strlen(testString)) {
647 log_err("u_strspn couldn't skip over the whole string.\n");
648 }
649 if (u_strspn(testString, empty) != 0) {
650 log_err("u_strspn should have returned 0 for empty string.\n");
651 }
652
653 log_verbose("Testing u_strspn() with surrogates");
654 if (u_strspn(testSurrogateString, surrMatchSetBad) != 2) {
655 log_err("u_strspn couldn't skip 0xdbff or a.\n");
656 }
657 if (u_strspn(testSurrogateString, surrMatchSetBad2) != 2) {
658 log_err("u_strspn couldn't skip 0xdbff or a.\n");
659 }
660 if (u_strspn(testSurrogateString, f) != 0) {
661 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
662 }
663 if (u_strspn(testSurrogateString, dc) != 0) {
664 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
665 }
666 if (u_strspn(testSurrogateString, cd) != 0) {
667 log_err("u_strspn couldn't skip d or c (skip first letter).\n");
668 }
669 if (u_strspn(testSurrogateString, testSurrogateString) != u_strlen(testSurrogateString)) {
670 log_err("u_strspn couldn't skip whole string.\n");
671 }
672 if (u_strspn(testSurrogateString, surrMatchSet1) != 0) {
673 log_err("u_strspn couldn't skip \"0xdbff, 0xdfff\" (get first letter).\n");
674 }
675 if (u_strspn(testSurrogateString, surrMatchSetBad3) != 5) {
676 log_err("u_strspn couldn't skip \"0xdbff, a, b, 0xdbff, 0xdfff\".\n");
677 }
678 if (u_strspn(testSurrogateString, surrMatchSet4) != 0) {
679 log_err("u_strspn should have returned 0 for empty string.\n");
680 }
681 }
682
683 /*
684 * All binary Unicode string searches should behave the same for equivalent input.
685 * See Jitterbug 2145.
686 * There are some new functions, too - just test them all.
687 */
688 static void
TestSurrogateSearching()689 TestSurrogateSearching() {
690 static const UChar s[]={
691 /* 0 1 2 3 4 5 6 7 8 9 10 11 */
692 0x61, 0xd801, 0xdc02, 0x61, 0xdc02, 0x61, 0xd801, 0x61, 0xd801, 0xdc02, 0x61, 0
693 }, sub_a[]={
694 0x61, 0
695 }, sub_b[]={
696 0x62, 0
697 }, sub_lead[]={
698 0xd801, 0
699 }, sub_trail[]={
700 0xdc02, 0
701 }, sub_supp[]={
702 0xd801, 0xdc02, 0
703 }, sub_supp2[]={
704 0xd801, 0xdc03, 0
705 }, sub_a_lead[]={
706 0x61, 0xd801, 0
707 }, sub_trail_a[]={
708 0xdc02, 0x61, 0
709 }, sub_aba[]={
710 0x61, 0x62, 0x61, 0
711 };
712 static const UChar a=0x61, b=0x62, lead=0xd801, trail=0xdc02, nul=0;
713 static const UChar32 supp=0x10402, supp2=0x10403, ill=0x123456;
714
715 const UChar *first, *last;
716
717 /* search for NUL code point: find end of string */
718 first=s+u_strlen(s);
719
720 if(
721 first!=u_strchr(s, nul) ||
722 first!=u_strchr32(s, nul) ||
723 first!=u_memchr(s, nul, UPRV_LENGTHOF(s)) ||
724 first!=u_memchr32(s, nul, UPRV_LENGTHOF(s)) ||
725 first!=u_strrchr(s, nul) ||
726 first!=u_strrchr32(s, nul) ||
727 first!=u_memrchr(s, nul, UPRV_LENGTHOF(s)) ||
728 first!=u_memrchr32(s, nul, UPRV_LENGTHOF(s))
729 ) {
730 log_err("error: one of the u_str[|mem][r]chr[32](s, nul) does not find the terminator of s\n");
731 }
732
733 /* search for empty substring: find beginning of string */
734 if(
735 s!=u_strstr(s, &nul) ||
736 s!=u_strFindFirst(s, -1, &nul, -1) ||
737 s!=u_strFindFirst(s, -1, &nul, 0) ||
738 s!=u_strFindFirst(s, UPRV_LENGTHOF(s), &nul, -1) ||
739 s!=u_strFindFirst(s, UPRV_LENGTHOF(s), &nul, 0) ||
740 s!=u_strrstr(s, &nul) ||
741 s!=u_strFindLast(s, -1, &nul, -1) ||
742 s!=u_strFindLast(s, -1, &nul, 0) ||
743 s!=u_strFindLast(s, UPRV_LENGTHOF(s), &nul, -1) ||
744 s!=u_strFindLast(s, UPRV_LENGTHOF(s), &nul, 0)
745 ) {
746 log_err("error: one of the u_str[str etc](s, \"\") does not find s itself\n");
747 }
748
749 /* find 'a' in s[1..10[ */
750 first=s+3;
751 last=s+7;
752 if(
753 first!=u_strchr(s+1, a) ||
754 first!=u_strchr32(s+1, a) ||
755 first!=u_memchr(s+1, a, 9) ||
756 first!=u_memchr32(s+1, a, 9) ||
757 first!=u_strstr(s+1, sub_a) ||
758 first!=u_strFindFirst(s+1, -1, sub_a, -1) ||
759 first!=u_strFindFirst(s+1, -1, &a, 1) ||
760 first!=u_strFindFirst(s+1, 9, sub_a, -1) ||
761 first!=u_strFindFirst(s+1, 9, &a, 1) ||
762 (s+10)!=u_strrchr(s+1, a) ||
763 (s+10)!=u_strrchr32(s+1, a) ||
764 last!=u_memrchr(s+1, a, 9) ||
765 last!=u_memrchr32(s+1, a, 9) ||
766 (s+10)!=u_strrstr(s+1, sub_a) ||
767 (s+10)!=u_strFindLast(s+1, -1, sub_a, -1) ||
768 (s+10)!=u_strFindLast(s+1, -1, &a, 1) ||
769 last!=u_strFindLast(s+1, 9, sub_a, -1) ||
770 last!=u_strFindLast(s+1, 9, &a, 1)
771 ) {
772 log_err("error: one of the u_str[chr etc]('a') does not find the correct place\n");
773 }
774
775 /* do not find 'b' in s[1..10[ */
776 if(
777 NULL!=u_strchr(s+1, b) ||
778 NULL!=u_strchr32(s+1, b) ||
779 NULL!=u_memchr(s+1, b, 9) ||
780 NULL!=u_memchr32(s+1, b, 9) ||
781 NULL!=u_strstr(s+1, sub_b) ||
782 NULL!=u_strFindFirst(s+1, -1, sub_b, -1) ||
783 NULL!=u_strFindFirst(s+1, -1, &b, 1) ||
784 NULL!=u_strFindFirst(s+1, 9, sub_b, -1) ||
785 NULL!=u_strFindFirst(s+1, 9, &b, 1) ||
786 NULL!=u_strrchr(s+1, b) ||
787 NULL!=u_strrchr32(s+1, b) ||
788 NULL!=u_memrchr(s+1, b, 9) ||
789 NULL!=u_memrchr32(s+1, b, 9) ||
790 NULL!=u_strrstr(s+1, sub_b) ||
791 NULL!=u_strFindLast(s+1, -1, sub_b, -1) ||
792 NULL!=u_strFindLast(s+1, -1, &b, 1) ||
793 NULL!=u_strFindLast(s+1, 9, sub_b, -1) ||
794 NULL!=u_strFindLast(s+1, 9, &b, 1)
795 ) {
796 log_err("error: one of the u_str[chr etc]('b') incorrectly finds something\n");
797 }
798
799 /* do not find a non-code point in s[1..10[ */
800 if(
801 NULL!=u_strchr32(s+1, ill) ||
802 NULL!=u_memchr32(s+1, ill, 9) ||
803 NULL!=u_strrchr32(s+1, ill) ||
804 NULL!=u_memrchr32(s+1, ill, 9)
805 ) {
806 log_err("error: one of the u_str[chr etc](illegal code point) incorrectly finds something\n");
807 }
808
809 /* find U+d801 in s[1..10[ */
810 first=s+6;
811 if(
812 first!=u_strchr(s+1, lead) ||
813 first!=u_strchr32(s+1, lead) ||
814 first!=u_memchr(s+1, lead, 9) ||
815 first!=u_memchr32(s+1, lead, 9) ||
816 first!=u_strstr(s+1, sub_lead) ||
817 first!=u_strFindFirst(s+1, -1, sub_lead, -1) ||
818 first!=u_strFindFirst(s+1, -1, &lead, 1) ||
819 first!=u_strFindFirst(s+1, 9, sub_lead, -1) ||
820 first!=u_strFindFirst(s+1, 9, &lead, 1) ||
821 first!=u_strrchr(s+1, lead) ||
822 first!=u_strrchr32(s+1, lead) ||
823 first!=u_memrchr(s+1, lead, 9) ||
824 first!=u_memrchr32(s+1, lead, 9) ||
825 first!=u_strrstr(s+1, sub_lead) ||
826 first!=u_strFindLast(s+1, -1, sub_lead, -1) ||
827 first!=u_strFindLast(s+1, -1, &lead, 1) ||
828 first!=u_strFindLast(s+1, 9, sub_lead, -1) ||
829 first!=u_strFindLast(s+1, 9, &lead, 1)
830 ) {
831 log_err("error: one of the u_str[chr etc](U+d801) does not find the correct place\n");
832 }
833
834 /* find U+dc02 in s[1..10[ */
835 first=s+4;
836 if(
837 first!=u_strchr(s+1, trail) ||
838 first!=u_strchr32(s+1, trail) ||
839 first!=u_memchr(s+1, trail, 9) ||
840 first!=u_memchr32(s+1, trail, 9) ||
841 first!=u_strstr(s+1, sub_trail) ||
842 first!=u_strFindFirst(s+1, -1, sub_trail, -1) ||
843 first!=u_strFindFirst(s+1, -1, &trail, 1) ||
844 first!=u_strFindFirst(s+1, 9, sub_trail, -1) ||
845 first!=u_strFindFirst(s+1, 9, &trail, 1) ||
846 first!=u_strrchr(s+1, trail) ||
847 first!=u_strrchr32(s+1, trail) ||
848 first!=u_memrchr(s+1, trail, 9) ||
849 first!=u_memrchr32(s+1, trail, 9) ||
850 first!=u_strrstr(s+1, sub_trail) ||
851 first!=u_strFindLast(s+1, -1, sub_trail, -1) ||
852 first!=u_strFindLast(s+1, -1, &trail, 1) ||
853 first!=u_strFindLast(s+1, 9, sub_trail, -1) ||
854 first!=u_strFindLast(s+1, 9, &trail, 1)
855 ) {
856 log_err("error: one of the u_str[chr etc](U+dc02) does not find the correct place\n");
857 }
858
859 /* find U+10402 in s[1..10[ */
860 first=s+1;
861 last=s+8;
862 if(
863 first!=u_strchr32(s+1, supp) ||
864 first!=u_memchr32(s+1, supp, 9) ||
865 first!=u_strstr(s+1, sub_supp) ||
866 first!=u_strFindFirst(s+1, -1, sub_supp, -1) ||
867 first!=u_strFindFirst(s+1, -1, sub_supp, 2) ||
868 first!=u_strFindFirst(s+1, 9, sub_supp, -1) ||
869 first!=u_strFindFirst(s+1, 9, sub_supp, 2) ||
870 last!=u_strrchr32(s+1, supp) ||
871 last!=u_memrchr32(s+1, supp, 9) ||
872 last!=u_strrstr(s+1, sub_supp) ||
873 last!=u_strFindLast(s+1, -1, sub_supp, -1) ||
874 last!=u_strFindLast(s+1, -1, sub_supp, 2) ||
875 last!=u_strFindLast(s+1, 9, sub_supp, -1) ||
876 last!=u_strFindLast(s+1, 9, sub_supp, 2)
877 ) {
878 log_err("error: one of the u_str[chr etc](U+10402) does not find the correct place\n");
879 }
880
881 /* do not find U+10402 in a single UChar */
882 if(
883 NULL!=u_memchr32(s+1, supp, 1) ||
884 NULL!=u_strFindFirst(s+1, 1, sub_supp, -1) ||
885 NULL!=u_strFindFirst(s+1, 1, sub_supp, 2) ||
886 NULL!=u_memrchr32(s+1, supp, 1) ||
887 NULL!=u_strFindLast(s+1, 1, sub_supp, -1) ||
888 NULL!=u_strFindLast(s+1, 1, sub_supp, 2) ||
889 NULL!=u_memrchr32(s+2, supp, 1) ||
890 NULL!=u_strFindLast(s+2, 1, sub_supp, -1) ||
891 NULL!=u_strFindLast(s+2, 1, sub_supp, 2)
892 ) {
893 log_err("error: one of the u_str[chr etc](U+10402) incorrectly finds a supplementary c.p. in a single UChar\n");
894 }
895
896 /* do not find U+10403 in s[1..10[ */
897 if(
898 NULL!=u_strchr32(s+1, supp2) ||
899 NULL!=u_memchr32(s+1, supp2, 9) ||
900 NULL!=u_strstr(s+1, sub_supp2) ||
901 NULL!=u_strFindFirst(s+1, -1, sub_supp2, -1) ||
902 NULL!=u_strFindFirst(s+1, -1, sub_supp2, 2) ||
903 NULL!=u_strFindFirst(s+1, 9, sub_supp2, -1) ||
904 NULL!=u_strFindFirst(s+1, 9, sub_supp2, 2) ||
905 NULL!=u_strrchr32(s+1, supp2) ||
906 NULL!=u_memrchr32(s+1, supp2, 9) ||
907 NULL!=u_strrstr(s+1, sub_supp2) ||
908 NULL!=u_strFindLast(s+1, -1, sub_supp2, -1) ||
909 NULL!=u_strFindLast(s+1, -1, sub_supp2, 2) ||
910 NULL!=u_strFindLast(s+1, 9, sub_supp2, -1) ||
911 NULL!=u_strFindLast(s+1, 9, sub_supp2, 2)
912 ) {
913 log_err("error: one of the u_str[chr etc](U+10403) incorrectly finds something\n");
914 }
915
916 /* find <0061 d801> in s[1..10[ */
917 first=s+5;
918 if(
919 first!=u_strstr(s+1, sub_a_lead) ||
920 first!=u_strFindFirst(s+1, -1, sub_a_lead, -1) ||
921 first!=u_strFindFirst(s+1, -1, sub_a_lead, 2) ||
922 first!=u_strFindFirst(s+1, 9, sub_a_lead, -1) ||
923 first!=u_strFindFirst(s+1, 9, sub_a_lead, 2) ||
924 first!=u_strrstr(s+1, sub_a_lead) ||
925 first!=u_strFindLast(s+1, -1, sub_a_lead, -1) ||
926 first!=u_strFindLast(s+1, -1, sub_a_lead, 2) ||
927 first!=u_strFindLast(s+1, 9, sub_a_lead, -1) ||
928 first!=u_strFindLast(s+1, 9, sub_a_lead, 2)
929 ) {
930 log_err("error: one of the u_str[str etc](<0061 d801>) does not find the correct place\n");
931 }
932
933 /* find <dc02 0061> in s[1..10[ */
934 first=s+4;
935 if(
936 first!=u_strstr(s+1, sub_trail_a) ||
937 first!=u_strFindFirst(s+1, -1, sub_trail_a, -1) ||
938 first!=u_strFindFirst(s+1, -1, sub_trail_a, 2) ||
939 first!=u_strFindFirst(s+1, 9, sub_trail_a, -1) ||
940 first!=u_strFindFirst(s+1, 9, sub_trail_a, 2) ||
941 first!=u_strrstr(s+1, sub_trail_a) ||
942 first!=u_strFindLast(s+1, -1, sub_trail_a, -1) ||
943 first!=u_strFindLast(s+1, -1, sub_trail_a, 2) ||
944 first!=u_strFindLast(s+1, 9, sub_trail_a, -1) ||
945 first!=u_strFindLast(s+1, 9, sub_trail_a, 2)
946 ) {
947 log_err("error: one of the u_str[str etc](<dc02 0061>) does not find the correct place\n");
948 }
949
950 /* do not find "aba" in s[1..10[ */
951 if(
952 NULL!=u_strstr(s+1, sub_aba) ||
953 NULL!=u_strFindFirst(s+1, -1, sub_aba, -1) ||
954 NULL!=u_strFindFirst(s+1, -1, sub_aba, 3) ||
955 NULL!=u_strFindFirst(s+1, 9, sub_aba, -1) ||
956 NULL!=u_strFindFirst(s+1, 9, sub_aba, 3) ||
957 NULL!=u_strrstr(s+1, sub_aba) ||
958 NULL!=u_strFindLast(s+1, -1, sub_aba, -1) ||
959 NULL!=u_strFindLast(s+1, -1, sub_aba, 3) ||
960 NULL!=u_strFindLast(s+1, 9, sub_aba, -1) ||
961 NULL!=u_strFindLast(s+1, 9, sub_aba, 3)
962 ) {
963 log_err("error: one of the u_str[str etc](\"aba\") incorrectly finds something\n");
964 }
965 }
966
TestStringCopy()967 static void TestStringCopy()
968 {
969 UChar temp[40];
970 UChar *result=0;
971 UChar subString[5];
972 UChar uchars[]={0x61, 0x62, 0x63, 0x00};
973 char charOut[40];
974 char chars[]="abc"; /* needs default codepage */
975
976 log_verbose("Testing u_uastrncpy() and u_uastrcpy()");
977
978 u_uastrcpy(temp, "abc");
979 if(u_strcmp(temp, uchars) != 0) {
980 log_err("There is an error in u_uastrcpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
981 }
982
983 temp[0] = 0xFB; /* load garbage into it */
984 temp[1] = 0xFB;
985 temp[2] = 0xFB;
986 temp[3] = 0xFB;
987
988 u_uastrncpy(temp, "abcabcabc", 3);
989 if(u_strncmp(uchars, temp, 3) != 0){
990 log_err("There is an error in u_uastrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
991 }
992 if(temp[3] != 0xFB) {
993 log_err("u_uastrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
994 }
995
996 charOut[0] = (char)0x7B; /* load garbage into it */
997 charOut[1] = (char)0x7B;
998 charOut[2] = (char)0x7B;
999 charOut[3] = (char)0x7B;
1000
1001 temp[0] = 0x0061;
1002 temp[1] = 0x0062;
1003 temp[2] = 0x0063;
1004 temp[3] = 0x0061;
1005 temp[4] = 0x0062;
1006 temp[5] = 0x0063;
1007 temp[6] = 0x0000;
1008
1009 u_austrncpy(charOut, temp, 3);
1010 if(strncmp(chars, charOut, 3) != 0){
1011 log_err("There is an error in u_austrncpy() Expected %s Got %s\n", austrdup(uchars), austrdup(temp));
1012 }
1013 if(charOut[3] != (char)0x7B) {
1014 log_err("u_austrncpy wrote past it's bounds. Expected undisturbed byte at 3\n");
1015 }
1016
1017 /*Testing u_strchr()*/
1018 log_verbose("Testing u_strchr\n");
1019 temp[0]=0x42;
1020 temp[1]=0x62;
1021 temp[2]=0x62;
1022 temp[3]=0x63;
1023 temp[4]=0xd841;
1024 temp[5]=0xd841;
1025 temp[6]=0xdc02;
1026 temp[7]=0;
1027 result=u_strchr(temp, (UChar)0x62);
1028 if(result != temp+1){
1029 log_err("There is an error in u_strchr() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result);
1030 }
1031 /*Testing u_strstr()*/
1032 log_verbose("Testing u_strstr\n");
1033 subString[0]=0x62;
1034 subString[1]=0x63;
1035 subString[2]=0;
1036 result=u_strstr(temp, subString);
1037 if(result != temp+2){
1038 log_err("There is an error in u_strstr() Expected match at position 2 Got %ld (pointer 0x%lx)\n", result-temp, result);
1039 }
1040 result=u_strstr(temp, subString+2); /* subString+2 is an empty string */
1041 if(result != temp){
1042 log_err("There is an error in u_strstr() Expected match at position 0 Got %ld (pointer 0x%lx)\n", result-temp, result);
1043 }
1044 result=u_strstr(subString, temp);
1045 if(result != NULL){
1046 log_err("There is an error in u_strstr() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
1047 }
1048
1049 /*Testing u_strchr32*/
1050 log_verbose("Testing u_strchr32\n");
1051 result=u_strchr32(temp, (UChar32)0x62);
1052 if(result != temp+1){
1053 log_err("There is an error in u_strchr32() Expected match at position 1 Got %ld (pointer 0x%lx)\n", result-temp, result);
1054 }
1055 result=u_strchr32(temp, (UChar32)0xfb);
1056 if(result != NULL){
1057 log_err("There is an error in u_strchr32() Expected NULL \"not found\" Got non-NULL \"found\" result\n");
1058 }
1059 result=u_strchr32(temp, (UChar32)0x20402);
1060 if(result != temp+5){
1061 log_err("There is an error in u_strchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result);
1062 }
1063
1064 temp[7]=0xfc00;
1065 result=u_memchr32(temp, (UChar32)0x20402, 7);
1066 if(result != temp+5){
1067 log_err("There is an error in u_memchr32() Expected match at position 5 Got %ld (pointer 0x%lx)\n", result-temp, result);
1068 }
1069 result=u_memchr32(temp, (UChar32)0x20402, 6);
1070 if(result != NULL){
1071 log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result);
1072 }
1073 result=u_memchr32(temp, (UChar32)0x20402, 1);
1074 if(result != NULL){
1075 log_err("There is an error in u_memchr32() Expected no match Got %ld (pointer 0x%lx)\n", result-temp, result);
1076 }
1077 result=u_memchr32(temp, (UChar32)0xfc00, 8);
1078 if(result != temp+7){
1079 log_err("There is an error in u_memchr32() Expected match at position 7 Got %ld (pointer 0x%lx)\n", result-temp, result);
1080 }
1081 }
1082
1083 /* test u_unescape() and u_unescapeAt() ------------------------------------- */
1084
1085 static void
TestUnescape()1086 TestUnescape() {
1087 static UChar buffer[200];
1088
1089 static const char* input =
1090 "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\e\\cC\\n \\x1b\\x{263a}";
1091
1092 static const UChar expect[]={
1093 0x53, 0x63, 0x68, 0xf6, 0x6e, 0x65, 0x73, 0x20, 0x41, 0x75, 0x74, 0x6f, 0x3a, 0x20,
1094 0x20ac, 0x20, 0x31, 0x31, 0x32, 0x34, 0x30, 0x2e, 0x0c,
1095 0x50, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x73, 0x20,
1096 0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x1b, 0x03, 0x0a, 0x20, 0x1b, 0x263A, 0
1097 };
1098 static const int32_t explength = UPRV_LENGTHOF(expect)-1;
1099 int32_t length;
1100
1101 /* test u_unescape() */
1102 length=u_unescape(input, buffer, UPRV_LENGTHOF(buffer));
1103 if(length!=explength || u_strcmp(buffer, expect)!=0) {
1104 log_err("failure in u_unescape(): length %d!=%d and/or incorrect result string\n", length,
1105 explength);
1106 }
1107
1108 /* try preflighting */
1109 length=u_unescape(input, NULL, UPRV_LENGTHOF(buffer));
1110 if(length!=explength || u_strcmp(buffer, expect)!=0) {
1111 log_err("failure in u_unescape(preflighting): length %d!=%d\n", length, explength);
1112 }
1113
1114 /* ### TODO: test u_unescapeAt() */
1115 }
1116
1117 /* test code point counting functions --------------------------------------- */
1118
1119 /* reference implementation of u_strHasMoreChar32Than() */
1120 static int32_t
_refStrHasMoreChar32Than(const UChar * s,int32_t length,int32_t number)1121 _refStrHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {
1122 int32_t count=u_countChar32(s, length);
1123 return count>number;
1124 }
1125
1126 /* compare the real function against the reference */
1127 static void
_testStrHasMoreChar32Than(const UChar * s,int32_t i,int32_t length,int32_t number)1128 _testStrHasMoreChar32Than(const UChar *s, int32_t i, int32_t length, int32_t number) {
1129 if(u_strHasMoreChar32Than(s, length, number)!=_refStrHasMoreChar32Than(s, length, number)) {
1130 log_err("u_strHasMoreChar32Than(s+%d, %d, %d)=%hd is wrong\n",
1131 i, length, number, u_strHasMoreChar32Than(s, length, number));
1132 }
1133 }
1134
1135 static void
TestCountChar32()1136 TestCountChar32() {
1137 static const UChar string[]={
1138 0x61, 0x62, 0xd800, 0xdc00,
1139 0xd801, 0xdc01, 0x63, 0xd802,
1140 0x64, 0xdc03, 0x65, 0x66,
1141 0xd804, 0xdc04, 0xd805, 0xdc05,
1142 0x67
1143 };
1144 UChar buffer[100];
1145 int32_t i, length, number;
1146
1147 /* test u_strHasMoreChar32Than() with length>=0 */
1148 length=UPRV_LENGTHOF(string);
1149 while(length>=0) {
1150 for(i=0; i<=length; ++i) {
1151 for(number=-1; number<=((length-i)+2); ++number) {
1152 _testStrHasMoreChar32Than(string+i, i, length-i, number);
1153 }
1154 }
1155 --length;
1156 }
1157
1158 /* test u_strHasMoreChar32Than() with NUL-termination (length=-1) */
1159 length=UPRV_LENGTHOF(string);
1160 u_memcpy(buffer, string, length);
1161 while(length>=0) {
1162 buffer[length]=0;
1163 for(i=0; i<=length; ++i) {
1164 for(number=-1; number<=((length-i)+2); ++number) {
1165 _testStrHasMoreChar32Than(buffer+i, i, -1, number);
1166 }
1167 }
1168 --length;
1169 }
1170
1171 /* test u_strHasMoreChar32Than() with NULL string (bad input) */
1172 for(length=-1; length<=1; ++length) {
1173 for(i=0; i<=length; ++i) {
1174 for(number=-2; number<=2; ++number) {
1175 _testStrHasMoreChar32Than(NULL, 0, length, number);
1176 }
1177 }
1178 }
1179 }
1180
1181 /* UCharIterator ------------------------------------------------------------ */
1182
1183 /*
1184 * Compare results from two iterators, should be same.
1185 * Assume that the text is not empty and that
1186 * iteration start==0 and iteration limit==length.
1187 */
1188 static void
compareIterators(UCharIterator * iter1,const char * n1,UCharIterator * iter2,const char * n2)1189 compareIterators(UCharIterator *iter1, const char *n1,
1190 UCharIterator *iter2, const char *n2) {
1191 int32_t i, pos1, pos2, middle, length;
1192 UChar32 c1, c2;
1193
1194 /* compare lengths */
1195 length=iter1->getIndex(iter1, UITER_LENGTH);
1196 pos2=iter2->getIndex(iter2, UITER_LENGTH);
1197 if(length!=pos2) {
1198 log_err("%s->getIndex(length)=%d != %d=%s->getIndex(length)\n", n1, length, pos2, n2);
1199 return;
1200 }
1201
1202 /* set into the middle */
1203 middle=length/2;
1204
1205 pos1=iter1->move(iter1, middle, UITER_ZERO);
1206 if(pos1!=middle) {
1207 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n1, middle, pos1);
1208 return;
1209 }
1210
1211 pos2=iter2->move(iter2, middle, UITER_ZERO);
1212 if(pos2!=middle) {
1213 log_err("%s->move(from 0 to middle %d)=%d does not move to the middle\n", n2, middle, pos2);
1214 return;
1215 }
1216
1217 /* test current() */
1218 c1=iter1->current(iter1);
1219 c2=iter2->current(iter2);
1220 if(c1!=c2) {
1221 log_err("%s->current()=U+%04x != U+%04x=%s->current() at middle=%d\n", n1, c1, c2, n2, middle);
1222 return;
1223 }
1224
1225 /* move forward 3 UChars */
1226 for(i=0; i<3; ++i) {
1227 c1=iter1->next(iter1);
1228 c2=iter2->next(iter2);
1229 if(c1!=c2) {
1230 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1231 return;
1232 }
1233 }
1234
1235 /* move backward 5 UChars */
1236 for(i=0; i<5; ++i) {
1237 c1=iter1->previous(iter1);
1238 c2=iter2->previous(iter2);
1239 if(c1!=c2) {
1240 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d (started in middle)\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1241 return;
1242 }
1243 }
1244
1245 /* iterate forward from the beginning */
1246 pos1=iter1->move(iter1, 0, UITER_START);
1247 if(pos1<0) {
1248 log_err("%s->move(start) failed\n", n1);
1249 return;
1250 }
1251 if(!iter1->hasNext(iter1)) {
1252 log_err("%s->hasNext() at the start returns FALSE\n", n1);
1253 return;
1254 }
1255
1256 pos2=iter2->move(iter2, 0, UITER_START);
1257 if(pos2<0) {
1258 log_err("%s->move(start) failed\n", n2);
1259 return;
1260 }
1261 if(!iter2->hasNext(iter2)) {
1262 log_err("%s->hasNext() at the start returns FALSE\n", n2);
1263 return;
1264 }
1265
1266 do {
1267 c1=iter1->next(iter1);
1268 c2=iter2->next(iter2);
1269 if(c1!=c2) {
1270 log_err("%s->next()=U+%04x != U+%04x=%s->next() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1271 return;
1272 }
1273 } while(c1>=0);
1274
1275 if(iter1->hasNext(iter1)) {
1276 log_err("%s->hasNext() at the end returns TRUE\n", n1);
1277 return;
1278 }
1279 if(iter2->hasNext(iter2)) {
1280 log_err("%s->hasNext() at the end returns TRUE\n", n2);
1281 return;
1282 }
1283
1284 /* back to the middle */
1285 pos1=iter1->move(iter1, middle, UITER_ZERO);
1286 if(pos1!=middle) {
1287 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n1, middle, pos1);
1288 return;
1289 }
1290
1291 pos2=iter2->move(iter2, middle, UITER_ZERO);
1292 if(pos2!=middle) {
1293 log_err("%s->move(from end to middle %d)=%d does not move to the middle\n", n2, middle, pos2);
1294 return;
1295 }
1296
1297 /* move to index 1 */
1298 pos1=iter1->move(iter1, 1, UITER_ZERO);
1299 if(pos1!=1) {
1300 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n1, middle, pos1);
1301 return;
1302 }
1303
1304 pos2=iter2->move(iter2, 1, UITER_ZERO);
1305 if(pos2!=1) {
1306 log_err("%s->move(from middle %d to 1)=%d does not move to 1\n", n2, middle, pos2);
1307 return;
1308 }
1309
1310 /* iterate backward from the end */
1311 pos1=iter1->move(iter1, 0, UITER_LIMIT);
1312 if(pos1<0) {
1313 log_err("%s->move(limit) failed\n", n1);
1314 return;
1315 }
1316 if(!iter1->hasPrevious(iter1)) {
1317 log_err("%s->hasPrevious() at the end returns FALSE\n", n1);
1318 return;
1319 }
1320
1321 pos2=iter2->move(iter2, 0, UITER_LIMIT);
1322 if(pos2<0) {
1323 log_err("%s->move(limit) failed\n", n2);
1324 return;
1325 }
1326 if(!iter2->hasPrevious(iter2)) {
1327 log_err("%s->hasPrevious() at the end returns FALSE\n", n2);
1328 return;
1329 }
1330
1331 do {
1332 c1=iter1->previous(iter1);
1333 c2=iter2->previous(iter2);
1334 if(c1!=c2) {
1335 log_err("%s->previous()=U+%04x != U+%04x=%s->previous() at %d\n", n1, c1, c2, n2, iter1->getIndex(iter1, UITER_CURRENT));
1336 return;
1337 }
1338 } while(c1>=0);
1339
1340 if(iter1->hasPrevious(iter1)) {
1341 log_err("%s->hasPrevious() at the start returns TRUE\n", n1);
1342 return;
1343 }
1344 if(iter2->hasPrevious(iter2)) {
1345 log_err("%s->hasPrevious() at the start returns TRUE\n", n2);
1346 return;
1347 }
1348 }
1349
1350 /*
1351 * Test the iterator's getState() and setState() functions.
1352 * iter1 and iter2 must be set up for the same iterator type and the same string
1353 * but may be physically different structs (different addresses).
1354 *
1355 * Assume that the text is not empty and that
1356 * iteration start==0 and iteration limit==length.
1357 * It must be 2<=middle<=length-2.
1358 */
1359 static void
testIteratorState(UCharIterator * iter1,UCharIterator * iter2,const char * n,int32_t middle)1360 testIteratorState(UCharIterator *iter1, UCharIterator *iter2, const char *n, int32_t middle) {
1361 UChar32 u[4];
1362
1363 UErrorCode errorCode;
1364 UChar32 c;
1365 uint32_t state;
1366 int32_t i, j;
1367
1368 /* get four UChars from the middle of the string */
1369 iter1->move(iter1, middle-2, UITER_ZERO);
1370 for(i=0; i<4; ++i) {
1371 c=iter1->next(iter1);
1372 if(c<0) {
1373 /* the test violates the assumptions, see comment above */
1374 log_err("test error: %s[%d]=%d\n", n, middle-2+i, c);
1375 return;
1376 }
1377 u[i]=c;
1378 }
1379
1380 /* move to the middle and get the state */
1381 iter1->move(iter1, -2, UITER_CURRENT);
1382 state=uiter_getState(iter1);
1383
1384 /* set the state into the second iterator and compare the results */
1385 errorCode=U_ZERO_ERROR;
1386 uiter_setState(iter2, state, &errorCode);
1387 if(U_FAILURE(errorCode)) {
1388 log_err("%s->setState(0x%x) failed: %s\n", n, state, u_errorName(errorCode));
1389 return;
1390 }
1391
1392 c=iter2->current(iter2);
1393 if(c!=u[2]) {
1394 log_err("%s->current(at %d)=U+%04x!=U+%04x\n", n, middle, c, u[2]);
1395 }
1396
1397 c=iter2->previous(iter2);
1398 if(c!=u[1]) {
1399 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-1, c, u[1]);
1400 }
1401
1402 iter2->move(iter2, 2, UITER_CURRENT);
1403 c=iter2->next(iter2);
1404 if(c!=u[3]) {
1405 log_err("%s->next(at %d)=U+%04x!=U+%04x\n", n, middle+1, c, u[3]);
1406 }
1407
1408 iter2->move(iter2, -3, UITER_CURRENT);
1409 c=iter2->previous(iter2);
1410 if(c!=u[0]) {
1411 log_err("%s->previous(at %d)=U+%04x!=U+%04x\n", n, middle-2, c, u[0]);
1412 }
1413
1414 /* move the second iterator back to the middle */
1415 iter2->move(iter2, 1, UITER_CURRENT);
1416 iter2->next(iter2);
1417
1418 /* check that both are in the middle */
1419 i=iter1->getIndex(iter1, UITER_CURRENT);
1420 j=iter2->getIndex(iter2, UITER_CURRENT);
1421 if(i!=middle) {
1422 log_err("%s->getIndex(current)=%d!=%d as expected\n", n, i, middle);
1423 }
1424 if(i!=j) {
1425 log_err("%s->getIndex(current)=%d!=%d after setState()\n", n, j, i);
1426 }
1427
1428 /* compare lengths */
1429 i=iter1->getIndex(iter1, UITER_LENGTH);
1430 j=iter2->getIndex(iter2, UITER_LENGTH);
1431 if(i!=j) {
1432 log_err("%s->getIndex(length)=%d!=%d before/after setState()\n", n, i, j);
1433 }
1434 }
1435
1436 static void
TestUCharIterator()1437 TestUCharIterator() {
1438 static const UChar text[]={
1439 0x61, 0x62, 0x63, 0xd801, 0xdffd, 0x78, 0x79, 0x7a, 0
1440 };
1441 char bytes[40];
1442
1443 UCharIterator iter, iter1, iter2;
1444 UConverter *cnv;
1445 UErrorCode errorCode;
1446 int32_t length;
1447
1448 /* simple API/code coverage - test NOOP UCharIterator */
1449 uiter_setString(&iter, NULL, 0);
1450 if( iter.current(&iter)!=-1 || iter.next(&iter)!=-1 || iter.previous(&iter)!=-1 ||
1451 iter.move(&iter, 1, UITER_CURRENT) || iter.getIndex(&iter, UITER_CURRENT)!=0 ||
1452 iter.hasNext(&iter) || iter.hasPrevious(&iter)
1453 ) {
1454 log_err("NOOP UCharIterator behaves unexpectedly\n");
1455 }
1456
1457 /* test get/set state */
1458 length=UPRV_LENGTHOF(text)-1;
1459 uiter_setString(&iter1, text, -1);
1460 uiter_setString(&iter2, text, length);
1461 testIteratorState(&iter1, &iter2, "UTF16IteratorState", length/2);
1462 testIteratorState(&iter1, &iter2, "UTF16IteratorStatePlus1", length/2+1);
1463
1464 /* compare the same string between UTF-16 and UTF-8 UCharIterators ------ */
1465 errorCode=U_ZERO_ERROR;
1466 u_strToUTF8(bytes, sizeof(bytes), &length, text, -1, &errorCode);
1467 if(U_FAILURE(errorCode)) {
1468 log_err("u_strToUTF8() failed, %s\n", u_errorName(errorCode));
1469 return;
1470 }
1471
1472 uiter_setString(&iter1, text, -1);
1473 uiter_setUTF8(&iter2, bytes, length);
1474 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator");
1475
1476 /* try again with length=-1 */
1477 uiter_setUTF8(&iter2, bytes, -1);
1478 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator_1");
1479
1480 /* test get/set state */
1481 length=UPRV_LENGTHOF(text)-1;
1482 uiter_setUTF8(&iter1, bytes, -1);
1483 testIteratorState(&iter1, &iter2, "UTF8IteratorState", length/2);
1484 testIteratorState(&iter1, &iter2, "UTF8IteratorStatePlus1", length/2+1);
1485
1486 /* compare the same string between UTF-16 and UTF-16BE UCharIterators --- */
1487 errorCode=U_ZERO_ERROR;
1488 cnv=ucnv_open("UTF-16BE", &errorCode);
1489 length=ucnv_fromUChars(cnv, bytes, sizeof(bytes), text, -1, &errorCode);
1490 ucnv_close(cnv);
1491 if(U_FAILURE(errorCode)) {
1492 log_err("ucnv_fromUChars(UTF-16BE) failed, %s\n", u_errorName(errorCode));
1493 return;
1494 }
1495
1496 /* terminate with a _pair_ of 0 bytes - a UChar NUL in UTF-16BE (length is known to be ok) */
1497 bytes[length]=bytes[length+1]=0;
1498
1499 uiter_setString(&iter1, text, -1);
1500 uiter_setUTF16BE(&iter2, bytes, length);
1501 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator");
1502
1503 /* try again with length=-1 */
1504 uiter_setUTF16BE(&iter2, bytes, -1);
1505 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIterator_1");
1506
1507 /* try again after moving the bytes up one, and with length=-1 */
1508 memmove(bytes+1, bytes, length+2);
1509 uiter_setUTF16BE(&iter2, bytes+1, -1);
1510 compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF16BEIteratorMoved1");
1511
1512 /* ### TODO test other iterators: CharacterIterator, Replaceable */
1513 }
1514