1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 2004-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
9 *
10 * File reapits.c
11 *
12 *********************************************************************************/
13 /*C API TEST FOR Regular Expressions */
14 /**
15 * This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't
16 * try to test the full functionality. It just calls each function and verifies that it
17 * works on a basic level.
18 *
19 * More complete testing of regular expression functionality is done with the C++ tests.
20 **/
21
22 #include "unicode/utypes.h"
23
24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
25
26 #include <stdlib.h>
27 #include <string.h>
28 #include "unicode/uloc.h"
29 #include "unicode/uregex.h"
30 #include "unicode/ustring.h"
31 #include "unicode/utext.h"
32 #include "cintltst.h"
33 #include "cmemory.h"
34
/*
 * TEST_ASSERT_SUCCESS(status)
 *    Logs a data error, tagged with the file and line of the use site, when
 *    status holds a failure code.  Expands to a complete brace-enclosed block.
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
}
37
/*
 * TEST_ASSERT(expr)
 *    Logs a test failure, tagged with the file and line of the use site and
 *    the text of the expression, when expr compares equal to FALSE.
 *    Expands to a complete brace-enclosed block.
 */
#define TEST_ASSERT(expr) { \
    if (FALSE == (expr)) { \
        log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", \
                __FILE__, __LINE__, #expr); \
    } \
}
40
/*
 * TEST_SETUP and TEST_TEARDOWN
 *    macros to handle the boilerplate around setting up regex test cases.
 *
 *    The two macros must always be used as a pair: TEST_SETUP opens a
 *    brace-enclosed scope and an "if (U_SUCCESS(status))" body, and only
 *    TEST_TEARDOWN closes them.
 *
 *    Both macros assign to / read the variables "status" and "re", which the
 *    macros do not declare; they must already be in scope at the use site.
 *
 *    parameters to setup:
 *       pattern:     The regex pattern, a (char *) null terminated C string.
 *       testString:  The string data, also a (char *) C string.
 *       flags:       Regex flags to set when compiling the pattern
 *
 *    Put arbitrary test code between SETUP and TEARDOWN.
 *    "re" is the compiled, ready-to-go regular expression.
 *    "srcString" is the malloc'ed UChar copy of testString that was set as
 *    the text of "re".
 *
 *    The test code in between is skipped when status holds a failure after
 *    the setup steps.  TEST_TEARDOWN reports any failure left in status,
 *    closes "re" and frees "srcString".
 */
#define TEST_SETUP(pattern, testString, flags) { \
    UChar *srcString = NULL; \
    status = U_ZERO_ERROR; \
    re = uregex_openC(pattern, flags, NULL, &status); \
    TEST_ASSERT_SUCCESS(status); \
    srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
    u_uastrncpy(srcString, testString, strlen(testString)+1); \
    uregex_setText(re, srcString, -1, &status); \
    TEST_ASSERT_SUCCESS(status); \
    if (U_SUCCESS(status)) {

#define TEST_TEARDOWN \
    } \
    TEST_ASSERT_SUCCESS(status); \
    uregex_close(re); \
    free(srcString); \
}
69
70
71 /**
72 * @param expected utf-8 array of bytes to be expected
73 */
test_assert_string(const char * expected,const UChar * actual,UBool nulTerm,const char * file,int line)74 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
75 char buf_inside_macro[120];
76 int32_t len = (int32_t)strlen(expected);
77 UBool success;
78 if (nulTerm) {
79 u_austrncpy(buf_inside_macro, (actual), len+1);
80 buf_inside_macro[len+2] = 0;
81 success = (strcmp((expected), buf_inside_macro) == 0);
82 } else {
83 u_austrncpy(buf_inside_macro, (actual), len);
84 buf_inside_macro[len+1] = 0;
85 success = (strncmp((expected), buf_inside_macro, len) == 0);
86 }
87 if (success == FALSE) {
88 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
89 file, line, (expected), buf_inside_macro);
90 }
91 }
92
93 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
94
95
equals_utf8_utext(const char * utf8,UText * utext)96 static UBool equals_utf8_utext(const char *utf8, UText *utext) {
97 int32_t u8i = 0;
98 UChar32 u8c = 0;
99 UChar32 utc = 0;
100 UBool stringsEqual = TRUE;
101 utext_setNativeIndex(utext, 0);
102 for (;;) {
103 U8_NEXT_UNSAFE(utf8, u8i, u8c);
104 utc = utext_next32(utext);
105 if (u8c == 0 && utc == U_SENTINEL) {
106 break;
107 }
108 if (u8c != utc || u8c == 0) {
109 stringsEqual = FALSE;
110 break;
111 }
112 }
113 return stringsEqual;
114 }
115
116
/*
 * Check that a UText holds the expected UTF-8 text.  On a mismatch, log the
 * expected string followed by the actual contents of the UText: code points
 * strictly between 0x20 and 0x7e are logged as characters, all others as
 * hex values.  The iteration position of actual is changed.
 */
static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
    UChar32 c;

    utext_setNativeIndex(actual, 0);
    if (equals_utf8_utext(expected, actual)) {
        return;
    }

    log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
    for (c = utext_next32From(actual, 0); c != U_SENTINEL; c = UTEXT_NEXT32(actual)) {
        if (c > 0x20 && c < 0x7e) {
            log_err("%c", c);
        } else {
            log_err("%#x", c);
        }
    }
    log_err("\"\n");
}

/*
 * TEST_ASSERT_UTEXT(const char *expected, UText *actual)
 *     Note:  Expected is a UTF-8 encoded string, _not_ the system code page.
 *     Supplies the file and line of the use site to test_assert_utext().
 */
#define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
140
testUTextEqual(UText * uta,UText * utb)141 static UBool testUTextEqual(UText *uta, UText *utb) {
142 UChar32 ca = 0;
143 UChar32 cb = 0;
144 utext_setNativeIndex(uta, 0);
145 utext_setNativeIndex(utb, 0);
146 do {
147 ca = utext_next32(uta);
148 cb = utext_next32(utb);
149 if (ca != cb) {
150 break;
151 }
152 } while (ca != U_SENTINEL);
153 return ca == cb;
154 }
155
156
157
158
159 static void TestRegexCAPI(void);
160 static void TestBug4315(void);
161 static void TestUTextAPI(void);
162 static void TestRefreshInput(void);
163 static void TestBug8421(void);
164 static void TestBug10815(void);
165
166 void addURegexTest(TestNode** root);
167
/*
 * Register every test function of this file with the test tree rooted at
 * root, each under a "regex/..." path.
 */
void addURegexTest(TestNode** root)
{
    addTest(root, TestRegexCAPI,    "regex/TestRegexCAPI");
    addTest(root, TestBug4315,      "regex/TestBug4315");
    addTest(root, TestUTextAPI,     "regex/TestUTextAPI");
    addTest(root, TestRefreshInput, "regex/TestRefreshInput");
    addTest(root, TestBug8421,      "regex/TestBug8421");
    addTest(root, TestBug10815,     "regex/TestBug10815");
}
177
/*
 * Call back function and context struct used for testing
 * regular expression user callbacks. This test is mostly the same as
 * the corresponding C++ test in intltest.
 *
 * The struct is the state that TestCallbackFn reads and updates through
 * its context pointer.
 */
typedef struct callBackContext {
    int32_t maxCalls;   /* TestCallbackFn returns FALSE once numCalls reaches this value */
    int32_t numCalls;   /* incremented on every invocation of TestCallbackFn */
    int32_t lastSteps;  /* steps argument received by the most recent invocation */
} callBackContext;
188
189 static UBool U_EXPORT2 U_CALLCONV
TestCallbackFn(const void * context,int32_t steps)190 TestCallbackFn(const void *context, int32_t steps) {
191 callBackContext *info = (callBackContext *)context;
192 if (info->lastSteps+1 != steps) {
193 log_err("incorrect steps in callback. Expected %d, got %d\n", info->lastSteps+1, steps);
194 }
195 info->lastSteps = steps;
196 info->numCalls++;
197 return (info->numCalls < info->maxCalls);
198 }
199
200 /*
201 * Regular Expression C API Tests
202 */
TestRegexCAPI(void)203 static void TestRegexCAPI(void) {
204 UErrorCode status = U_ZERO_ERROR;
205 URegularExpression *re;
206 UChar pat[200];
207 UChar *minus1;
208
209 memset(&minus1, -1, sizeof(minus1));
210
    /* Minimalist open/close */
212 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
213 re = uregex_open(pat, -1, 0, 0, &status);
214 if (U_FAILURE(status)) {
215 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
216 return;
217 }
218 uregex_close(re);
219
220 /* Open with all flag values set */
221 status = U_ZERO_ERROR;
222 re = uregex_open(pat, -1,
223 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
224 0, &status);
225 TEST_ASSERT_SUCCESS(status);
226 uregex_close(re);
227
228 /* Open with an invalid flag */
229 status = U_ZERO_ERROR;
230 re = uregex_open(pat, -1, 0x40000000, 0, &status);
231 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
232 uregex_close(re);
233
234 /* Open with an unimplemented flag */
235 status = U_ZERO_ERROR;
236 re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
237 TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
238 uregex_close(re);
239
240 /* openC with an invalid parameter */
241 status = U_ZERO_ERROR;
242 re = uregex_openC(NULL,
243 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
244 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
245
246 /* openC with an invalid parameter */
247 status = U_USELESS_COLLATOR_ERROR;
248 re = uregex_openC(NULL,
249 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
250 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
251
252 /* openC open from a C string */
253 {
254 const UChar *p;
255 int32_t len;
256 status = U_ZERO_ERROR;
257 re = uregex_openC("abc*", 0, 0, &status);
258 TEST_ASSERT_SUCCESS(status);
259 p = uregex_pattern(re, &len, &status);
260 TEST_ASSERT_SUCCESS(status);
261
262 /* The TEST_ASSERT_SUCCESS above should change too... */
263 if(U_SUCCESS(status)) {
264 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
265 TEST_ASSERT(u_strcmp(pat, p) == 0);
266 TEST_ASSERT(len==(int32_t)strlen("abc*"));
267 }
268
269 uregex_close(re);
270
271 /* TODO: Open with ParseError parameter */
272 }
273
274 /*
275 * clone
276 */
277 {
278 URegularExpression *clone1;
279 URegularExpression *clone2;
280 URegularExpression *clone3;
281 UChar testString1[30];
282 UChar testString2[30];
283 UBool result;
284
285
286 status = U_ZERO_ERROR;
287 re = uregex_openC("abc*", 0, 0, &status);
288 TEST_ASSERT_SUCCESS(status);
289 clone1 = uregex_clone(re, &status);
290 TEST_ASSERT_SUCCESS(status);
291 TEST_ASSERT(clone1 != NULL);
292
293 status = U_ZERO_ERROR;
294 clone2 = uregex_clone(re, &status);
295 TEST_ASSERT_SUCCESS(status);
296 TEST_ASSERT(clone2 != NULL);
297 uregex_close(re);
298
299 status = U_ZERO_ERROR;
300 clone3 = uregex_clone(clone2, &status);
301 TEST_ASSERT_SUCCESS(status);
302 TEST_ASSERT(clone3 != NULL);
303
304 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
305 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
306
307 status = U_ZERO_ERROR;
308 uregex_setText(clone1, testString1, -1, &status);
309 TEST_ASSERT_SUCCESS(status);
310 result = uregex_lookingAt(clone1, 0, &status);
311 TEST_ASSERT_SUCCESS(status);
312 TEST_ASSERT(result==TRUE);
313
314 status = U_ZERO_ERROR;
315 uregex_setText(clone2, testString2, -1, &status);
316 TEST_ASSERT_SUCCESS(status);
317 result = uregex_lookingAt(clone2, 0, &status);
318 TEST_ASSERT_SUCCESS(status);
319 TEST_ASSERT(result==FALSE);
320 result = uregex_find(clone2, 0, &status);
321 TEST_ASSERT_SUCCESS(status);
322 TEST_ASSERT(result==TRUE);
323
324 uregex_close(clone1);
325 uregex_close(clone2);
326 uregex_close(clone3);
327
328 }
329
330 /*
331 * pattern()
332 */
333 {
334 const UChar *resultPat;
335 int32_t resultLen;
336 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));
337 status = U_ZERO_ERROR;
338 re = uregex_open(pat, -1, 0, NULL, &status);
339 resultPat = uregex_pattern(re, &resultLen, &status);
340 TEST_ASSERT_SUCCESS(status);
341
342 /* The TEST_ASSERT_SUCCESS above should change too... */
343 if (U_SUCCESS(status)) {
344 TEST_ASSERT(resultLen == -1);
345 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
346 }
347
348 uregex_close(re);
349
350 status = U_ZERO_ERROR;
351 re = uregex_open(pat, 3, 0, NULL, &status);
352 resultPat = uregex_pattern(re, &resultLen, &status);
353 TEST_ASSERT_SUCCESS(status);
354 TEST_ASSERT_SUCCESS(status);
355
356 /* The TEST_ASSERT_SUCCESS above should change too... */
357 if (U_SUCCESS(status)) {
358 TEST_ASSERT(resultLen == 3);
359 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
360 TEST_ASSERT(u_strlen(resultPat) == 3);
361 }
362
363 uregex_close(re);
364 }
365
366 /*
367 * flags()
368 */
369 {
370 int32_t t;
371
372 status = U_ZERO_ERROR;
373 re = uregex_open(pat, -1, 0, NULL, &status);
374 t = uregex_flags(re, &status);
375 TEST_ASSERT_SUCCESS(status);
376 TEST_ASSERT(t == 0);
377 uregex_close(re);
378
379 status = U_ZERO_ERROR;
380 re = uregex_open(pat, -1, 0, NULL, &status);
381 t = uregex_flags(re, &status);
382 TEST_ASSERT_SUCCESS(status);
383 TEST_ASSERT(t == 0);
384 uregex_close(re);
385
386 status = U_ZERO_ERROR;
387 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
388 t = uregex_flags(re, &status);
389 TEST_ASSERT_SUCCESS(status);
390 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
391 uregex_close(re);
392 }
393
394 /*
395 * setText() and lookingAt()
396 */
397 {
398 UChar text1[50];
399 UChar text2[50];
400 UBool result;
401
402 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1));
403 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
404 status = U_ZERO_ERROR;
405 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
406 re = uregex_open(pat, -1, 0, NULL, &status);
407 TEST_ASSERT_SUCCESS(status);
408
409 /* Operation before doing a setText should fail... */
410 status = U_ZERO_ERROR;
411 uregex_lookingAt(re, 0, &status);
412 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
413
414 status = U_ZERO_ERROR;
415 uregex_setText(re, text1, -1, &status);
416 result = uregex_lookingAt(re, 0, &status);
417 TEST_ASSERT(result == TRUE);
418 TEST_ASSERT_SUCCESS(status);
419
420 status = U_ZERO_ERROR;
421 uregex_setText(re, text2, -1, &status);
422 result = uregex_lookingAt(re, 0, &status);
423 TEST_ASSERT(result == FALSE);
424 TEST_ASSERT_SUCCESS(status);
425
426 status = U_ZERO_ERROR;
427 uregex_setText(re, text1, -1, &status);
428 result = uregex_lookingAt(re, 0, &status);
429 TEST_ASSERT(result == TRUE);
430 TEST_ASSERT_SUCCESS(status);
431
432 status = U_ZERO_ERROR;
433 uregex_setText(re, text1, 5, &status);
434 result = uregex_lookingAt(re, 0, &status);
435 TEST_ASSERT(result == FALSE);
436 TEST_ASSERT_SUCCESS(status);
437
438 status = U_ZERO_ERROR;
439 uregex_setText(re, text1, 6, &status);
440 result = uregex_lookingAt(re, 0, &status);
441 TEST_ASSERT(result == TRUE);
442 TEST_ASSERT_SUCCESS(status);
443
444 uregex_close(re);
445 }
446
447
448 /*
449 * getText()
450 */
451 {
452 UChar text1[50];
453 UChar text2[50];
454 const UChar *result;
455 int32_t textLength;
456
457 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1));
458 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
459 status = U_ZERO_ERROR;
460 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
461 re = uregex_open(pat, -1, 0, NULL, &status);
462
463 uregex_setText(re, text1, -1, &status);
464 result = uregex_getText(re, &textLength, &status);
465 TEST_ASSERT(result == text1);
466 TEST_ASSERT(textLength == -1);
467 TEST_ASSERT_SUCCESS(status);
468
469 status = U_ZERO_ERROR;
470 uregex_setText(re, text2, 7, &status);
471 result = uregex_getText(re, &textLength, &status);
472 TEST_ASSERT(result == text2);
473 TEST_ASSERT(textLength == 7);
474 TEST_ASSERT_SUCCESS(status);
475
476 status = U_ZERO_ERROR;
477 uregex_setText(re, text2, 4, &status);
478 result = uregex_getText(re, &textLength, &status);
479 TEST_ASSERT(result == text2);
480 TEST_ASSERT(textLength == 4);
481 TEST_ASSERT_SUCCESS(status);
482 uregex_close(re);
483 }
484
485 /*
486 * matches()
487 */
488 {
489 UChar text1[50];
490 UBool result;
491 int len;
492 UChar nullString[] = {0,0,0};
493
494 u_uastrncpy(text1, "abcccde", UPRV_LENGTHOF(text1));
495 status = U_ZERO_ERROR;
496 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
497 re = uregex_open(pat, -1, 0, NULL, &status);
498
499 uregex_setText(re, text1, -1, &status);
500 result = uregex_matches(re, 0, &status);
501 TEST_ASSERT(result == FALSE);
502 TEST_ASSERT_SUCCESS(status);
503
504 status = U_ZERO_ERROR;
505 uregex_setText(re, text1, 6, &status);
506 result = uregex_matches(re, 0, &status);
507 TEST_ASSERT(result == TRUE);
508 TEST_ASSERT_SUCCESS(status);
509
510 status = U_ZERO_ERROR;
511 uregex_setText(re, text1, 6, &status);
512 result = uregex_matches(re, 1, &status);
513 TEST_ASSERT(result == FALSE);
514 TEST_ASSERT_SUCCESS(status);
515 uregex_close(re);
516
517 status = U_ZERO_ERROR;
518 re = uregex_openC(".?", 0, NULL, &status);
519 uregex_setText(re, text1, -1, &status);
520 len = u_strlen(text1);
521 result = uregex_matches(re, len, &status);
522 TEST_ASSERT(result == TRUE);
523 TEST_ASSERT_SUCCESS(status);
524
525 status = U_ZERO_ERROR;
526 uregex_setText(re, nullString, -1, &status);
527 TEST_ASSERT_SUCCESS(status);
528 result = uregex_matches(re, 0, &status);
529 TEST_ASSERT(result == TRUE);
530 TEST_ASSERT_SUCCESS(status);
531 uregex_close(re);
532 }
533
534
535 /*
536 * lookingAt() Used in setText test.
537 */
538
539
540 /*
541 * find(), findNext, start, end, reset
542 */
543 {
544 UChar text1[50];
545 UBool result;
546 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));
547 status = U_ZERO_ERROR;
548 re = uregex_openC("rx", 0, NULL, &status);
549
550 uregex_setText(re, text1, -1, &status);
551 result = uregex_find(re, 0, &status);
552 TEST_ASSERT(result == TRUE);
553 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
554 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
555 TEST_ASSERT_SUCCESS(status);
556
557 result = uregex_find(re, 9, &status);
558 TEST_ASSERT(result == TRUE);
559 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
560 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
561 TEST_ASSERT_SUCCESS(status);
562
563 result = uregex_find(re, 14, &status);
564 TEST_ASSERT(result == FALSE);
565 TEST_ASSERT_SUCCESS(status);
566
567 status = U_ZERO_ERROR;
568 uregex_reset(re, 0, &status);
569
570 result = uregex_findNext(re, &status);
571 TEST_ASSERT(result == TRUE);
572 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
573 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
574 TEST_ASSERT_SUCCESS(status);
575
576 result = uregex_findNext(re, &status);
577 TEST_ASSERT(result == TRUE);
578 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
579 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
580 TEST_ASSERT_SUCCESS(status);
581
582 status = U_ZERO_ERROR;
583 uregex_reset(re, 12, &status);
584
585 result = uregex_findNext(re, &status);
586 TEST_ASSERT(result == TRUE);
587 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
588 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
589 TEST_ASSERT_SUCCESS(status);
590
591 result = uregex_findNext(re, &status);
592 TEST_ASSERT(result == FALSE);
593 TEST_ASSERT_SUCCESS(status);
594
595 uregex_close(re);
596 }
597
598 /*
599 * groupCount
600 */
601 {
602 int32_t result;
603
604 status = U_ZERO_ERROR;
605 re = uregex_openC("abc", 0, NULL, &status);
606 result = uregex_groupCount(re, &status);
607 TEST_ASSERT_SUCCESS(status);
608 TEST_ASSERT(result == 0);
609 uregex_close(re);
610
611 status = U_ZERO_ERROR;
612 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
613 result = uregex_groupCount(re, &status);
614 TEST_ASSERT_SUCCESS(status);
615 TEST_ASSERT(result == 3);
616 uregex_close(re);
617
618 }
619
620
621 /*
622 * group()
623 */
624 {
625 UChar text1[80];
626 UChar buf[80];
627 UBool result;
628 int32_t resultSz;
629 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
630
631 status = U_ZERO_ERROR;
632 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
633 TEST_ASSERT_SUCCESS(status);
634
635
636 uregex_setText(re, text1, -1, &status);
637 result = uregex_find(re, 0, &status);
638 TEST_ASSERT(result==TRUE);
639
640 /* Capture Group 0, the full match. Should succeed. */
641 status = U_ZERO_ERROR;
642 resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);
643 TEST_ASSERT_SUCCESS(status);
644 TEST_ASSERT_STRING("abc interior def", buf, TRUE);
645 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
646
647 /* Capture group #1. Should succeed. */
648 status = U_ZERO_ERROR;
649 resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);
650 TEST_ASSERT_SUCCESS(status);
651 TEST_ASSERT_STRING(" interior ", buf, TRUE);
652 TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
653
654 /* Capture group out of range. Error. */
655 status = U_ZERO_ERROR;
656 uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);
657 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
658
659 /* NULL buffer, pure pre-flight */
660 status = U_ZERO_ERROR;
661 resultSz = uregex_group(re, 0, NULL, 0, &status);
662 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
663 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
664
665 /* Too small buffer, truncated string */
666 status = U_ZERO_ERROR;
667 memset(buf, -1, sizeof(buf));
668 resultSz = uregex_group(re, 0, buf, 5, &status);
669 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
670 TEST_ASSERT_STRING("abc i", buf, FALSE);
671 TEST_ASSERT(buf[5] == (UChar)0xffff);
672 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
673
674 /* Output string just fits buffer, no NUL term. */
675 status = U_ZERO_ERROR;
676 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
677 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
678 TEST_ASSERT_STRING("abc interior def", buf, FALSE);
679 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
680 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
681
682 uregex_close(re);
683
684 }
685
686 /*
687 * Regions
688 */
689
690
691 /* SetRegion(), getRegion() do something */
692 TEST_SETUP(".*", "0123456789ABCDEF", 0)
693 UChar resultString[40];
694 TEST_ASSERT(uregex_regionStart(re, &status) == 0);
695 TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
696 uregex_setRegion(re, 3, 6, &status);
697 TEST_ASSERT(uregex_regionStart(re, &status) == 3);
698 TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
699 TEST_ASSERT(uregex_findNext(re, &status));
700 TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3)
701 TEST_ASSERT_STRING("345", resultString, TRUE);
702 TEST_TEARDOWN;
703
704 /* find(start=-1) uses regions */
705 TEST_SETUP(".*", "0123456789ABCDEF", 0);
706 uregex_setRegion(re, 4, 6, &status);
707 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
708 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
709 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
710 TEST_TEARDOWN;
711
712 /* find (start >=0) does not use regions */
713 TEST_SETUP(".*", "0123456789ABCDEF", 0);
714 uregex_setRegion(re, 4, 6, &status);
715 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
716 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
717 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
718 TEST_TEARDOWN;
719
720 /* findNext() obeys regions */
721 TEST_SETUP(".", "0123456789ABCDEF", 0);
722 uregex_setRegion(re, 4, 6, &status);
723 TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
724 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
725 TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
726 TEST_ASSERT(uregex_start(re, 0, &status) == 5);
727 TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
728 TEST_TEARDOWN;
729
730 /* matches(start=-1) uses regions */
731 /* Also, verify that non-greedy *? succeeds in finding the full match. */
732 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
733 uregex_setRegion(re, 4, 6, &status);
734 TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
735 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
736 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
737 TEST_TEARDOWN;
738
739 /* matches (start >=0) does not use regions */
740 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
741 uregex_setRegion(re, 4, 6, &status);
742 TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
743 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
744 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
745 TEST_TEARDOWN;
746
747 /* lookingAt(start=-1) uses regions */
748 /* Also, verify that non-greedy *? finds the first (shortest) match. */
749 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
750 uregex_setRegion(re, 4, 6, &status);
751 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
752 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
753 TEST_ASSERT(uregex_end(re, 0, &status) == 4);
754 TEST_TEARDOWN;
755
756 /* lookingAt (start >=0) does not use regions */
757 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
758 uregex_setRegion(re, 4, 6, &status);
759 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
760 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
761 TEST_ASSERT(uregex_end(re, 0, &status) == 0);
762 TEST_TEARDOWN;
763
764 /* hitEnd() */
765 TEST_SETUP("[a-f]*", "abcdefghij", 0);
766 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
767 TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
768 TEST_TEARDOWN;
769
770 TEST_SETUP("[a-f]*", "abcdef", 0);
771 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
772 TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
773 TEST_TEARDOWN;
774
775 /* requireEnd */
776 TEST_SETUP("abcd", "abcd", 0);
777 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
778 TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
779 TEST_TEARDOWN;
780
781 TEST_SETUP("abcd$", "abcd", 0);
782 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
783 TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
784 TEST_TEARDOWN;
785
786 /* anchoringBounds */
787 TEST_SETUP("abc$", "abcdef", 0);
788 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
789 uregex_useAnchoringBounds(re, FALSE, &status);
790 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
791
792 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
793 uregex_useAnchoringBounds(re, TRUE, &status);
794 uregex_setRegion(re, 0, 3, &status);
795 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
796 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
797 TEST_TEARDOWN;
798
799 /* Transparent Bounds */
800 TEST_SETUP("abc(?=def)", "abcdef", 0);
801 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
802 uregex_useTransparentBounds(re, TRUE, &status);
803 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
804
805 uregex_useTransparentBounds(re, FALSE, &status);
806 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* No Region */
807 uregex_setRegion(re, 0, 3, &status);
808 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); /* with region, opaque bounds */
809 uregex_useTransparentBounds(re, TRUE, &status);
810 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* with region, transparent bounds */
811 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
812 TEST_TEARDOWN;
813
814
815 /*
816 * replaceFirst()
817 */
818 {
819 UChar text1[80];
820 UChar text2[80];
821 UChar replText[80];
822 UChar buf[80];
823 int32_t resultSz;
824 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
825 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
826 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
827
828 status = U_ZERO_ERROR;
829 re = uregex_openC("x(.*?)x", 0, NULL, &status);
830 TEST_ASSERT_SUCCESS(status);
831
832 /* Normal case, with match */
833 uregex_setText(re, text1, -1, &status);
834 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
835 TEST_ASSERT_SUCCESS(status);
836 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
837 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
838
839 /* No match. Text should copy to output with no changes. */
840 status = U_ZERO_ERROR;
841 uregex_setText(re, text2, -1, &status);
842 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
843 TEST_ASSERT_SUCCESS(status);
844 TEST_ASSERT_STRING("No match here.", buf, TRUE);
845 TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
846
847 /* Match, output just fills buffer, no termination warning. */
848 status = U_ZERO_ERROR;
849 uregex_setText(re, text1, -1, &status);
850 memset(buf, -1, sizeof(buf));
851 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
852 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
853 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
854 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
855 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
856
857 /* Do the replaceFirst again, without first resetting anything.
858 * Should give the same results.
859 */
860 status = U_ZERO_ERROR;
861 memset(buf, -1, sizeof(buf));
862 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
863 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
864 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
865 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
866 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
867
868 /* NULL buffer, zero buffer length */
869 status = U_ZERO_ERROR;
870 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
871 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
872 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
873
874 /* Buffer too small by one */
875 status = U_ZERO_ERROR;
876 memset(buf, -1, sizeof(buf));
877 resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
878 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
879 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
880 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
881 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
882
883 uregex_close(re);
884 }
885
886
887 /*
888 * replaceAll()
889 */
890 {
891 UChar text1[80]; /* "Replace xaax x1x x...x." */
892 UChar text2[80]; /* "No match Here" */
893 UChar replText[80]; /* "<$1>" */
894 UChar replText2[80]; /* "<<$1>>" */
895 const char * pattern = "x(.*?)x";
896 const char * expectedResult = "Replace <aa> <1> <...>.";
897 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
898 UChar buf[80];
899 int32_t resultSize;
900 int32_t expectedResultSize;
901 int32_t expectedResultSize2;
902 int32_t i;
903
904 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
905 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
906 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
907 u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));
908 expectedResultSize = strlen(expectedResult);
909 expectedResultSize2 = strlen(expectedResult2);
910
911 status = U_ZERO_ERROR;
912 re = uregex_openC(pattern, 0, NULL, &status);
913 TEST_ASSERT_SUCCESS(status);
914
915 /* Normal case, with match */
916 uregex_setText(re, text1, -1, &status);
917 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
918 TEST_ASSERT_SUCCESS(status);
919 TEST_ASSERT_STRING(expectedResult, buf, TRUE);
920 TEST_ASSERT(resultSize == expectedResultSize);
921
922 /* No match. Text should copy to output with no changes. */
923 status = U_ZERO_ERROR;
924 uregex_setText(re, text2, -1, &status);
925 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
926 TEST_ASSERT_SUCCESS(status);
927 TEST_ASSERT_STRING("No match here.", buf, TRUE);
928 TEST_ASSERT(resultSize == u_strlen(text2));
929
930 /* Match, output just fills buffer, no termination warning. */
931 status = U_ZERO_ERROR;
932 uregex_setText(re, text1, -1, &status);
933 memset(buf, -1, sizeof(buf));
934 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
935 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
936 TEST_ASSERT_STRING(expectedResult, buf, FALSE);
937 TEST_ASSERT(resultSize == expectedResultSize);
938 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
939
        /* Do the replaceAll again, without first resetting anything.
941 * Should give the same results.
942 */
943 status = U_ZERO_ERROR;
944 memset(buf, -1, sizeof(buf));
945 resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
946 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
947 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
948 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
949 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
950
951 /* NULL buffer, zero buffer length */
952 status = U_ZERO_ERROR;
953 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
954 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
955 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
956
957 /* Buffer too small. Try every size, which will tickle edge cases
958 * in uregex_appendReplacement (used by replaceAll) */
959 for (i=0; i<expectedResultSize; i++) {
960 char expected[80];
961 status = U_ZERO_ERROR;
962 memset(buf, -1, sizeof(buf));
963 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
964 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
965 strcpy(expected, expectedResult);
966 expected[i] = 0;
967 TEST_ASSERT_STRING(expected, buf, FALSE);
968 TEST_ASSERT(resultSize == expectedResultSize);
969 TEST_ASSERT(buf[i] == (UChar)0xffff);
970 }
971
972 /* Buffer too small. Same as previous test, except this time the replacement
973 * text is longer than the match capture group, making the length of the complete
974 * replacement longer than the original string.
975 */
976 for (i=0; i<expectedResultSize2; i++) {
977 char expected[80];
978 status = U_ZERO_ERROR;
979 memset(buf, -1, sizeof(buf));
980 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
981 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
982 strcpy(expected, expectedResult2);
983 expected[i] = 0;
984 TEST_ASSERT_STRING(expected, buf, FALSE);
985 TEST_ASSERT(resultSize == expectedResultSize2);
986 TEST_ASSERT(buf[i] == (UChar)0xffff);
987 }
988
989
990 uregex_close(re);
991 }
992
993
994 /*
995 * appendReplacement()
996 */
997 {
998 UChar text[100];
999 UChar repl[100];
1000 UChar buf[100];
1001 UChar *bufPtr;
1002 int32_t bufCap;
1003
1004
1005 status = U_ZERO_ERROR;
1006 re = uregex_openC(".*", 0, 0, &status);
1007 TEST_ASSERT_SUCCESS(status);
1008
1009 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));
1010 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1011 uregex_setText(re, text, -1, &status);
1012
1013 /* match covers whole target string */
1014 uregex_find(re, 0, &status);
1015 TEST_ASSERT_SUCCESS(status);
1016 bufPtr = buf;
1017 bufCap = UPRV_LENGTHOF(buf);
1018 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1019 TEST_ASSERT_SUCCESS(status);
1020 TEST_ASSERT_STRING("some other", buf, TRUE);
1021
1022 /* Match has \u \U escapes */
1023 uregex_find(re, 0, &status);
1024 TEST_ASSERT_SUCCESS(status);
1025 bufPtr = buf;
1026 bufCap = UPRV_LENGTHOF(buf);
1027 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1028 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1029 TEST_ASSERT_SUCCESS(status);
1030 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1031
1032 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1033 status = U_ZERO_ERROR;
1034 uregex_find(re, 0, &status);
1035 TEST_ASSERT_SUCCESS(status);
1036 bufPtr = buf;
1037 status = U_BUFFER_OVERFLOW_ERROR;
1038 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
1039 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1040
1041 uregex_close(re);
1042 }
1043
1044
1045 /*
1046 * appendTail(). Checked in ReplaceFirst(), replaceAll().
1047 */
1048
1049 /*
1050 * split()
1051 */
1052 {
1053 UChar textToSplit[80];
1054 UChar text2[80];
1055 UChar buf[200];
1056 UChar *fields[10];
1057 int32_t numFields;
1058 int32_t requiredCapacity;
1059 int32_t spaceNeeded;
1060 int32_t sz;
1061
1062 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit));
1063 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1064
1065 status = U_ZERO_ERROR;
1066 re = uregex_openC(":", 0, NULL, &status);
1067
1068
1069 /* Simple split */
1070
1071 uregex_setText(re, textToSplit, -1, &status);
1072 TEST_ASSERT_SUCCESS(status);
1073
1074 /* The TEST_ASSERT_SUCCESS call above should change too... */
1075 if (U_SUCCESS(status)) {
1076 memset(fields, -1, sizeof(fields));
1077 numFields =
1078 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1079 TEST_ASSERT_SUCCESS(status);
1080
1081 /* The TEST_ASSERT_SUCCESS call above should change too... */
1082 if(U_SUCCESS(status)) {
1083 TEST_ASSERT(numFields == 3);
1084 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1085 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1086 TEST_ASSERT_STRING(" third", fields[2], TRUE);
1087 TEST_ASSERT(fields[3] == NULL);
1088
1089 spaceNeeded = u_strlen(textToSplit) -
1090 (numFields - 1) + /* Field delimiters do not appear in output */
1091 numFields; /* Each field gets a NUL terminator */
1092
1093 TEST_ASSERT(spaceNeeded == requiredCapacity);
1094 }
1095 }
1096
1097 uregex_close(re);
1098
1099
1100 /* Split with too few output strings available */
1101 status = U_ZERO_ERROR;
1102 re = uregex_openC(":", 0, NULL, &status);
1103 uregex_setText(re, textToSplit, -1, &status);
1104 TEST_ASSERT_SUCCESS(status);
1105
1106 /* The TEST_ASSERT_SUCCESS call above should change too... */
1107 if(U_SUCCESS(status)) {
1108 memset(fields, -1, sizeof(fields));
1109 numFields =
1110 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1111 TEST_ASSERT_SUCCESS(status);
1112
1113 /* The TEST_ASSERT_SUCCESS call above should change too... */
1114 if(U_SUCCESS(status)) {
1115 TEST_ASSERT(numFields == 2);
1116 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1117 TEST_ASSERT_STRING(" second: third", fields[1], TRUE);
1118 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1119
1120 spaceNeeded = u_strlen(textToSplit) -
1121 (numFields - 1) + /* Field delimiters do not appear in output */
1122 numFields; /* Each field gets a NUL terminator */
1123
1124 TEST_ASSERT(spaceNeeded == requiredCapacity);
1125
1126 /* Split with a range of output buffer sizes. */
1127 spaceNeeded = u_strlen(textToSplit) -
1128 (numFields - 1) + /* Field delimiters do not appear in output */
1129 numFields; /* Each field gets a NUL terminator */
1130
1131 for (sz=0; sz < spaceNeeded+1; sz++) {
1132 memset(fields, -1, sizeof(fields));
1133 status = U_ZERO_ERROR;
1134 numFields =
1135 uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1136 if (sz >= spaceNeeded) {
1137 TEST_ASSERT_SUCCESS(status);
1138 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1139 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1140 TEST_ASSERT_STRING(" third", fields[2], TRUE);
1141 } else {
1142 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1143 }
1144 TEST_ASSERT(numFields == 3);
1145 TEST_ASSERT(fields[3] == NULL);
1146 TEST_ASSERT(spaceNeeded == requiredCapacity);
1147 }
1148 }
1149 }
1150
1151 uregex_close(re);
1152 }
1153
1154
1155
1156
1157 /* Split(), part 2. Patterns with capture groups. The capture group text
1158 * comes out as additional fields. */
1159 {
1160 UChar textToSplit[80];
1161 UChar buf[200];
1162 UChar *fields[10];
1163 int32_t numFields;
1164 int32_t requiredCapacity;
1165 int32_t spaceNeeded;
1166 int32_t sz;
1167
1168 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit));
1169
1170 status = U_ZERO_ERROR;
1171 re = uregex_openC("<(.*?)>", 0, NULL, &status);
1172
1173 uregex_setText(re, textToSplit, -1, &status);
1174 TEST_ASSERT_SUCCESS(status);
1175
1176 /* The TEST_ASSERT_SUCCESS call above should change too... */
1177 if(U_SUCCESS(status)) {
1178 memset(fields, -1, sizeof(fields));
1179 numFields =
1180 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1181 TEST_ASSERT_SUCCESS(status);
1182
1183 /* The TEST_ASSERT_SUCCESS call above should change too... */
1184 if(U_SUCCESS(status)) {
1185 TEST_ASSERT(numFields == 5);
1186 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1187 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1188 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1189 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1190 TEST_ASSERT_STRING(" third", fields[4], TRUE);
1191 TEST_ASSERT(fields[5] == NULL);
1192 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1193 TEST_ASSERT(spaceNeeded == requiredCapacity);
1194 }
1195 }
1196
1197 /* Split with too few output strings available (2) */
1198 status = U_ZERO_ERROR;
1199 memset(fields, -1, sizeof(fields));
1200 numFields =
1201 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1202 TEST_ASSERT_SUCCESS(status);
1203
1204 /* The TEST_ASSERT_SUCCESS call above should change too... */
1205 if(U_SUCCESS(status)) {
1206 TEST_ASSERT(numFields == 2);
1207 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1208 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE);
1209 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1210
1211 spaceNeeded = strlen("first . second<tag-b> third."); /* "." at NUL positions */
1212 TEST_ASSERT(spaceNeeded == requiredCapacity);
1213 }
1214
1215 /* Split with too few output strings available (3) */
1216 status = U_ZERO_ERROR;
1217 memset(fields, -1, sizeof(fields));
1218 numFields =
1219 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
1220 TEST_ASSERT_SUCCESS(status);
1221
1222 /* The TEST_ASSERT_SUCCESS call above should change too... */
1223 if(U_SUCCESS(status)) {
1224 TEST_ASSERT(numFields == 3);
1225 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1226 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1227 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE);
1228 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1229
1230 spaceNeeded = strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */
1231 TEST_ASSERT(spaceNeeded == requiredCapacity);
1232 }
1233
1234 /* Split with just enough output strings available (5) */
1235 status = U_ZERO_ERROR;
1236 memset(fields, -1, sizeof(fields));
1237 numFields =
1238 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
1239 TEST_ASSERT_SUCCESS(status);
1240
1241 /* The TEST_ASSERT_SUCCESS call above should change too... */
1242 if(U_SUCCESS(status)) {
1243 TEST_ASSERT(numFields == 5);
1244 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1245 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1246 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1247 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1248 TEST_ASSERT_STRING(" third", fields[4], TRUE);
1249 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1250
1251 spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1252 TEST_ASSERT(spaceNeeded == requiredCapacity);
1253 }
1254
1255 /* Split, end of text is a field delimiter. */
1256 status = U_ZERO_ERROR;
1257 sz = strlen("first <tag-a> second<tag-b>");
1258 uregex_setText(re, textToSplit, sz, &status);
1259 TEST_ASSERT_SUCCESS(status);
1260
1261 /* The TEST_ASSERT_SUCCESS call above should change too... */
1262 if(U_SUCCESS(status)) {
1263 memset(fields, -1, sizeof(fields));
1264 numFields =
1265 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
1266 TEST_ASSERT_SUCCESS(status);
1267
1268 /* The TEST_ASSERT_SUCCESS call above should change too... */
1269 if(U_SUCCESS(status)) {
1270 TEST_ASSERT(numFields == 5);
1271 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1272 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1273 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1274 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1275 TEST_ASSERT_STRING("", fields[4], TRUE);
1276 TEST_ASSERT(fields[5] == NULL);
1277 TEST_ASSERT(fields[8] == NULL);
1278 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1279 spaceNeeded = strlen("first .tag-a. second.tag-b.."); /* "." at NUL positions */
1280 TEST_ASSERT(spaceNeeded == requiredCapacity);
1281 }
1282 }
1283
1284 uregex_close(re);
1285 }
1286
1287 /*
1288 * set/getTimeLimit
1289 */
1290 TEST_SETUP("abc$", "abcdef", 0);
1291 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1292 uregex_setTimeLimit(re, 1000, &status);
1293 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1294 TEST_ASSERT_SUCCESS(status);
1295 uregex_setTimeLimit(re, -1, &status);
1296 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1297 status = U_ZERO_ERROR;
1298 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1299 TEST_TEARDOWN;
1300
1301 /*
1302 * set/get Stack Limit
1303 */
1304 TEST_SETUP("abc$", "abcdef", 0);
1305 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1306 uregex_setStackLimit(re, 40000, &status);
1307 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1308 TEST_ASSERT_SUCCESS(status);
1309 uregex_setStackLimit(re, -1, &status);
1310 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1311 status = U_ZERO_ERROR;
1312 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1313 TEST_TEARDOWN;
1314
1315
1316 /*
1317 * Get/Set callback functions
1318 * This test is copied from intltest regex/Callbacks
1319 * The pattern and test data will run long enough to cause the callback
1320 * to be invoked. The nested '+' operators give exponential time
1321 * behavior with increasing string length.
1322 */
1323 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1324 callBackContext cbInfo = {4, 0, 0};
1325 const void *pContext = &cbInfo;
1326 URegexMatchCallback *returnedFn = &TestCallbackFn;
1327
1328 /* Getting the callback fn when it hasn't been set must return NULL */
1329 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1330 TEST_ASSERT_SUCCESS(status);
1331 TEST_ASSERT(returnedFn == NULL);
1332 TEST_ASSERT(pContext == NULL);
1333
    /* Set the callback and do a match. */
1335 /* The callback function should record that it has been called. */
1336 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1337 TEST_ASSERT_SUCCESS(status);
1338 TEST_ASSERT(cbInfo.numCalls == 0);
1339 TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1340 TEST_ASSERT_SUCCESS(status);
1341 TEST_ASSERT(cbInfo.numCalls > 0);
1342
1343 /* Getting the callback should return the values that were set above. */
1344 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1345 TEST_ASSERT(returnedFn == &TestCallbackFn);
1346 TEST_ASSERT(pContext == &cbInfo);
1347
1348 TEST_TEARDOWN;
1349 }
1350
1351
1352
TestBug4315(void)1353 static void TestBug4315(void) {
1354 UErrorCode theICUError = U_ZERO_ERROR;
1355 URegularExpression *theRegEx;
1356 UChar *textBuff;
1357 const char *thePattern;
1358 UChar theString[100];
1359 UChar *destFields[24];
1360 int32_t neededLength1;
1361 int32_t neededLength2;
1362
1363 int32_t wordCount = 0;
1364 int32_t destFieldsSize = 24;
1365
1366 thePattern = "ck ";
1367 u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1368
1369 /* open a regex */
1370 theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1371 TEST_ASSERT_SUCCESS(theICUError);
1372
1373 /* set the input string */
1374 uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1375 TEST_ASSERT_SUCCESS(theICUError);
1376
1377 /* split */
1378 /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1379 * error occurs! */
1380 wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1381 destFieldsSize, &theICUError);
1382
1383 TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1384 TEST_ASSERT(wordCount==3);
1385
1386 if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1387 {
1388 theICUError = U_ZERO_ERROR;
1389 textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1390 wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1391 destFields, destFieldsSize, &theICUError);
1392 TEST_ASSERT(wordCount==3);
1393 TEST_ASSERT_SUCCESS(theICUError);
1394 TEST_ASSERT(neededLength1 == neededLength2);
1395 TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1396 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1397 TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1398 TEST_ASSERT(destFields[3] == NULL);
1399 free(textBuff);
1400 }
1401 uregex_close(theRegEx);
1402 }
1403
1404 /* Based on TestRegexCAPI() */
TestUTextAPI(void)1405 static void TestUTextAPI(void) {
1406 UErrorCode status = U_ZERO_ERROR;
1407 URegularExpression *re;
1408 UText patternText = UTEXT_INITIALIZER;
1409 UChar pat[200];
1410 const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1411
    /* Minimalist open/close */
1413 utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1414 re = uregex_openUText(&patternText, 0, 0, &status);
1415 if (U_FAILURE(status)) {
1416 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1417 utext_close(&patternText);
1418 return;
1419 }
1420 uregex_close(re);
1421
1422 /* Open with all flag values set */
1423 status = U_ZERO_ERROR;
1424 re = uregex_openUText(&patternText,
1425 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1426 0, &status);
1427 TEST_ASSERT_SUCCESS(status);
1428 uregex_close(re);
1429
1430 /* Open with an invalid flag */
1431 status = U_ZERO_ERROR;
1432 re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1433 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1434 uregex_close(re);
1435
1436 /* open with an invalid parameter */
1437 status = U_ZERO_ERROR;
1438 re = uregex_openUText(NULL,
1439 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1440 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1441
1442 /*
1443 * clone
1444 */
1445 {
1446 URegularExpression *clone1;
1447 URegularExpression *clone2;
1448 URegularExpression *clone3;
1449 UChar testString1[30];
1450 UChar testString2[30];
1451 UBool result;
1452
1453
1454 status = U_ZERO_ERROR;
1455 re = uregex_openUText(&patternText, 0, 0, &status);
1456 TEST_ASSERT_SUCCESS(status);
1457 clone1 = uregex_clone(re, &status);
1458 TEST_ASSERT_SUCCESS(status);
1459 TEST_ASSERT(clone1 != NULL);
1460
1461 status = U_ZERO_ERROR;
1462 clone2 = uregex_clone(re, &status);
1463 TEST_ASSERT_SUCCESS(status);
1464 TEST_ASSERT(clone2 != NULL);
1465 uregex_close(re);
1466
1467 status = U_ZERO_ERROR;
1468 clone3 = uregex_clone(clone2, &status);
1469 TEST_ASSERT_SUCCESS(status);
1470 TEST_ASSERT(clone3 != NULL);
1471
1472 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
1473 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
1474
1475 status = U_ZERO_ERROR;
1476 uregex_setText(clone1, testString1, -1, &status);
1477 TEST_ASSERT_SUCCESS(status);
1478 result = uregex_lookingAt(clone1, 0, &status);
1479 TEST_ASSERT_SUCCESS(status);
1480 TEST_ASSERT(result==TRUE);
1481
1482 status = U_ZERO_ERROR;
1483 uregex_setText(clone2, testString2, -1, &status);
1484 TEST_ASSERT_SUCCESS(status);
1485 result = uregex_lookingAt(clone2, 0, &status);
1486 TEST_ASSERT_SUCCESS(status);
1487 TEST_ASSERT(result==FALSE);
1488 result = uregex_find(clone2, 0, &status);
1489 TEST_ASSERT_SUCCESS(status);
1490 TEST_ASSERT(result==TRUE);
1491
1492 uregex_close(clone1);
1493 uregex_close(clone2);
1494 uregex_close(clone3);
1495
1496 }
1497
1498 /*
1499 * pattern() and patternText()
1500 */
1501 {
1502 const UChar *resultPat;
1503 int32_t resultLen;
1504 UText *resultText;
1505 const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1506 const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1507 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
1508 status = U_ZERO_ERROR;
1509
1510 utext_openUTF8(&patternText, str_hello, -1, &status);
1511 re = uregex_open(pat, -1, 0, NULL, &status);
1512 resultPat = uregex_pattern(re, &resultLen, &status);
1513 TEST_ASSERT_SUCCESS(status);
1514
1515 /* The TEST_ASSERT_SUCCESS above should change too... */
1516 if (U_SUCCESS(status)) {
1517 TEST_ASSERT(resultLen == -1);
1518 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1519 }
1520
1521 resultText = uregex_patternUText(re, &status);
1522 TEST_ASSERT_SUCCESS(status);
1523 TEST_ASSERT_UTEXT(str_hello, resultText);
1524
1525 uregex_close(re);
1526
1527 status = U_ZERO_ERROR;
1528 re = uregex_open(pat, 3, 0, NULL, &status);
1529 resultPat = uregex_pattern(re, &resultLen, &status);
1530 TEST_ASSERT_SUCCESS(status);
1531
1532 /* The TEST_ASSERT_SUCCESS above should change too... */
1533 if (U_SUCCESS(status)) {
1534 TEST_ASSERT(resultLen == 3);
1535 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1536 TEST_ASSERT(u_strlen(resultPat) == 3);
1537 }
1538
1539 resultText = uregex_patternUText(re, &status);
1540 TEST_ASSERT_SUCCESS(status);
1541 TEST_ASSERT_UTEXT(str_hel, resultText);
1542
1543 uregex_close(re);
1544 }
1545
1546 /*
1547 * setUText() and lookingAt()
1548 */
1549 {
1550 UText text1 = UTEXT_INITIALIZER;
1551 UText text2 = UTEXT_INITIALIZER;
1552 UBool result;
1553 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1554 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1555 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1556 status = U_ZERO_ERROR;
1557 utext_openUTF8(&text1, str_abcccd, -1, &status);
1558 utext_openUTF8(&text2, str_abcccxd, -1, &status);
1559
1560 utext_openUTF8(&patternText, str_abcd, -1, &status);
1561 re = uregex_openUText(&patternText, 0, NULL, &status);
1562 TEST_ASSERT_SUCCESS(status);
1563
1564 /* Operation before doing a setText should fail... */
1565 status = U_ZERO_ERROR;
1566 uregex_lookingAt(re, 0, &status);
1567 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1568
1569 status = U_ZERO_ERROR;
1570 uregex_setUText(re, &text1, &status);
1571 result = uregex_lookingAt(re, 0, &status);
1572 TEST_ASSERT(result == TRUE);
1573 TEST_ASSERT_SUCCESS(status);
1574
1575 status = U_ZERO_ERROR;
1576 uregex_setUText(re, &text2, &status);
1577 result = uregex_lookingAt(re, 0, &status);
1578 TEST_ASSERT(result == FALSE);
1579 TEST_ASSERT_SUCCESS(status);
1580
1581 status = U_ZERO_ERROR;
1582 uregex_setUText(re, &text1, &status);
1583 result = uregex_lookingAt(re, 0, &status);
1584 TEST_ASSERT(result == TRUE);
1585 TEST_ASSERT_SUCCESS(status);
1586
1587 uregex_close(re);
1588 utext_close(&text1);
1589 utext_close(&text2);
1590 }
1591
1592
1593 /*
1594 * getText() and getUText()
1595 */
1596 {
1597 UText text1 = UTEXT_INITIALIZER;
1598 UText text2 = UTEXT_INITIALIZER;
1599 UChar text2Chars[20];
1600 UText *resultText;
1601 const UChar *result;
1602 int32_t textLength;
1603 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1604 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1605 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1606
1607
1608 status = U_ZERO_ERROR;
1609 utext_openUTF8(&text1, str_abcccd, -1, &status);
1610 u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
1611 utext_openUChars(&text2, text2Chars, -1, &status);
1612
1613 utext_openUTF8(&patternText, str_abcd, -1, &status);
1614 re = uregex_openUText(&patternText, 0, NULL, &status);
1615
1616 /* First set a UText */
1617 uregex_setUText(re, &text1, &status);
1618 resultText = uregex_getUText(re, NULL, &status);
1619 TEST_ASSERT_SUCCESS(status);
1620 TEST_ASSERT(resultText != &text1);
1621 utext_setNativeIndex(resultText, 0);
1622 utext_setNativeIndex(&text1, 0);
1623 TEST_ASSERT(testUTextEqual(resultText, &text1));
1624 utext_close(resultText);
1625
1626 result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1627 (void)result; /* Suppress set but not used warning. */
1628 TEST_ASSERT(textLength == -1 || textLength == 6);
1629 resultText = uregex_getUText(re, NULL, &status);
1630 TEST_ASSERT_SUCCESS(status);
1631 TEST_ASSERT(resultText != &text1);
1632 utext_setNativeIndex(resultText, 0);
1633 utext_setNativeIndex(&text1, 0);
1634 TEST_ASSERT(testUTextEqual(resultText, &text1));
1635 utext_close(resultText);
1636
1637 /* Then set a UChar * */
1638 uregex_setText(re, text2Chars, 7, &status);
1639 resultText = uregex_getUText(re, NULL, &status);
1640 TEST_ASSERT_SUCCESS(status);
1641 utext_setNativeIndex(resultText, 0);
1642 utext_setNativeIndex(&text2, 0);
1643 TEST_ASSERT(testUTextEqual(resultText, &text2));
1644 utext_close(resultText);
1645 result = uregex_getText(re, &textLength, &status);
1646 TEST_ASSERT(textLength == 7);
1647
1648 uregex_close(re);
1649 utext_close(&text1);
1650 utext_close(&text2);
1651 }
1652
1653 /*
1654 * matches()
1655 */
1656 {
1657 UText text1 = UTEXT_INITIALIZER;
1658 UBool result;
1659 UText nullText = UTEXT_INITIALIZER;
1660 const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1661 const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1662
1663 status = U_ZERO_ERROR;
1664 utext_openUTF8(&text1, str_abcccde, -1, &status);
1665 utext_openUTF8(&patternText, str_abcd, -1, &status);
1666 re = uregex_openUText(&patternText, 0, NULL, &status);
1667
1668 uregex_setUText(re, &text1, &status);
1669 result = uregex_matches(re, 0, &status);
1670 TEST_ASSERT(result == FALSE);
1671 TEST_ASSERT_SUCCESS(status);
1672 uregex_close(re);
1673
1674 status = U_ZERO_ERROR;
1675 re = uregex_openC(".?", 0, NULL, &status);
1676 uregex_setUText(re, &text1, &status);
1677 result = uregex_matches(re, 7, &status);
1678 TEST_ASSERT(result == TRUE);
1679 TEST_ASSERT_SUCCESS(status);
1680
1681 status = U_ZERO_ERROR;
1682 utext_openUTF8(&nullText, "", -1, &status);
1683 uregex_setUText(re, &nullText, &status);
1684 TEST_ASSERT_SUCCESS(status);
1685 result = uregex_matches(re, 0, &status);
1686 TEST_ASSERT(result == TRUE);
1687 TEST_ASSERT_SUCCESS(status);
1688
1689 uregex_close(re);
1690 utext_close(&text1);
1691 utext_close(&nullText);
1692 }
1693
1694
1695 /*
1696 * lookingAt() Used in setText test.
1697 */
1698
1699
1700 /*
1701 * find(), findNext, start, end, reset
1702 */
1703 {
1704 UChar text1[50];
1705 UBool result;
1706 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));
1707 status = U_ZERO_ERROR;
1708 re = uregex_openC("rx", 0, NULL, &status);
1709
1710 uregex_setText(re, text1, -1, &status);
1711 result = uregex_find(re, 0, &status);
1712 TEST_ASSERT(result == TRUE);
1713 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1714 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1715 TEST_ASSERT_SUCCESS(status);
1716
1717 result = uregex_find(re, 9, &status);
1718 TEST_ASSERT(result == TRUE);
1719 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1720 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1721 TEST_ASSERT_SUCCESS(status);
1722
1723 result = uregex_find(re, 14, &status);
1724 TEST_ASSERT(result == FALSE);
1725 TEST_ASSERT_SUCCESS(status);
1726
1727 status = U_ZERO_ERROR;
1728 uregex_reset(re, 0, &status);
1729
1730 result = uregex_findNext(re, &status);
1731 TEST_ASSERT(result == TRUE);
1732 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1733 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1734 TEST_ASSERT_SUCCESS(status);
1735
1736 result = uregex_findNext(re, &status);
1737 TEST_ASSERT(result == TRUE);
1738 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1739 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1740 TEST_ASSERT_SUCCESS(status);
1741
1742 status = U_ZERO_ERROR;
1743 uregex_reset(re, 12, &status);
1744
1745 result = uregex_findNext(re, &status);
1746 TEST_ASSERT(result == TRUE);
1747 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1748 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1749 TEST_ASSERT_SUCCESS(status);
1750
1751 result = uregex_findNext(re, &status);
1752 TEST_ASSERT(result == FALSE);
1753 TEST_ASSERT_SUCCESS(status);
1754
1755 uregex_close(re);
1756 }
1757
1758 /*
1759 * groupUText()
1760 */
1761 {
1762 UChar text1[80];
1763 UText *actual;
1764 UBool result;
1765 int64_t groupLen = 0;
1766 UChar groupBuf[20];
1767
1768 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
1769
1770 status = U_ZERO_ERROR;
1771 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1772 TEST_ASSERT_SUCCESS(status);
1773
1774 uregex_setText(re, text1, -1, &status);
1775 result = uregex_find(re, 0, &status);
1776 TEST_ASSERT(result==TRUE);
1777
1778 /* Capture Group 0 with shallow clone API. Should succeed. */
1779 status = U_ZERO_ERROR;
1780 actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
1781 TEST_ASSERT_SUCCESS(status);
1782
1783 TEST_ASSERT(utext_getNativeIndex(actual) == 6); /* index of "abc " within "noise abc ..." */
1784 TEST_ASSERT(groupLen == 16); /* length of "abc interior def" */
1785 utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1786
1787 TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
1788 utext_close(actual);
1789
1790 /* Capture group #1. Should succeed. */
1791 status = U_ZERO_ERROR;
1792
1793 actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
1794 TEST_ASSERT_SUCCESS(status);
1795 TEST_ASSERT(9 == utext_getNativeIndex(actual)); /* index of " interior " within "noise abc interior def ... " */
1796 /* (within the string text1) */
1797 TEST_ASSERT(10 == groupLen); /* length of " interior " */
1798 utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1799 TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
1800
1801 utext_close(actual);
1802
1803 /* Capture group out of range. Error. */
1804 status = U_ZERO_ERROR;
1805 actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
1806 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1807 utext_close(actual);
1808
1809 uregex_close(re);
1810 }
1811
1812 /*
1813 * replaceFirst()
1814 */
1815 {
1816 UChar text1[80];
1817 UChar text2[80];
1818 UText replText = UTEXT_INITIALIZER;
1819 UText *result;
1820 const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1821 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1822 const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1823 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
1824 const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1825 const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1826 status = U_ZERO_ERROR;
1827 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
1828 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1829 utext_openUTF8(&replText, str_1x, -1, &status);
1830
1831 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1832 TEST_ASSERT_SUCCESS(status);
1833
1834 /* Normal case, with match */
1835 uregex_setText(re, text1, -1, &status);
1836 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1837 TEST_ASSERT_SUCCESS(status);
1838 TEST_ASSERT_UTEXT(str_Replxxx, result);
1839 utext_close(result);
1840
1841 /* No match. Text should copy to output with no changes. */
1842 uregex_setText(re, text2, -1, &status);
1843 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1844 TEST_ASSERT_SUCCESS(status);
1845 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1846 utext_close(result);
1847
1848 /* Unicode escapes */
1849 uregex_setText(re, text1, -1, &status);
1850 utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1851 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1852 TEST_ASSERT_SUCCESS(status);
1853 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1854 utext_close(result);
1855
1856 uregex_close(re);
1857 utext_close(&replText);
1858 }
1859
1860
1861 /*
1862 * replaceAll()
1863 */
1864 {
1865 UChar text1[80];
1866 UChar text2[80];
1867 UText replText = UTEXT_INITIALIZER;
1868 UText *result;
1869 const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1870 const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1871 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1872 status = U_ZERO_ERROR;
1873 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
1874 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1875 utext_openUTF8(&replText, str_1, -1, &status);
1876
1877 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1878 TEST_ASSERT_SUCCESS(status);
1879
1880 /* Normal case, with match */
1881 uregex_setText(re, text1, -1, &status);
1882 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1883 TEST_ASSERT_SUCCESS(status);
1884 TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1885 utext_close(result);
1886
1887 /* No match. Text should copy to output with no changes. */
1888 uregex_setText(re, text2, -1, &status);
1889 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1890 TEST_ASSERT_SUCCESS(status);
1891 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1892 utext_close(result);
1893
1894 uregex_close(re);
1895 utext_close(&replText);
1896 }
1897
1898
1899 /*
1900 * appendReplacement()
1901 */
1902 {
1903 UChar text[100];
1904 UChar repl[100];
1905 UChar buf[100];
1906 UChar *bufPtr;
1907 int32_t bufCap;
1908
1909 status = U_ZERO_ERROR;
1910 re = uregex_openC(".*", 0, 0, &status);
1911 TEST_ASSERT_SUCCESS(status);
1912
1913 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));
1914 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1915 uregex_setText(re, text, -1, &status);
1916
1917 /* match covers whole target string */
1918 uregex_find(re, 0, &status);
1919 TEST_ASSERT_SUCCESS(status);
1920 bufPtr = buf;
1921 bufCap = UPRV_LENGTHOF(buf);
1922 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1923 TEST_ASSERT_SUCCESS(status);
1924 TEST_ASSERT_STRING("some other", buf, TRUE);
1925
1926 /* Match has \u \U escapes */
1927 uregex_find(re, 0, &status);
1928 TEST_ASSERT_SUCCESS(status);
1929 bufPtr = buf;
1930 bufCap = UPRV_LENGTHOF(buf);
1931 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1932 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1933 TEST_ASSERT_SUCCESS(status);
1934 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1935
1936 uregex_close(re);
1937 }
1938
1939
1940 /*
1941 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1942 */
1943
1944 /*
1945 * splitUText()
1946 */
1947 {
1948 UChar textToSplit[80];
1949 UChar text2[80];
1950 UText *fields[10];
1951 int32_t numFields;
1952 int32_t i;
1953
1954 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit));
1955 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1956
1957 status = U_ZERO_ERROR;
1958 re = uregex_openC(":", 0, NULL, &status);
1959
1960
1961 /* Simple split */
1962
1963 uregex_setText(re, textToSplit, -1, &status);
1964 TEST_ASSERT_SUCCESS(status);
1965
1966 /* The TEST_ASSERT_SUCCESS call above should change too... */
1967 if (U_SUCCESS(status)) {
1968 memset(fields, 0, sizeof(fields));
1969 numFields = uregex_splitUText(re, fields, 10, &status);
1970 TEST_ASSERT_SUCCESS(status);
1971
1972 /* The TEST_ASSERT_SUCCESS call above should change too... */
1973 if(U_SUCCESS(status)) {
1974 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1975 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
1976 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */
1977 TEST_ASSERT(numFields == 3);
1978 TEST_ASSERT_UTEXT(str_first, fields[0]);
1979 TEST_ASSERT_UTEXT(str_second, fields[1]);
1980 TEST_ASSERT_UTEXT(str_third, fields[2]);
1981 TEST_ASSERT(fields[3] == NULL);
1982 }
1983 for(i = 0; i < numFields; i++) {
1984 utext_close(fields[i]);
1985 }
1986 }
1987
1988 uregex_close(re);
1989
1990
1991 /* Split with too few output strings available */
1992 status = U_ZERO_ERROR;
1993 re = uregex_openC(":", 0, NULL, &status);
1994 uregex_setText(re, textToSplit, -1, &status);
1995 TEST_ASSERT_SUCCESS(status);
1996
1997 /* The TEST_ASSERT_SUCCESS call above should change too... */
1998 if(U_SUCCESS(status)) {
1999 fields[0] = NULL;
2000 fields[1] = NULL;
2001 fields[2] = &patternText;
2002 numFields = uregex_splitUText(re, fields, 2, &status);
2003 TEST_ASSERT_SUCCESS(status);
2004
2005 /* The TEST_ASSERT_SUCCESS call above should change too... */
2006 if(U_SUCCESS(status)) {
2007 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2008 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */
2009 TEST_ASSERT(numFields == 2);
2010 TEST_ASSERT_UTEXT(str_first, fields[0]);
2011 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
2012 TEST_ASSERT(fields[2] == &patternText);
2013 }
2014 for(i = 0; i < numFields; i++) {
2015 utext_close(fields[i]);
2016 }
2017 }
2018
2019 uregex_close(re);
2020 }
2021
2022 /* splitUText(), part 2. Patterns with capture groups. The capture group text
2023 * comes out as additional fields. */
2024 {
2025 UChar textToSplit[80];
2026 UText *fields[10];
2027 int32_t numFields;
2028 int32_t i;
2029
2030 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit));
2031
2032 status = U_ZERO_ERROR;
2033 re = uregex_openC("<(.*?)>", 0, NULL, &status);
2034
2035 uregex_setText(re, textToSplit, -1, &status);
2036 TEST_ASSERT_SUCCESS(status);
2037
2038 /* The TEST_ASSERT_SUCCESS call above should change too... */
2039 if(U_SUCCESS(status)) {
2040 memset(fields, 0, sizeof(fields));
2041 numFields = uregex_splitUText(re, fields, 10, &status);
2042 TEST_ASSERT_SUCCESS(status);
2043
2044 /* The TEST_ASSERT_SUCCESS call above should change too... */
2045 if(U_SUCCESS(status)) {
2046 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2047 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2048 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2049 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2050 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2051
2052 TEST_ASSERT(numFields == 5);
2053 TEST_ASSERT_UTEXT(str_first, fields[0]);
2054 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2055 TEST_ASSERT_UTEXT(str_second, fields[2]);
2056 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2057 TEST_ASSERT_UTEXT(str_third, fields[4]);
2058 TEST_ASSERT(fields[5] == NULL);
2059 }
2060 for(i = 0; i < numFields; i++) {
2061 utext_close(fields[i]);
2062 }
2063 }
2064
2065 /* Split with too few output strings available (2) */
2066 status = U_ZERO_ERROR;
2067 fields[0] = NULL;
2068 fields[1] = NULL;
2069 fields[2] = &patternText;
2070 numFields = uregex_splitUText(re, fields, 2, &status);
2071 TEST_ASSERT_SUCCESS(status);
2072
2073 /* The TEST_ASSERT_SUCCESS call above should change too... */
2074 if(U_SUCCESS(status)) {
2075 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2076 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2077 TEST_ASSERT(numFields == 2);
2078 TEST_ASSERT_UTEXT(str_first, fields[0]);
2079 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2080 TEST_ASSERT(fields[2] == &patternText);
2081 }
2082 for(i = 0; i < numFields; i++) {
2083 utext_close(fields[i]);
2084 }
2085
2086
2087 /* Split with too few output strings available (3) */
2088 status = U_ZERO_ERROR;
2089 fields[0] = NULL;
2090 fields[1] = NULL;
2091 fields[2] = NULL;
2092 fields[3] = &patternText;
2093 numFields = uregex_splitUText(re, fields, 3, &status);
2094 TEST_ASSERT_SUCCESS(status);
2095
2096 /* The TEST_ASSERT_SUCCESS call above should change too... */
2097 if(U_SUCCESS(status)) {
2098 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2099 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2100 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2101 TEST_ASSERT(numFields == 3);
2102 TEST_ASSERT_UTEXT(str_first, fields[0]);
2103 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2104 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2105 TEST_ASSERT(fields[3] == &patternText);
2106 }
2107 for(i = 0; i < numFields; i++) {
2108 utext_close(fields[i]);
2109 }
2110
2111 /* Split with just enough output strings available (5) */
2112 status = U_ZERO_ERROR;
2113 fields[0] = NULL;
2114 fields[1] = NULL;
2115 fields[2] = NULL;
2116 fields[3] = NULL;
2117 fields[4] = NULL;
2118 fields[5] = &patternText;
2119 numFields = uregex_splitUText(re, fields, 5, &status);
2120 TEST_ASSERT_SUCCESS(status);
2121
2122 /* The TEST_ASSERT_SUCCESS call above should change too... */
2123 if(U_SUCCESS(status)) {
2124 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2125 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2126 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2127 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2128 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2129
2130 TEST_ASSERT(numFields == 5);
2131 TEST_ASSERT_UTEXT(str_first, fields[0]);
2132 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2133 TEST_ASSERT_UTEXT(str_second, fields[2]);
2134 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2135 TEST_ASSERT_UTEXT(str_third, fields[4]);
2136 TEST_ASSERT(fields[5] == &patternText);
2137 }
2138 for(i = 0; i < numFields; i++) {
2139 utext_close(fields[i]);
2140 }
2141
2142 /* Split, end of text is a field delimiter. */
2143 status = U_ZERO_ERROR;
2144 uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
2145 TEST_ASSERT_SUCCESS(status);
2146
2147 /* The TEST_ASSERT_SUCCESS call above should change too... */
2148 if(U_SUCCESS(status)) {
2149 memset(fields, 0, sizeof(fields));
2150 fields[9] = &patternText;
2151 numFields = uregex_splitUText(re, fields, 9, &status);
2152 TEST_ASSERT_SUCCESS(status);
2153
2154 /* The TEST_ASSERT_SUCCESS call above should change too... */
2155 if(U_SUCCESS(status)) {
2156 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2157 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2158 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2159 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2160 const char str_empty[] = { 0x00 };
2161
2162 TEST_ASSERT(numFields == 5);
2163 TEST_ASSERT_UTEXT(str_first, fields[0]);
2164 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2165 TEST_ASSERT_UTEXT(str_second, fields[2]);
2166 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2167 TEST_ASSERT_UTEXT(str_empty, fields[4]);
2168 TEST_ASSERT(fields[5] == NULL);
2169 TEST_ASSERT(fields[8] == NULL);
2170 TEST_ASSERT(fields[9] == &patternText);
2171 }
2172 for(i = 0; i < numFields; i++) {
2173 utext_close(fields[i]);
2174 }
2175 }
2176
2177 uregex_close(re);
2178 }
2179 utext_close(&patternText);
2180 }
2181
2182
TestRefreshInput(void)2183 static void TestRefreshInput(void) {
2184 /*
2185 * RefreshInput changes out the input of a URegularExpression without
2186 * changing anything else in the match state. Used with Java JNI,
2187 * when Java moves the underlying string storage. This test
2188 * runs a find() loop, moving the text after the first match.
2189 * The right number of matches should still be found.
2190 */
2191 UChar testStr[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */
2192 UChar movedStr[] = { 0, 0, 0, 0, 0, 0};
2193 UErrorCode status = U_ZERO_ERROR;
2194 URegularExpression *re;
2195 UText ut1 = UTEXT_INITIALIZER;
2196 UText ut2 = UTEXT_INITIALIZER;
2197
2198 re = uregex_openC("[ABC]", 0, 0, &status);
2199 TEST_ASSERT_SUCCESS(status);
2200
2201 utext_openUChars(&ut1, testStr, -1, &status);
2202 TEST_ASSERT_SUCCESS(status);
2203 uregex_setUText(re, &ut1, &status);
2204 TEST_ASSERT_SUCCESS(status);
2205
2206 /* Find the first match "A" in the original string */
2207 TEST_ASSERT(uregex_findNext(re, &status));
2208 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2209
2210 /* Move the string, kill the original string. */
2211 u_strcpy(movedStr, testStr);
2212 u_memset(testStr, 0, u_strlen(testStr));
2213 utext_openUChars(&ut2, movedStr, -1, &status);
2214 TEST_ASSERT_SUCCESS(status);
2215 uregex_refreshUText(re, &ut2, &status);
2216 TEST_ASSERT_SUCCESS(status);
2217
2218 /* Find the following two matches, now working in the moved string. */
2219 TEST_ASSERT(uregex_findNext(re, &status));
2220 TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2221 TEST_ASSERT(uregex_findNext(re, &status));
2222 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2223 TEST_ASSERT(FALSE == uregex_findNext(re, &status));
2224
2225 uregex_close(re);
2226 }
2227
2228
TestBug8421(void)2229 static void TestBug8421(void) {
2230 /* Bug 8421: setTimeLimit on a regular expresssion before setting text to be matched
2231 * was failing.
2232 */
2233 URegularExpression *re;
2234 UErrorCode status = U_ZERO_ERROR;
2235 int32_t limit = -1;
2236
2237 re = uregex_openC("abc", 0, 0, &status);
2238 TEST_ASSERT_SUCCESS(status);
2239
2240 limit = uregex_getTimeLimit(re, &status);
2241 TEST_ASSERT_SUCCESS(status);
2242 TEST_ASSERT(limit == 0);
2243
2244 uregex_setTimeLimit(re, 100, &status);
2245 TEST_ASSERT_SUCCESS(status);
2246 limit = uregex_getTimeLimit(re, &status);
2247 TEST_ASSERT_SUCCESS(status);
2248 TEST_ASSERT(limit == 100);
2249
2250 uregex_close(re);
2251 }
2252
FindCallback(const void * context,int64_t matchIndex)2253 static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
2254 return FALSE;
2255 }
2256
MatchCallback(const void * context,int32_t steps)2257 static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
2258 return FALSE;
2259 }
2260
TestBug10815()2261 static void TestBug10815() {
2262 /* Bug 10815: uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
2263 * when the callback function specified by uregex_setMatchCallback() returns FALSE
2264 */
2265 URegularExpression *re;
2266 UErrorCode status = U_ZERO_ERROR;
2267 UChar text[100];
2268
2269
2270 // findNext() with a find progress callback function.
2271
2272 re = uregex_openC(".z", 0, 0, &status);
2273 TEST_ASSERT_SUCCESS(status);
2274
2275 u_uastrncpy(text, "Hello, World.", UPRV_LENGTHOF(text));
2276 uregex_setText(re, text, -1, &status);
2277 TEST_ASSERT_SUCCESS(status);
2278
2279 uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
2280 TEST_ASSERT_SUCCESS(status);
2281
2282 uregex_findNext(re, &status);
2283 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2284
2285 uregex_close(re);
2286
2287 // findNext() with a match progress callback function.
2288
2289 status = U_ZERO_ERROR;
2290 re = uregex_openC("((xxx)*)*y", 0, 0, &status);
2291 TEST_ASSERT_SUCCESS(status);
2292
2293 // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2294 // it will appear to be stuck in a (near) infinite loop.
2295 u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", UPRV_LENGTHOF(text));
2296 uregex_setText(re, text, -1, &status);
2297 TEST_ASSERT_SUCCESS(status);
2298
2299 uregex_setMatchCallback(re, MatchCallback, NULL, &status);
2300 TEST_ASSERT_SUCCESS(status);
2301
2302 uregex_findNext(re, &status);
2303 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2304
2305 uregex_close(re);
2306 }
2307
2308
2309 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
2310