1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 2004-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
9 *
10 * File reapits.c
11 *
12 *********************************************************************************/
13 /*C API TEST FOR Regular Expressions */
14 /**
15 * This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't
16 * try to test the full functionality. It just calls each function and verifies that it
17 * works on a basic level.
18 *
19 * More complete testing of regular expression functionality is done with the C++ tests.
20 **/
21
22 #include "unicode/utypes.h"
23
24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
25
26 #include <stdlib.h>
27 #include <string.h>
28 #include "unicode/uloc.h"
29 #include "unicode/uregex.h"
30 #include "unicode/ustring.h"
31 #include "unicode/utext.h"
32 #include "unicode/utf8.h"
33 #include "cintltst.h"
34 #include "cmemory.h"
35
/* Report a data error, tagged with the invoking file and line, when `status` is an ICU failure code. */
#define TEST_ASSERT_SUCCESS(status) UPRV_BLOCK_MACRO_BEGIN { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
} UPRV_BLOCK_MACRO_END
41
/* Log a test error that quotes the failed expression when `expr` evaluates to false. */
#define TEST_ASSERT(expr) UPRV_BLOCK_MACRO_BEGIN { \
    if (!(expr)) { \
        log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", \
                __FILE__, __LINE__, #expr); \
    } \
} UPRV_BLOCK_MACRO_END
47
/*
 * TEST_SETUP and TEST_TEARDOWN
 * macros to handle the boilerplate around setting up regex test cases.
 * parameters to setup:
 * pattern: The regex pattern, a (char *) null terminated C string.
 * testString: The string data, also a (char *) C string.
 * flags: Regex flags to set when compiling the pattern
 *
 * Put arbitrary test code between SETUP and TEARDOWN.
 * "re" is the compiled, ready-to-go regular expression.
 *
 * The two macros must always be used as a pair: TEST_SETUP opens a block
 * (and an inner "if") that only TEST_TEARDOWN closes.
 * Both macros assign to / read the names "status" and "re", which must
 * already be declared in the enclosing scope.
 * The test code between them runs only when setup left a success status.
 * TEST_TEARDOWN always closes "re" and frees the UChar copy of testString.
 */
#define TEST_SETUP(pattern, testString, flags) UPRV_BLOCK_MACRO_BEGIN { \
    UChar *srcString = NULL; \
    status = U_ZERO_ERROR; \
    re = uregex_openC(pattern, flags, NULL, &status); \
    TEST_ASSERT_SUCCESS(status); \
    int32_t testStringLen = (int32_t)strlen(testString); \
    srcString = (UChar *)malloc( (testStringLen + 2) * sizeof(UChar) ); \
    u_uastrncpy(srcString, testString, testStringLen + 1); \
    uregex_setText(re, srcString, -1, &status); \
    TEST_ASSERT_SUCCESS(status); \
    if (U_SUCCESS(status)) { \
        UPRV_BLOCK_MACRO_BEGIN {} UPRV_BLOCK_MACRO_END

/* Closes the "if" and the block opened by TEST_SETUP, then releases its resources. */
#define TEST_TEARDOWN \
    } \
    TEST_ASSERT_SUCCESS(status); \
    uregex_close(re); \
    free(srcString); \
} UPRV_BLOCK_MACRO_END
78
79
80 /**
81 * @param expected utf-8 array of bytes to be expected
82 */
test_assert_string(const char * expected,const UChar * actual,UBool nulTerm,const char * file,int line)83 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
84 char buf_inside_macro[120];
85 int32_t len = (int32_t)strlen(expected);
86 UBool success;
87 if (nulTerm) {
88 u_austrncpy(buf_inside_macro, (actual), len+1);
89 buf_inside_macro[len+2] = 0;
90 success = (strcmp((expected), buf_inside_macro) == 0);
91 } else {
92 u_austrncpy(buf_inside_macro, (actual), len);
93 buf_inside_macro[len+1] = 0;
94 success = (strncmp((expected), buf_inside_macro, len) == 0);
95 }
96 if (success == FALSE) {
97 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
98 file, line, (expected), buf_inside_macro);
99 }
100 }
101
/* Calls test_assert_string() with the invoking file and line filled in. */
#define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
103
104
equals_utf8_utext(const char * utf8,UText * utext)105 static UBool equals_utf8_utext(const char *utf8, UText *utext) {
106 int32_t u8i = 0;
107 UChar32 u8c = 0;
108 UChar32 utc = 0;
109 UBool stringsEqual = TRUE;
110 utext_setNativeIndex(utext, 0);
111 for (;;) {
112 U8_NEXT_UNSAFE(utf8, u8i, u8c);
113 utc = utext_next32(utext);
114 if (u8c == 0 && utc == U_SENTINEL) {
115 break;
116 }
117 if (u8c != utc || u8c == 0) {
118 stringsEqual = FALSE;
119 break;
120 }
121 }
122 return stringsEqual;
123 }
124
125
/*
 * Check that a UText holds the expected UTF-8 string; on mismatch, log an
 * error with the caller's file and line followed by a dump of the UText.
 * In the dump, code points strictly between 0x20 and 0x7e are logged as
 * characters and everything else as hex.
 */
static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
    UChar32 cp;

    utext_setNativeIndex(actual, 0);
    if (equals_utf8_utext(expected, actual)) {
        return;
    }

    log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
    for (cp = utext_next32From(actual, 0); cp != U_SENTINEL; cp = UTEXT_NEXT32(actual)) {
        if (cp <= 0x20 || cp >= 0x7e) {
            log_err("%#x", cp);
        } else {
            log_err("%c", cp);
        }
    }
    log_err("\"\n");
}
143
/*
 * TEST_ASSERT_UTEXT(const char *expected, UText *actual)
 * Note: Expected is a UTF-8 encoded string, _not_ the system code page.
 * Note: the check moves the native index of "actual".
 */
#define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
149
testUTextEqual(UText * uta,UText * utb)150 static UBool testUTextEqual(UText *uta, UText *utb) {
151 UChar32 ca = 0;
152 UChar32 cb = 0;
153 utext_setNativeIndex(uta, 0);
154 utext_setNativeIndex(utb, 0);
155 do {
156 ca = utext_next32(uta);
157 cb = utext_next32(utb);
158 if (ca != cb) {
159 break;
160 }
161 } while (ca != U_SENTINEL);
162 return ca == cb;
163 }
164
165
166
167
168 static void TestRegexCAPI(void);
169 static void TestBug4315(void);
170 static void TestUTextAPI(void);
171 static void TestRefreshInput(void);
172 static void TestBug8421(void);
173 static void TestBug10815(void);
174
175 void addURegexTest(TestNode** root);
176
/* Register each regex C API test of this file under the "regex/" path of the test tree. */
void addURegexTest(TestNode** root)
{
    addTest(root, TestRegexCAPI,    "regex/TestRegexCAPI");
    addTest(root, TestBug4315,      "regex/TestBug4315");
    addTest(root, TestUTextAPI,     "regex/TestUTextAPI");
    addTest(root, TestRefreshInput, "regex/TestRefreshInput");
    addTest(root, TestBug8421,      "regex/TestBug8421");
    addTest(root, TestBug10815,     "regex/TestBug10815");
}
186
187 /*
188 * Call back function and context struct used for testing
189 * regular expression user callbacks. This test is mostly the same as
190 * the corresponding C++ test in intltest.
191 */
typedef struct callBackContext {
    int32_t maxCalls;   /* TestCallbackFn returns FALSE once numCalls reaches this value */
    int32_t numCalls;   /* incremented by TestCallbackFn on every invocation */
    int32_t lastSteps;  /* "steps" argument of the previous invocation; the next one is expected to be lastSteps+1 */
} callBackContext;
197
198 static UBool U_EXPORT2 U_CALLCONV
TestCallbackFn(const void * context,int32_t steps)199 TestCallbackFn(const void *context, int32_t steps) {
200 callBackContext *info = (callBackContext *)context;
201 if (info->lastSteps+1 != steps) {
202 log_err("incorrect steps in callback. Expected %d, got %d\n", info->lastSteps+1, steps);
203 }
204 info->lastSteps = steps;
205 info->numCalls++;
206 return (info->numCalls < info->maxCalls);
207 }
208
209 /*
210 * Regular Expression C API Tests
211 */
TestRegexCAPI(void)212 static void TestRegexCAPI(void) {
213 UErrorCode status = U_ZERO_ERROR;
214 URegularExpression *re;
215 UChar pat[200];
216 UChar *minus1;
217
218 memset(&minus1, -1, sizeof(minus1));
219
    /* Minimalist open/close */
221 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
222 re = uregex_open(pat, -1, 0, 0, &status);
223 if (U_FAILURE(status)) {
224 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
225 return;
226 }
227 uregex_close(re);
228
229 /* Open with all flag values set */
230 status = U_ZERO_ERROR;
231 re = uregex_open(pat, -1,
232 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
233 0, &status);
234 TEST_ASSERT_SUCCESS(status);
235 uregex_close(re);
236
237 /* Open with an invalid flag */
238 status = U_ZERO_ERROR;
239 re = uregex_open(pat, -1, 0x40000000, 0, &status);
240 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
241 uregex_close(re);
242
243 /* Open with an unimplemented flag */
244 status = U_ZERO_ERROR;
245 re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
246 TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
247 uregex_close(re);
248
249 /* openC with an invalid parameter */
250 status = U_ZERO_ERROR;
251 re = uregex_openC(NULL,
252 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
253 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
254
    /* openC with a failure status already set: must return NULL and leave status unchanged */
256 status = U_USELESS_COLLATOR_ERROR;
257 re = uregex_openC(NULL,
258 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
259 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
260
261 /* openC open from a C string */
262 {
263 const UChar *p;
264 int32_t len;
265 status = U_ZERO_ERROR;
266 re = uregex_openC("abc*", 0, 0, &status);
267 TEST_ASSERT_SUCCESS(status);
268 p = uregex_pattern(re, &len, &status);
269 TEST_ASSERT_SUCCESS(status);
270
271 /* The TEST_ASSERT_SUCCESS above should change too... */
272 if(U_SUCCESS(status)) {
273 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
274 TEST_ASSERT(u_strcmp(pat, p) == 0);
275 TEST_ASSERT(len==(int32_t)strlen("abc*"));
276 }
277
278 uregex_close(re);
279
280 /* TODO: Open with ParseError parameter */
281 }
282
283 /*
284 * clone
285 */
286 {
287 URegularExpression *clone1;
288 URegularExpression *clone2;
289 URegularExpression *clone3;
290 UChar testString1[30];
291 UChar testString2[30];
292 UBool result;
293
294
295 status = U_ZERO_ERROR;
296 re = uregex_openC("abc*", 0, 0, &status);
297 TEST_ASSERT_SUCCESS(status);
298 clone1 = uregex_clone(re, &status);
299 TEST_ASSERT_SUCCESS(status);
300 TEST_ASSERT(clone1 != NULL);
301
302 status = U_ZERO_ERROR;
303 clone2 = uregex_clone(re, &status);
304 TEST_ASSERT_SUCCESS(status);
305 TEST_ASSERT(clone2 != NULL);
306 uregex_close(re);
307
308 status = U_ZERO_ERROR;
309 clone3 = uregex_clone(clone2, &status);
310 TEST_ASSERT_SUCCESS(status);
311 TEST_ASSERT(clone3 != NULL);
312
313 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
314 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
315
316 status = U_ZERO_ERROR;
317 uregex_setText(clone1, testString1, -1, &status);
318 TEST_ASSERT_SUCCESS(status);
319 result = uregex_lookingAt(clone1, 0, &status);
320 TEST_ASSERT_SUCCESS(status);
321 TEST_ASSERT(result==TRUE);
322
323 status = U_ZERO_ERROR;
324 uregex_setText(clone2, testString2, -1, &status);
325 TEST_ASSERT_SUCCESS(status);
326 result = uregex_lookingAt(clone2, 0, &status);
327 TEST_ASSERT_SUCCESS(status);
328 TEST_ASSERT(result==FALSE);
329 result = uregex_find(clone2, 0, &status);
330 TEST_ASSERT_SUCCESS(status);
331 TEST_ASSERT(result==TRUE);
332
333 uregex_close(clone1);
334 uregex_close(clone2);
335 uregex_close(clone3);
336
337 }
338
339 /*
340 * pattern()
341 */
342 {
343 const UChar *resultPat;
344 int32_t resultLen;
345 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));
346 status = U_ZERO_ERROR;
347 re = uregex_open(pat, -1, 0, NULL, &status);
348 resultPat = uregex_pattern(re, &resultLen, &status);
349 TEST_ASSERT_SUCCESS(status);
350
351 /* The TEST_ASSERT_SUCCESS above should change too... */
352 if (U_SUCCESS(status)) {
353 TEST_ASSERT(resultLen == -1);
354 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
355 }
356
357 uregex_close(re);
358
359 status = U_ZERO_ERROR;
360 re = uregex_open(pat, 3, 0, NULL, &status);
361 resultPat = uregex_pattern(re, &resultLen, &status);
362 TEST_ASSERT_SUCCESS(status);
363 TEST_ASSERT_SUCCESS(status);
364
365 /* The TEST_ASSERT_SUCCESS above should change too... */
366 if (U_SUCCESS(status)) {
367 TEST_ASSERT(resultLen == 3);
368 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
369 TEST_ASSERT(u_strlen(resultPat) == 3);
370 }
371
372 uregex_close(re);
373 }
374
375 /*
376 * flags()
377 */
378 {
379 int32_t t;
380
381 status = U_ZERO_ERROR;
382 re = uregex_open(pat, -1, 0, NULL, &status);
383 t = uregex_flags(re, &status);
384 TEST_ASSERT_SUCCESS(status);
385 TEST_ASSERT(t == 0);
386 uregex_close(re);
387
388 status = U_ZERO_ERROR;
389 re = uregex_open(pat, -1, 0, NULL, &status);
390 t = uregex_flags(re, &status);
391 TEST_ASSERT_SUCCESS(status);
392 TEST_ASSERT(t == 0);
393 uregex_close(re);
394
395 status = U_ZERO_ERROR;
396 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
397 t = uregex_flags(re, &status);
398 TEST_ASSERT_SUCCESS(status);
399 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
400 uregex_close(re);
401 }
402
403 /*
404 * setText() and lookingAt()
405 */
406 {
407 UChar text1[50];
408 UChar text2[50];
409 UBool result;
410
411 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1));
412 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
413 status = U_ZERO_ERROR;
414 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
415 re = uregex_open(pat, -1, 0, NULL, &status);
416 TEST_ASSERT_SUCCESS(status);
417
418 /* Operation before doing a setText should fail... */
419 status = U_ZERO_ERROR;
420 uregex_lookingAt(re, 0, &status);
421 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
422
423 status = U_ZERO_ERROR;
424 uregex_setText(re, text1, -1, &status);
425 result = uregex_lookingAt(re, 0, &status);
426 TEST_ASSERT(result == TRUE);
427 TEST_ASSERT_SUCCESS(status);
428
429 status = U_ZERO_ERROR;
430 uregex_setText(re, text2, -1, &status);
431 result = uregex_lookingAt(re, 0, &status);
432 TEST_ASSERT(result == FALSE);
433 TEST_ASSERT_SUCCESS(status);
434
435 status = U_ZERO_ERROR;
436 uregex_setText(re, text1, -1, &status);
437 result = uregex_lookingAt(re, 0, &status);
438 TEST_ASSERT(result == TRUE);
439 TEST_ASSERT_SUCCESS(status);
440
441 status = U_ZERO_ERROR;
442 uregex_setText(re, text1, 5, &status);
443 result = uregex_lookingAt(re, 0, &status);
444 TEST_ASSERT(result == FALSE);
445 TEST_ASSERT_SUCCESS(status);
446
447 status = U_ZERO_ERROR;
448 uregex_setText(re, text1, 6, &status);
449 result = uregex_lookingAt(re, 0, &status);
450 TEST_ASSERT(result == TRUE);
451 TEST_ASSERT_SUCCESS(status);
452
453 uregex_close(re);
454 }
455
456
457 /*
458 * getText()
459 */
460 {
461 UChar text1[50];
462 UChar text2[50];
463 const UChar *result;
464 int32_t textLength;
465
466 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1));
467 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
468 status = U_ZERO_ERROR;
469 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
470 re = uregex_open(pat, -1, 0, NULL, &status);
471
472 uregex_setText(re, text1, -1, &status);
473 result = uregex_getText(re, &textLength, &status);
474 TEST_ASSERT(result == text1);
475 TEST_ASSERT(textLength == -1);
476 TEST_ASSERT_SUCCESS(status);
477
478 status = U_ZERO_ERROR;
479 uregex_setText(re, text2, 7, &status);
480 result = uregex_getText(re, &textLength, &status);
481 TEST_ASSERT(result == text2);
482 TEST_ASSERT(textLength == 7);
483 TEST_ASSERT_SUCCESS(status);
484
485 status = U_ZERO_ERROR;
486 uregex_setText(re, text2, 4, &status);
487 result = uregex_getText(re, &textLength, &status);
488 TEST_ASSERT(result == text2);
489 TEST_ASSERT(textLength == 4);
490 TEST_ASSERT_SUCCESS(status);
491 uregex_close(re);
492 }
493
494 /*
495 * matches()
496 */
497 {
498 UChar text1[50];
499 UBool result;
500 int len;
501 UChar nullString[] = {0,0,0};
502
503 u_uastrncpy(text1, "abcccde", UPRV_LENGTHOF(text1));
504 status = U_ZERO_ERROR;
505 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
506 re = uregex_open(pat, -1, 0, NULL, &status);
507
508 uregex_setText(re, text1, -1, &status);
509 result = uregex_matches(re, 0, &status);
510 TEST_ASSERT(result == FALSE);
511 TEST_ASSERT_SUCCESS(status);
512
513 status = U_ZERO_ERROR;
514 uregex_setText(re, text1, 6, &status);
515 result = uregex_matches(re, 0, &status);
516 TEST_ASSERT(result == TRUE);
517 TEST_ASSERT_SUCCESS(status);
518
519 status = U_ZERO_ERROR;
520 uregex_setText(re, text1, 6, &status);
521 result = uregex_matches(re, 1, &status);
522 TEST_ASSERT(result == FALSE);
523 TEST_ASSERT_SUCCESS(status);
524 uregex_close(re);
525
526 status = U_ZERO_ERROR;
527 re = uregex_openC(".?", 0, NULL, &status);
528 uregex_setText(re, text1, -1, &status);
529 len = u_strlen(text1);
530 result = uregex_matches(re, len, &status);
531 TEST_ASSERT(result == TRUE);
532 TEST_ASSERT_SUCCESS(status);
533
534 status = U_ZERO_ERROR;
535 uregex_setText(re, nullString, -1, &status);
536 TEST_ASSERT_SUCCESS(status);
537 result = uregex_matches(re, 0, &status);
538 TEST_ASSERT(result == TRUE);
539 TEST_ASSERT_SUCCESS(status);
540 uregex_close(re);
541 }
542
543
544 /*
545 * lookingAt() Used in setText test.
546 */
547
548
549 /*
550 * find(), findNext, start, end, reset
551 */
552 {
553 UChar text1[50];
554 UBool result;
555 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));
556 status = U_ZERO_ERROR;
557 re = uregex_openC("rx", 0, NULL, &status);
558
559 uregex_setText(re, text1, -1, &status);
560 result = uregex_find(re, 0, &status);
561 TEST_ASSERT(result == TRUE);
562 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
563 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
564 TEST_ASSERT_SUCCESS(status);
565
566 result = uregex_find(re, 9, &status);
567 TEST_ASSERT(result == TRUE);
568 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
569 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
570 TEST_ASSERT_SUCCESS(status);
571
572 result = uregex_find(re, 14, &status);
573 TEST_ASSERT(result == FALSE);
574 TEST_ASSERT_SUCCESS(status);
575
576 status = U_ZERO_ERROR;
577 uregex_reset(re, 0, &status);
578
579 result = uregex_findNext(re, &status);
580 TEST_ASSERT(result == TRUE);
581 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
582 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
583 TEST_ASSERT_SUCCESS(status);
584
585 result = uregex_findNext(re, &status);
586 TEST_ASSERT(result == TRUE);
587 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
588 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
589 TEST_ASSERT_SUCCESS(status);
590
591 status = U_ZERO_ERROR;
592 uregex_reset(re, 12, &status);
593
594 result = uregex_findNext(re, &status);
595 TEST_ASSERT(result == TRUE);
596 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
597 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
598 TEST_ASSERT_SUCCESS(status);
599
600 result = uregex_findNext(re, &status);
601 TEST_ASSERT(result == FALSE);
602 TEST_ASSERT_SUCCESS(status);
603
604 uregex_close(re);
605 }
606
607 /*
608 * groupCount
609 */
610 {
611 int32_t result;
612
613 status = U_ZERO_ERROR;
614 re = uregex_openC("abc", 0, NULL, &status);
615 result = uregex_groupCount(re, &status);
616 TEST_ASSERT_SUCCESS(status);
617 TEST_ASSERT(result == 0);
618 uregex_close(re);
619
620 status = U_ZERO_ERROR;
621 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
622 result = uregex_groupCount(re, &status);
623 TEST_ASSERT_SUCCESS(status);
624 TEST_ASSERT(result == 3);
625 uregex_close(re);
626
627 }
628
629
630 /*
631 * group()
632 */
633 {
634 UChar text1[80];
635 UChar buf[80];
636 UBool result;
637 int32_t resultSz;
638 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
639
640 status = U_ZERO_ERROR;
641 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
642 TEST_ASSERT_SUCCESS(status);
643
644
645 uregex_setText(re, text1, -1, &status);
646 result = uregex_find(re, 0, &status);
647 TEST_ASSERT(result==TRUE);
648
649 /* Capture Group 0, the full match. Should succeed. */
650 status = U_ZERO_ERROR;
651 resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);
652 TEST_ASSERT_SUCCESS(status);
653 TEST_ASSERT_STRING("abc interior def", buf, TRUE);
654 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
655
656 /* Capture group #1. Should succeed. */
657 status = U_ZERO_ERROR;
658 resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);
659 TEST_ASSERT_SUCCESS(status);
660 TEST_ASSERT_STRING(" interior ", buf, TRUE);
661 TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
662
663 /* Capture group out of range. Error. */
664 status = U_ZERO_ERROR;
665 uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);
666 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
667
668 /* NULL buffer, pure pre-flight */
669 status = U_ZERO_ERROR;
670 resultSz = uregex_group(re, 0, NULL, 0, &status);
671 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
672 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
673
674 /* Too small buffer, truncated string */
675 status = U_ZERO_ERROR;
676 memset(buf, -1, sizeof(buf));
677 resultSz = uregex_group(re, 0, buf, 5, &status);
678 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
679 TEST_ASSERT_STRING("abc i", buf, FALSE);
680 TEST_ASSERT(buf[5] == (UChar)0xffff);
681 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
682
683 /* Output string just fits buffer, no NUL term. */
684 status = U_ZERO_ERROR;
685 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
686 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
687 TEST_ASSERT_STRING("abc interior def", buf, FALSE);
688 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
689 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
690
691 uregex_close(re);
692
693 }
694
695 /*
696 * Regions
697 */
698
699
700 /* SetRegion(), getRegion() do something */
701 TEST_SETUP(".*", "0123456789ABCDEF", 0);
702 UChar resultString[40];
703 TEST_ASSERT(uregex_regionStart(re, &status) == 0);
704 TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
705 uregex_setRegion(re, 3, 6, &status);
706 TEST_ASSERT(uregex_regionStart(re, &status) == 3);
707 TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
708 TEST_ASSERT(uregex_findNext(re, &status));
709 TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3);
710 TEST_ASSERT_STRING("345", resultString, TRUE);
711 TEST_TEARDOWN;
712
713 /* find(start=-1) uses regions */
714 TEST_SETUP(".*", "0123456789ABCDEF", 0);
715 uregex_setRegion(re, 4, 6, &status);
716 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
717 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
718 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
719 TEST_TEARDOWN;
720
721 /* find (start >=0) does not use regions */
722 TEST_SETUP(".*", "0123456789ABCDEF", 0);
723 uregex_setRegion(re, 4, 6, &status);
724 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
725 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
726 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
727 TEST_TEARDOWN;
728
729 /* findNext() obeys regions */
730 TEST_SETUP(".", "0123456789ABCDEF", 0);
731 uregex_setRegion(re, 4, 6, &status);
732 TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
733 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
734 TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
735 TEST_ASSERT(uregex_start(re, 0, &status) == 5);
736 TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
737 TEST_TEARDOWN;
738
739 /* matches(start=-1) uses regions */
740 /* Also, verify that non-greedy *? succeeds in finding the full match. */
741 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
742 uregex_setRegion(re, 4, 6, &status);
743 TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
744 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
745 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
746 TEST_TEARDOWN;
747
748 /* matches (start >=0) does not use regions */
749 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
750 uregex_setRegion(re, 4, 6, &status);
751 TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
752 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
753 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
754 TEST_TEARDOWN;
755
756 /* lookingAt(start=-1) uses regions */
757 /* Also, verify that non-greedy *? finds the first (shortest) match. */
758 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
759 uregex_setRegion(re, 4, 6, &status);
760 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
761 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
762 TEST_ASSERT(uregex_end(re, 0, &status) == 4);
763 TEST_TEARDOWN;
764
765 /* lookingAt (start >=0) does not use regions */
766 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
767 uregex_setRegion(re, 4, 6, &status);
768 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
769 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
770 TEST_ASSERT(uregex_end(re, 0, &status) == 0);
771 TEST_TEARDOWN;
772
773 /* hitEnd() */
774 TEST_SETUP("[a-f]*", "abcdefghij", 0);
775 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
776 TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
777 TEST_TEARDOWN;
778
779 TEST_SETUP("[a-f]*", "abcdef", 0);
780 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
781 TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
782 TEST_TEARDOWN;
783
784 /* requireEnd */
785 TEST_SETUP("abcd", "abcd", 0);
786 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
787 TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
788 TEST_TEARDOWN;
789
790 TEST_SETUP("abcd$", "abcd", 0);
791 TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
792 TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
793 TEST_TEARDOWN;
794
795 /* anchoringBounds */
796 TEST_SETUP("abc$", "abcdef", 0);
797 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
798 uregex_useAnchoringBounds(re, FALSE, &status);
799 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
800
801 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
802 uregex_useAnchoringBounds(re, TRUE, &status);
803 uregex_setRegion(re, 0, 3, &status);
804 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
805 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
806 TEST_TEARDOWN;
807
808 /* Transparent Bounds */
809 TEST_SETUP("abc(?=def)", "abcdef", 0);
810 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
811 uregex_useTransparentBounds(re, TRUE, &status);
812 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
813
814 uregex_useTransparentBounds(re, FALSE, &status);
815 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* No Region */
816 uregex_setRegion(re, 0, 3, &status);
817 TEST_ASSERT(uregex_find(re, -1, &status) == FALSE); /* with region, opaque bounds */
818 uregex_useTransparentBounds(re, TRUE, &status);
819 TEST_ASSERT(uregex_find(re, -1, &status) == TRUE); /* with region, transparent bounds */
820 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
821 TEST_TEARDOWN;
822
823
824 /*
825 * replaceFirst()
826 */
827 {
828 UChar text1[80];
829 UChar text2[80];
830 UChar replText[80];
831 UChar buf[80];
832 int32_t resultSz;
833 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
834 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
835 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
836
837 status = U_ZERO_ERROR;
838 re = uregex_openC("x(.*?)x", 0, NULL, &status);
839 TEST_ASSERT_SUCCESS(status);
840
841 /* Normal case, with match */
842 uregex_setText(re, text1, -1, &status);
843 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
844 TEST_ASSERT_SUCCESS(status);
845 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
846 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
847
848 /* No match. Text should copy to output with no changes. */
849 status = U_ZERO_ERROR;
850 uregex_setText(re, text2, -1, &status);
851 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
852 TEST_ASSERT_SUCCESS(status);
853 TEST_ASSERT_STRING("No match here.", buf, TRUE);
854 TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
855
        /* Match, output exactly fills the buffer: no room for the NUL, so expect U_STRING_NOT_TERMINATED_WARNING. */
857 status = U_ZERO_ERROR;
858 uregex_setText(re, text1, -1, &status);
859 memset(buf, -1, sizeof(buf));
860 resultSz = uregex_replaceFirst(re, replText, -1, buf, (int32_t)strlen("Replace <aa> x1x x...x."), &status);
861 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
862 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
863 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
864 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
865
866 /* Do the replaceFirst again, without first resetting anything.
867 * Should give the same results.
868 */
869 status = U_ZERO_ERROR;
870 memset(buf, -1, sizeof(buf));
871 resultSz = uregex_replaceFirst(re, replText, -1, buf, (int32_t)strlen("Replace <aa> x1x x...x."), &status);
872 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
873 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
874 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
875 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
876
877 /* NULL buffer, zero buffer length */
878 status = U_ZERO_ERROR;
879 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
880 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
881 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
882
883 /* Buffer too small by one */
884 status = U_ZERO_ERROR;
885 memset(buf, -1, sizeof(buf));
886 resultSz = uregex_replaceFirst(re, replText, -1, buf, (int32_t)strlen("Replace <aa> x1x x...x.")-1, &status);
887 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
888 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
889 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
890 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
891
892 uregex_close(re);
893 }
894
895
896 /*
897 * replaceAll()
898 */
899 {
900 UChar text1[80]; /* "Replace xaax x1x x...x." */
901 UChar text2[80]; /* "No match Here" */
902 UChar replText[80]; /* "<$1>" */
903 UChar replText2[80]; /* "<<$1>>" */
904 const char * pattern = "x(.*?)x";
905 const char * expectedResult = "Replace <aa> <1> <...>.";
906 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
907 UChar buf[80];
908 int32_t resultSize;
909 int32_t expectedResultSize;
910 int32_t expectedResultSize2;
911 int32_t i;
912
913 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
914 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
915 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
916 u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));
917 expectedResultSize = (int32_t)strlen(expectedResult);
918 expectedResultSize2 = (int32_t)strlen(expectedResult2);
919
920 status = U_ZERO_ERROR;
921 re = uregex_openC(pattern, 0, NULL, &status);
922 TEST_ASSERT_SUCCESS(status);
923
924 /* Normal case, with match */
925 uregex_setText(re, text1, -1, &status);
926 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
927 TEST_ASSERT_SUCCESS(status);
928 TEST_ASSERT_STRING(expectedResult, buf, TRUE);
929 TEST_ASSERT(resultSize == expectedResultSize);
930
931 /* No match. Text should copy to output with no changes. */
932 status = U_ZERO_ERROR;
933 uregex_setText(re, text2, -1, &status);
934 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
935 TEST_ASSERT_SUCCESS(status);
936 TEST_ASSERT_STRING("No match here.", buf, TRUE);
937 TEST_ASSERT(resultSize == u_strlen(text2));
938
        /* Match, output exactly fills the buffer: no room for the NUL, so expect U_STRING_NOT_TERMINATED_WARNING. */
940 status = U_ZERO_ERROR;
941 uregex_setText(re, text1, -1, &status);
942 memset(buf, -1, sizeof(buf));
943 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
944 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
945 TEST_ASSERT_STRING(expectedResult, buf, FALSE);
946 TEST_ASSERT(resultSize == expectedResultSize);
947 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
948
        /* Do the replaceAll again, without first resetting anything.
950 * Should give the same results.
951 */
952 status = U_ZERO_ERROR;
953 memset(buf, -1, sizeof(buf));
954 resultSize = uregex_replaceAll(re, replText, -1, buf, (int32_t)strlen("Replace xaax x1x x...x."), &status);
955 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
956 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
957 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
958 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
959
960 /* NULL buffer, zero buffer length */
961 status = U_ZERO_ERROR;
962 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
963 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
964 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
965
966 /* Buffer too small. Try every size, which will tickle edge cases
967 * in uregex_appendReplacement (used by replaceAll) */
968 for (i=0; i<expectedResultSize; i++) {
969 char expected[80];
970 status = U_ZERO_ERROR;
971 memset(buf, -1, sizeof(buf));
972 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
973 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
974 strcpy(expected, expectedResult);
975 expected[i] = 0;
976 TEST_ASSERT_STRING(expected, buf, FALSE);
977 TEST_ASSERT(resultSize == expectedResultSize);
978 TEST_ASSERT(buf[i] == (UChar)0xffff);
979 }
980
981 /* Buffer too small. Same as previous test, except this time the replacement
982 * text is longer than the match capture group, making the length of the complete
983 * replacement longer than the original string.
984 */
985 for (i=0; i<expectedResultSize2; i++) {
986 char expected[80];
987 status = U_ZERO_ERROR;
988 memset(buf, -1, sizeof(buf));
989 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
990 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
991 strcpy(expected, expectedResult2);
992 expected[i] = 0;
993 TEST_ASSERT_STRING(expected, buf, FALSE);
994 TEST_ASSERT(resultSize == expectedResultSize2);
995 TEST_ASSERT(buf[i] == (UChar)0xffff);
996 }
997
998
999 uregex_close(re);
1000 }
1001
1002
1003 /*
1004 * appendReplacement()
1005 */
1006 {
1007 UChar text[100];
1008 UChar repl[100];
1009 UChar buf[100];
1010 UChar *bufPtr;
1011 int32_t bufCap;
1012
1013
1014 status = U_ZERO_ERROR;
1015 re = uregex_openC(".*", 0, 0, &status);
1016 TEST_ASSERT_SUCCESS(status);
1017
1018 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));
1019 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1020 uregex_setText(re, text, -1, &status);
1021
1022 /* match covers whole target string */
1023 uregex_find(re, 0, &status);
1024 TEST_ASSERT_SUCCESS(status);
1025 bufPtr = buf;
1026 bufCap = UPRV_LENGTHOF(buf);
1027 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1028 TEST_ASSERT_SUCCESS(status);
1029 TEST_ASSERT_STRING("some other", buf, TRUE);
1030
1031 /* Match has \u \U escapes */
1032 uregex_find(re, 0, &status);
1033 TEST_ASSERT_SUCCESS(status);
1034 bufPtr = buf;
1035 bufCap = UPRV_LENGTHOF(buf);
1036 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1037 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1038 TEST_ASSERT_SUCCESS(status);
1039 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1040
1041 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1042 status = U_ZERO_ERROR;
1043 uregex_find(re, 0, &status);
1044 TEST_ASSERT_SUCCESS(status);
1045 bufPtr = buf;
1046 status = U_BUFFER_OVERFLOW_ERROR;
1047 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
1048 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1049
1050 uregex_close(re);
1051 }
1052
1053
1054 /*
1055 * appendTail(). Checked in ReplaceFirst(), replaceAll().
1056 */
1057
1058 /*
1059 * split()
1060 */
1061 {
1062 UChar textToSplit[80];
1063 UChar text2[80];
1064 UChar buf[200];
1065 UChar *fields[10];
1066 int32_t numFields;
1067 int32_t requiredCapacity;
1068 int32_t spaceNeeded;
1069 int32_t sz;
1070
1071 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit));
1072 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1073
1074 status = U_ZERO_ERROR;
1075 re = uregex_openC(":", 0, NULL, &status);
1076
1077
1078 /* Simple split */
1079
1080 uregex_setText(re, textToSplit, -1, &status);
1081 TEST_ASSERT_SUCCESS(status);
1082
1083 /* The TEST_ASSERT_SUCCESS call above should change too... */
1084 if (U_SUCCESS(status)) {
1085 memset(fields, -1, sizeof(fields));
1086 numFields =
1087 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1088 TEST_ASSERT_SUCCESS(status);
1089
1090 /* The TEST_ASSERT_SUCCESS call above should change too... */
1091 if(U_SUCCESS(status)) {
1092 TEST_ASSERT(numFields == 3);
1093 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1094 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1095 TEST_ASSERT_STRING(" third", fields[2], TRUE);
1096 TEST_ASSERT(fields[3] == NULL);
1097
1098 spaceNeeded = u_strlen(textToSplit) -
1099 (numFields - 1) + /* Field delimiters do not appear in output */
1100 numFields; /* Each field gets a NUL terminator */
1101
1102 TEST_ASSERT(spaceNeeded == requiredCapacity);
1103 }
1104 }
1105
1106 uregex_close(re);
1107
1108
1109 /* Split with too few output strings available */
1110 status = U_ZERO_ERROR;
1111 re = uregex_openC(":", 0, NULL, &status);
1112 uregex_setText(re, textToSplit, -1, &status);
1113 TEST_ASSERT_SUCCESS(status);
1114
1115 /* The TEST_ASSERT_SUCCESS call above should change too... */
1116 if(U_SUCCESS(status)) {
1117 memset(fields, -1, sizeof(fields));
1118 numFields =
1119 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1120 TEST_ASSERT_SUCCESS(status);
1121
1122 /* The TEST_ASSERT_SUCCESS call above should change too... */
1123 if(U_SUCCESS(status)) {
1124 TEST_ASSERT(numFields == 2);
1125 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1126 TEST_ASSERT_STRING(" second: third", fields[1], TRUE);
1127 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1128
1129 spaceNeeded = u_strlen(textToSplit) -
1130 (numFields - 1) + /* Field delimiters do not appear in output */
1131 numFields; /* Each field gets a NUL terminator */
1132
1133 TEST_ASSERT(spaceNeeded == requiredCapacity);
1134
1135 /* Split with a range of output buffer sizes. */
1136 spaceNeeded = u_strlen(textToSplit) -
1137 (numFields - 1) + /* Field delimiters do not appear in output */
1138 numFields; /* Each field gets a NUL terminator */
1139
1140 for (sz=0; sz < spaceNeeded+1; sz++) {
1141 memset(fields, -1, sizeof(fields));
1142 status = U_ZERO_ERROR;
1143 numFields =
1144 uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1145 if (sz >= spaceNeeded) {
1146 TEST_ASSERT_SUCCESS(status);
1147 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1148 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1149 TEST_ASSERT_STRING(" third", fields[2], TRUE);
1150 } else {
1151 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1152 }
1153 TEST_ASSERT(numFields == 3);
1154 TEST_ASSERT(fields[3] == NULL);
1155 TEST_ASSERT(spaceNeeded == requiredCapacity);
1156 }
1157 }
1158 }
1159
1160 uregex_close(re);
1161 }
1162
1163
1164
1165
1166 /* Split(), part 2. Patterns with capture groups. The capture group text
1167 * comes out as additional fields. */
1168 {
1169 UChar textToSplit[80];
1170 UChar buf[200];
1171 UChar *fields[10];
1172 int32_t numFields;
1173 int32_t requiredCapacity;
1174 int32_t spaceNeeded;
1175 int32_t sz;
1176
1177 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit));
1178
1179 status = U_ZERO_ERROR;
1180 re = uregex_openC("<(.*?)>", 0, NULL, &status);
1181
1182 uregex_setText(re, textToSplit, -1, &status);
1183 TEST_ASSERT_SUCCESS(status);
1184
1185 /* The TEST_ASSERT_SUCCESS call above should change too... */
1186 if(U_SUCCESS(status)) {
1187 memset(fields, -1, sizeof(fields));
1188 numFields =
1189 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1190 TEST_ASSERT_SUCCESS(status);
1191
1192 /* The TEST_ASSERT_SUCCESS call above should change too... */
1193 if(U_SUCCESS(status)) {
1194 TEST_ASSERT(numFields == 5);
1195 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1196 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1197 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1198 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1199 TEST_ASSERT_STRING(" third", fields[4], TRUE);
1200 TEST_ASSERT(fields[5] == NULL);
1201 spaceNeeded = (int32_t)strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1202 TEST_ASSERT(spaceNeeded == requiredCapacity);
1203 }
1204 }
1205
1206 /* Split with too few output strings available (2) */
1207 status = U_ZERO_ERROR;
1208 memset(fields, -1, sizeof(fields));
1209 numFields =
1210 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1211 TEST_ASSERT_SUCCESS(status);
1212
1213 /* The TEST_ASSERT_SUCCESS call above should change too... */
1214 if(U_SUCCESS(status)) {
1215 TEST_ASSERT(numFields == 2);
1216 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1217 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE);
1218 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1219
1220 spaceNeeded = (int32_t)strlen("first . second<tag-b> third."); /* "." at NUL positions */
1221 TEST_ASSERT(spaceNeeded == requiredCapacity);
1222 }
1223
1224 /* Split with too few output strings available (3) */
1225 status = U_ZERO_ERROR;
1226 memset(fields, -1, sizeof(fields));
1227 numFields =
1228 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
1229 TEST_ASSERT_SUCCESS(status);
1230
1231 /* The TEST_ASSERT_SUCCESS call above should change too... */
1232 if(U_SUCCESS(status)) {
1233 TEST_ASSERT(numFields == 3);
1234 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1235 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1236 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE);
1237 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1238
1239 spaceNeeded = (int32_t)strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */
1240 TEST_ASSERT(spaceNeeded == requiredCapacity);
1241 }
1242
1243 /* Split with just enough output strings available (5) */
1244 status = U_ZERO_ERROR;
1245 memset(fields, -1, sizeof(fields));
1246 numFields =
1247 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
1248 TEST_ASSERT_SUCCESS(status);
1249
1250 /* The TEST_ASSERT_SUCCESS call above should change too... */
1251 if(U_SUCCESS(status)) {
1252 TEST_ASSERT(numFields == 5);
1253 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1254 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1255 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1256 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1257 TEST_ASSERT_STRING(" third", fields[4], TRUE);
1258 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1259
1260 spaceNeeded = (int32_t)strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1261 TEST_ASSERT(spaceNeeded == requiredCapacity);
1262 }
1263
1264 /* Split, end of text is a field delimiter. */
1265 status = U_ZERO_ERROR;
1266 sz = (int32_t)strlen("first <tag-a> second<tag-b>");
1267 uregex_setText(re, textToSplit, sz, &status);
1268 TEST_ASSERT_SUCCESS(status);
1269
1270 /* The TEST_ASSERT_SUCCESS call above should change too... */
1271 if(U_SUCCESS(status)) {
1272 memset(fields, -1, sizeof(fields));
1273 numFields =
1274 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
1275 TEST_ASSERT_SUCCESS(status);
1276
1277 /* The TEST_ASSERT_SUCCESS call above should change too... */
1278 if(U_SUCCESS(status)) {
1279 TEST_ASSERT(numFields == 5);
1280 TEST_ASSERT_STRING("first ", fields[0], TRUE);
1281 TEST_ASSERT_STRING("tag-a", fields[1], TRUE);
1282 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1283 TEST_ASSERT_STRING("tag-b", fields[3], TRUE);
1284 TEST_ASSERT_STRING("", fields[4], TRUE);
1285 TEST_ASSERT(fields[5] == NULL);
1286 TEST_ASSERT(fields[8] == NULL);
1287 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1288 spaceNeeded = (int32_t)strlen("first .tag-a. second.tag-b.."); /* "." at NUL positions */
1289 TEST_ASSERT(spaceNeeded == requiredCapacity);
1290 }
1291 }
1292
1293 uregex_close(re);
1294 }
1295
1296 /*
1297 * set/getTimeLimit
1298 */
1299 TEST_SETUP("abc$", "abcdef", 0);
1300 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1301 uregex_setTimeLimit(re, 1000, &status);
1302 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1303 TEST_ASSERT_SUCCESS(status);
1304 uregex_setTimeLimit(re, -1, &status);
1305 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1306 status = U_ZERO_ERROR;
1307 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1308 TEST_TEARDOWN;
1309
1310 /*
1311 * set/get Stack Limit
1312 */
1313 TEST_SETUP("abc$", "abcdef", 0);
1314 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1315 uregex_setStackLimit(re, 40000, &status);
1316 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1317 TEST_ASSERT_SUCCESS(status);
1318 uregex_setStackLimit(re, -1, &status);
1319 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1320 status = U_ZERO_ERROR;
1321 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1322 TEST_TEARDOWN;
1323
1324
1325 /*
1326 * Get/Set callback functions
1327 * This test is copied from intltest regex/Callbacks
1328 * The pattern and test data will run long enough to cause the callback
1329 * to be invoked. The nested '+' operators give exponential time
1330 * behavior with increasing string length.
1331 */
1332 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0);
1333 callBackContext cbInfo = {4, 0, 0};
1334 const void *pContext = &cbInfo;
1335 URegexMatchCallback *returnedFn = &TestCallbackFn;
1336
1337 /* Getting the callback fn when it hasn't been set must return NULL */
1338 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1339 TEST_ASSERT_SUCCESS(status);
1340 TEST_ASSERT(returnedFn == NULL);
1341 TEST_ASSERT(pContext == NULL);
1342
1343 /* Set thecallback and do a match. */
1344 /* The callback function should record that it has been called. */
1345 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1346 TEST_ASSERT_SUCCESS(status);
1347 TEST_ASSERT(cbInfo.numCalls == 0);
1348 TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1349 TEST_ASSERT_SUCCESS(status);
1350 TEST_ASSERT(cbInfo.numCalls > 0);
1351
1352 /* Getting the callback should return the values that were set above. */
1353 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1354 TEST_ASSERT(returnedFn == &TestCallbackFn);
1355 TEST_ASSERT(pContext == &cbInfo);
1356
1357 TEST_TEARDOWN;
1358 }
1359
1360
1361
TestBug4315(void)1362 static void TestBug4315(void) {
1363 UErrorCode theICUError = U_ZERO_ERROR;
1364 URegularExpression *theRegEx;
1365 UChar *textBuff;
1366 const char *thePattern;
1367 UChar theString[100];
1368 UChar *destFields[24];
1369 int32_t neededLength1;
1370 int32_t neededLength2;
1371
1372 int32_t wordCount = 0;
1373 int32_t destFieldsSize = 24;
1374
1375 thePattern = "ck ";
1376 u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1377
1378 /* open a regex */
1379 theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1380 TEST_ASSERT_SUCCESS(theICUError);
1381
1382 /* set the input string */
1383 uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1384 TEST_ASSERT_SUCCESS(theICUError);
1385
1386 /* split */
1387 /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1388 * error occurs! */
1389 wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1390 destFieldsSize, &theICUError);
1391
1392 TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1393 TEST_ASSERT(wordCount==3);
1394
1395 if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1396 {
1397 theICUError = U_ZERO_ERROR;
1398 textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1399 wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1400 destFields, destFieldsSize, &theICUError);
1401 TEST_ASSERT(wordCount==3);
1402 TEST_ASSERT_SUCCESS(theICUError);
1403 TEST_ASSERT(neededLength1 == neededLength2);
1404 TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1405 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1406 TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1407 TEST_ASSERT(destFields[3] == NULL);
1408 free(textBuff);
1409 }
1410 uregex_close(theRegEx);
1411 }
1412
1413 /* Based on TestRegexCAPI() */
TestUTextAPI(void)1414 static void TestUTextAPI(void) {
1415 UErrorCode status = U_ZERO_ERROR;
1416 URegularExpression *re;
1417 UText patternText = UTEXT_INITIALIZER;
1418 UChar pat[200];
1419 const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1420
1421 /* Mimimalist open/close */
1422 utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1423 re = uregex_openUText(&patternText, 0, 0, &status);
1424 if (U_FAILURE(status)) {
1425 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1426 utext_close(&patternText);
1427 return;
1428 }
1429 uregex_close(re);
1430
1431 /* Open with all flag values set */
1432 status = U_ZERO_ERROR;
1433 re = uregex_openUText(&patternText,
1434 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1435 0, &status);
1436 TEST_ASSERT_SUCCESS(status);
1437 uregex_close(re);
1438
1439 /* Open with an invalid flag */
1440 status = U_ZERO_ERROR;
1441 re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1442 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1443 uregex_close(re);
1444
1445 /* open with an invalid parameter */
1446 status = U_ZERO_ERROR;
1447 re = uregex_openUText(NULL,
1448 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1449 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1450
1451 /*
1452 * clone
1453 */
1454 {
1455 URegularExpression *clone1;
1456 URegularExpression *clone2;
1457 URegularExpression *clone3;
1458 UChar testString1[30];
1459 UChar testString2[30];
1460 UBool result;
1461
1462
1463 status = U_ZERO_ERROR;
1464 re = uregex_openUText(&patternText, 0, 0, &status);
1465 TEST_ASSERT_SUCCESS(status);
1466 clone1 = uregex_clone(re, &status);
1467 TEST_ASSERT_SUCCESS(status);
1468 TEST_ASSERT(clone1 != NULL);
1469
1470 status = U_ZERO_ERROR;
1471 clone2 = uregex_clone(re, &status);
1472 TEST_ASSERT_SUCCESS(status);
1473 TEST_ASSERT(clone2 != NULL);
1474 uregex_close(re);
1475
1476 status = U_ZERO_ERROR;
1477 clone3 = uregex_clone(clone2, &status);
1478 TEST_ASSERT_SUCCESS(status);
1479 TEST_ASSERT(clone3 != NULL);
1480
1481 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
1482 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
1483
1484 status = U_ZERO_ERROR;
1485 uregex_setText(clone1, testString1, -1, &status);
1486 TEST_ASSERT_SUCCESS(status);
1487 result = uregex_lookingAt(clone1, 0, &status);
1488 TEST_ASSERT_SUCCESS(status);
1489 TEST_ASSERT(result==TRUE);
1490
1491 status = U_ZERO_ERROR;
1492 uregex_setText(clone2, testString2, -1, &status);
1493 TEST_ASSERT_SUCCESS(status);
1494 result = uregex_lookingAt(clone2, 0, &status);
1495 TEST_ASSERT_SUCCESS(status);
1496 TEST_ASSERT(result==FALSE);
1497 result = uregex_find(clone2, 0, &status);
1498 TEST_ASSERT_SUCCESS(status);
1499 TEST_ASSERT(result==TRUE);
1500
1501 uregex_close(clone1);
1502 uregex_close(clone2);
1503 uregex_close(clone3);
1504
1505 }
1506
1507 /*
1508 * pattern() and patternText()
1509 */
1510 {
1511 const UChar *resultPat;
1512 int32_t resultLen;
1513 UText *resultText;
1514 const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1515 const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1516 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
1517 status = U_ZERO_ERROR;
1518
1519 utext_openUTF8(&patternText, str_hello, -1, &status);
1520 re = uregex_open(pat, -1, 0, NULL, &status);
1521 resultPat = uregex_pattern(re, &resultLen, &status);
1522 TEST_ASSERT_SUCCESS(status);
1523
1524 /* The TEST_ASSERT_SUCCESS above should change too... */
1525 if (U_SUCCESS(status)) {
1526 TEST_ASSERT(resultLen == -1);
1527 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1528 }
1529
1530 resultText = uregex_patternUText(re, &status);
1531 TEST_ASSERT_SUCCESS(status);
1532 TEST_ASSERT_UTEXT(str_hello, resultText);
1533
1534 uregex_close(re);
1535
1536 status = U_ZERO_ERROR;
1537 re = uregex_open(pat, 3, 0, NULL, &status);
1538 resultPat = uregex_pattern(re, &resultLen, &status);
1539 TEST_ASSERT_SUCCESS(status);
1540
1541 /* The TEST_ASSERT_SUCCESS above should change too... */
1542 if (U_SUCCESS(status)) {
1543 TEST_ASSERT(resultLen == 3);
1544 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1545 TEST_ASSERT(u_strlen(resultPat) == 3);
1546 }
1547
1548 resultText = uregex_patternUText(re, &status);
1549 TEST_ASSERT_SUCCESS(status);
1550 TEST_ASSERT_UTEXT(str_hel, resultText);
1551
1552 uregex_close(re);
1553 }
1554
1555 /*
1556 * setUText() and lookingAt()
1557 */
1558 {
1559 UText text1 = UTEXT_INITIALIZER;
1560 UText text2 = UTEXT_INITIALIZER;
1561 UBool result;
1562 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1563 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1564 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1565 status = U_ZERO_ERROR;
1566 utext_openUTF8(&text1, str_abcccd, -1, &status);
1567 utext_openUTF8(&text2, str_abcccxd, -1, &status);
1568
1569 utext_openUTF8(&patternText, str_abcd, -1, &status);
1570 re = uregex_openUText(&patternText, 0, NULL, &status);
1571 TEST_ASSERT_SUCCESS(status);
1572
1573 /* Operation before doing a setText should fail... */
1574 status = U_ZERO_ERROR;
1575 uregex_lookingAt(re, 0, &status);
1576 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1577
1578 status = U_ZERO_ERROR;
1579 uregex_setUText(re, &text1, &status);
1580 result = uregex_lookingAt(re, 0, &status);
1581 TEST_ASSERT(result == TRUE);
1582 TEST_ASSERT_SUCCESS(status);
1583
1584 status = U_ZERO_ERROR;
1585 uregex_setUText(re, &text2, &status);
1586 result = uregex_lookingAt(re, 0, &status);
1587 TEST_ASSERT(result == FALSE);
1588 TEST_ASSERT_SUCCESS(status);
1589
1590 status = U_ZERO_ERROR;
1591 uregex_setUText(re, &text1, &status);
1592 result = uregex_lookingAt(re, 0, &status);
1593 TEST_ASSERT(result == TRUE);
1594 TEST_ASSERT_SUCCESS(status);
1595
1596 uregex_close(re);
1597 utext_close(&text1);
1598 utext_close(&text2);
1599 }
1600
1601
1602 /*
1603 * getText() and getUText()
1604 */
1605 {
1606 UText text1 = UTEXT_INITIALIZER;
1607 UText text2 = UTEXT_INITIALIZER;
1608 UChar text2Chars[20];
1609 UText *resultText;
1610 const UChar *result;
1611 int32_t textLength;
1612 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1613 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1614 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1615
1616
1617 status = U_ZERO_ERROR;
1618 utext_openUTF8(&text1, str_abcccd, -1, &status);
1619 u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
1620 utext_openUChars(&text2, text2Chars, -1, &status);
1621
1622 utext_openUTF8(&patternText, str_abcd, -1, &status);
1623 re = uregex_openUText(&patternText, 0, NULL, &status);
1624
1625 /* First set a UText */
1626 uregex_setUText(re, &text1, &status);
1627 resultText = uregex_getUText(re, NULL, &status);
1628 TEST_ASSERT_SUCCESS(status);
1629 TEST_ASSERT(resultText != &text1);
1630 utext_setNativeIndex(resultText, 0);
1631 utext_setNativeIndex(&text1, 0);
1632 TEST_ASSERT(testUTextEqual(resultText, &text1));
1633 utext_close(resultText);
1634
1635 result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1636 (void)result; /* Suppress set but not used warning. */
1637 TEST_ASSERT(textLength == -1 || textLength == 6);
1638 resultText = uregex_getUText(re, NULL, &status);
1639 TEST_ASSERT_SUCCESS(status);
1640 TEST_ASSERT(resultText != &text1);
1641 utext_setNativeIndex(resultText, 0);
1642 utext_setNativeIndex(&text1, 0);
1643 TEST_ASSERT(testUTextEqual(resultText, &text1));
1644 utext_close(resultText);
1645
1646 /* Then set a UChar * */
1647 uregex_setText(re, text2Chars, 7, &status);
1648 resultText = uregex_getUText(re, NULL, &status);
1649 TEST_ASSERT_SUCCESS(status);
1650 utext_setNativeIndex(resultText, 0);
1651 utext_setNativeIndex(&text2, 0);
1652 TEST_ASSERT(testUTextEqual(resultText, &text2));
1653 utext_close(resultText);
1654 result = uregex_getText(re, &textLength, &status);
1655 TEST_ASSERT(textLength == 7);
1656
1657 uregex_close(re);
1658 utext_close(&text1);
1659 utext_close(&text2);
1660 }
1661
1662 /*
1663 * matches()
1664 */
1665 {
1666 UText text1 = UTEXT_INITIALIZER;
1667 UBool result;
1668 UText nullText = UTEXT_INITIALIZER;
1669 const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1670 const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1671
1672 status = U_ZERO_ERROR;
1673 utext_openUTF8(&text1, str_abcccde, -1, &status);
1674 utext_openUTF8(&patternText, str_abcd, -1, &status);
1675 re = uregex_openUText(&patternText, 0, NULL, &status);
1676
1677 uregex_setUText(re, &text1, &status);
1678 result = uregex_matches(re, 0, &status);
1679 TEST_ASSERT(result == FALSE);
1680 TEST_ASSERT_SUCCESS(status);
1681 uregex_close(re);
1682
1683 status = U_ZERO_ERROR;
1684 re = uregex_openC(".?", 0, NULL, &status);
1685 uregex_setUText(re, &text1, &status);
1686 result = uregex_matches(re, 7, &status);
1687 TEST_ASSERT(result == TRUE);
1688 TEST_ASSERT_SUCCESS(status);
1689
1690 status = U_ZERO_ERROR;
1691 utext_openUTF8(&nullText, "", -1, &status);
1692 uregex_setUText(re, &nullText, &status);
1693 TEST_ASSERT_SUCCESS(status);
1694 result = uregex_matches(re, 0, &status);
1695 TEST_ASSERT(result == TRUE);
1696 TEST_ASSERT_SUCCESS(status);
1697
1698 uregex_close(re);
1699 utext_close(&text1);
1700 utext_close(&nullText);
1701 }
1702
1703
1704 /*
1705 * lookingAt() Used in setText test.
1706 */
1707
1708
1709 /*
1710 * find(), findNext, start, end, reset
1711 */
1712 {
1713 UChar text1[50];
1714 UBool result;
1715 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));
1716 status = U_ZERO_ERROR;
1717 re = uregex_openC("rx", 0, NULL, &status);
1718
1719 uregex_setText(re, text1, -1, &status);
1720 result = uregex_find(re, 0, &status);
1721 TEST_ASSERT(result == TRUE);
1722 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1723 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1724 TEST_ASSERT_SUCCESS(status);
1725
1726 result = uregex_find(re, 9, &status);
1727 TEST_ASSERT(result == TRUE);
1728 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1729 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1730 TEST_ASSERT_SUCCESS(status);
1731
1732 result = uregex_find(re, 14, &status);
1733 TEST_ASSERT(result == FALSE);
1734 TEST_ASSERT_SUCCESS(status);
1735
1736 status = U_ZERO_ERROR;
1737 uregex_reset(re, 0, &status);
1738
1739 result = uregex_findNext(re, &status);
1740 TEST_ASSERT(result == TRUE);
1741 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1742 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1743 TEST_ASSERT_SUCCESS(status);
1744
1745 result = uregex_findNext(re, &status);
1746 TEST_ASSERT(result == TRUE);
1747 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1748 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1749 TEST_ASSERT_SUCCESS(status);
1750
1751 status = U_ZERO_ERROR;
1752 uregex_reset(re, 12, &status);
1753
1754 result = uregex_findNext(re, &status);
1755 TEST_ASSERT(result == TRUE);
1756 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1757 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1758 TEST_ASSERT_SUCCESS(status);
1759
1760 result = uregex_findNext(re, &status);
1761 TEST_ASSERT(result == FALSE);
1762 TEST_ASSERT_SUCCESS(status);
1763
1764 uregex_close(re);
1765 }
1766
1767 /*
1768 * groupUText()
1769 */
1770 {
1771 UChar text1[80];
1772 UText *actual;
1773 UBool result;
1774 int64_t groupLen = 0;
1775 UChar groupBuf[20];
1776
1777 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
1778
1779 status = U_ZERO_ERROR;
1780 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1781 TEST_ASSERT_SUCCESS(status);
1782
1783 uregex_setText(re, text1, -1, &status);
1784 result = uregex_find(re, 0, &status);
1785 TEST_ASSERT(result==TRUE);
1786
1787 /* Capture Group 0 with shallow clone API. Should succeed. */
1788 status = U_ZERO_ERROR;
1789 actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
1790 TEST_ASSERT_SUCCESS(status);
1791
1792 TEST_ASSERT(utext_getNativeIndex(actual) == 6); /* index of "abc " within "noise abc ..." */
1793 TEST_ASSERT(groupLen == 16); /* length of "abc interior def" */
1794 utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1795
1796 TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
1797 utext_close(actual);
1798
1799 /* Capture group #1. Should succeed. */
1800 status = U_ZERO_ERROR;
1801
1802 actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
1803 TEST_ASSERT_SUCCESS(status);
1804 TEST_ASSERT(9 == utext_getNativeIndex(actual)); /* index of " interior " within "noise abc interior def ... " */
1805 /* (within the string text1) */
1806 TEST_ASSERT(10 == groupLen); /* length of " interior " */
1807 utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1808 TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
1809
1810 utext_close(actual);
1811
1812 /* Capture group out of range. Error. */
1813 status = U_ZERO_ERROR;
1814 actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
1815 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1816 utext_close(actual);
1817
1818 uregex_close(re);
1819 }
1820
1821 /*
1822 * replaceFirst()
1823 */
1824 {
1825 UChar text1[80];
1826 UChar text2[80];
1827 UText replText = UTEXT_INITIALIZER;
1828 UText *result;
1829 const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1830 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1831 const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1832 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
1833 const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1834 const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1835 status = U_ZERO_ERROR;
1836 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
1837 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1838 utext_openUTF8(&replText, str_1x, -1, &status);
1839
1840 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1841 TEST_ASSERT_SUCCESS(status);
1842
1843 /* Normal case, with match */
1844 uregex_setText(re, text1, -1, &status);
1845 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1846 TEST_ASSERT_SUCCESS(status);
1847 TEST_ASSERT_UTEXT(str_Replxxx, result);
1848 utext_close(result);
1849
1850 /* No match. Text should copy to output with no changes. */
1851 uregex_setText(re, text2, -1, &status);
1852 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1853 TEST_ASSERT_SUCCESS(status);
1854 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1855 utext_close(result);
1856
1857 /* Unicode escapes */
1858 uregex_setText(re, text1, -1, &status);
1859 utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1860 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1861 TEST_ASSERT_SUCCESS(status);
1862 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1863 utext_close(result);
1864
1865 uregex_close(re);
1866 utext_close(&replText);
1867 }
1868
1869
1870 /*
1871 * replaceAll()
1872 */
1873 {
1874 UChar text1[80];
1875 UChar text2[80];
1876 UText replText = UTEXT_INITIALIZER;
1877 UText *result;
1878 const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1879 const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1880 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1881 status = U_ZERO_ERROR;
1882 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
1883 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1884 utext_openUTF8(&replText, str_1, -1, &status);
1885
1886 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1887 TEST_ASSERT_SUCCESS(status);
1888
1889 /* Normal case, with match */
1890 uregex_setText(re, text1, -1, &status);
1891 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1892 TEST_ASSERT_SUCCESS(status);
1893 TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1894 utext_close(result);
1895
1896 /* No match. Text should copy to output with no changes. */
1897 uregex_setText(re, text2, -1, &status);
1898 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1899 TEST_ASSERT_SUCCESS(status);
1900 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1901 utext_close(result);
1902
1903 uregex_close(re);
1904 utext_close(&replText);
1905 }
1906
1907
1908 /*
1909 * appendReplacement()
1910 */
1911 {
1912 UChar text[100];
1913 UChar repl[100];
1914 UChar buf[100];
1915 UChar *bufPtr;
1916 int32_t bufCap;
1917
1918 status = U_ZERO_ERROR;
1919 re = uregex_openC(".*", 0, 0, &status);
1920 TEST_ASSERT_SUCCESS(status);
1921
1922 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));
1923 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1924 uregex_setText(re, text, -1, &status);
1925
1926 /* match covers whole target string */
1927 uregex_find(re, 0, &status);
1928 TEST_ASSERT_SUCCESS(status);
1929 bufPtr = buf;
1930 bufCap = UPRV_LENGTHOF(buf);
1931 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1932 TEST_ASSERT_SUCCESS(status);
1933 TEST_ASSERT_STRING("some other", buf, TRUE);
1934
1935 /* Match has \u \U escapes */
1936 uregex_find(re, 0, &status);
1937 TEST_ASSERT_SUCCESS(status);
1938 bufPtr = buf;
1939 bufCap = UPRV_LENGTHOF(buf);
1940 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1941 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1942 TEST_ASSERT_SUCCESS(status);
1943 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1944
1945 uregex_close(re);
1946 }
1947
1948
1949 /*
1950 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1951 */
1952
1953 /*
1954 * splitUText()
1955 */
1956 {
1957 UChar textToSplit[80];
1958 UChar text2[80];
1959 UText *fields[10];
1960 int32_t numFields;
1961 int32_t i;
1962
1963 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit));
1964 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1965
1966 status = U_ZERO_ERROR;
1967 re = uregex_openC(":", 0, NULL, &status);
1968
1969
1970 /* Simple split */
1971
1972 uregex_setText(re, textToSplit, -1, &status);
1973 TEST_ASSERT_SUCCESS(status);
1974
1975 /* The TEST_ASSERT_SUCCESS call above should change too... */
1976 if (U_SUCCESS(status)) {
1977 memset(fields, 0, sizeof(fields));
1978 numFields = uregex_splitUText(re, fields, 10, &status);
1979 TEST_ASSERT_SUCCESS(status);
1980
1981 /* The TEST_ASSERT_SUCCESS call above should change too... */
1982 if(U_SUCCESS(status)) {
1983 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1984 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
1985 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */
1986 TEST_ASSERT(numFields == 3);
1987 TEST_ASSERT_UTEXT(str_first, fields[0]);
1988 TEST_ASSERT_UTEXT(str_second, fields[1]);
1989 TEST_ASSERT_UTEXT(str_third, fields[2]);
1990 TEST_ASSERT(fields[3] == NULL);
1991 }
1992 for(i = 0; i < numFields; i++) {
1993 utext_close(fields[i]);
1994 }
1995 }
1996
1997 uregex_close(re);
1998
1999
2000 /* Split with too few output strings available */
2001 status = U_ZERO_ERROR;
2002 re = uregex_openC(":", 0, NULL, &status);
2003 uregex_setText(re, textToSplit, -1, &status);
2004 TEST_ASSERT_SUCCESS(status);
2005
2006 /* The TEST_ASSERT_SUCCESS call above should change too... */
2007 if(U_SUCCESS(status)) {
2008 fields[0] = NULL;
2009 fields[1] = NULL;
2010 fields[2] = &patternText;
2011 numFields = uregex_splitUText(re, fields, 2, &status);
2012 TEST_ASSERT_SUCCESS(status);
2013
2014 /* The TEST_ASSERT_SUCCESS call above should change too... */
2015 if(U_SUCCESS(status)) {
2016 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2017 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */
2018 TEST_ASSERT(numFields == 2);
2019 TEST_ASSERT_UTEXT(str_first, fields[0]);
2020 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
2021 TEST_ASSERT(fields[2] == &patternText);
2022 }
2023 for(i = 0; i < numFields; i++) {
2024 utext_close(fields[i]);
2025 }
2026 }
2027
2028 uregex_close(re);
2029 }
2030
2031 /* splitUText(), part 2. Patterns with capture groups. The capture group text
2032 * comes out as additional fields. */
2033 {
2034 UChar textToSplit[80];
2035 UText *fields[10];
2036 int32_t numFields;
2037 int32_t i;
2038
2039 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit));
2040
2041 status = U_ZERO_ERROR;
2042 re = uregex_openC("<(.*?)>", 0, NULL, &status);
2043
2044 uregex_setText(re, textToSplit, -1, &status);
2045 TEST_ASSERT_SUCCESS(status);
2046
2047 /* The TEST_ASSERT_SUCCESS call above should change too... */
2048 if(U_SUCCESS(status)) {
2049 memset(fields, 0, sizeof(fields));
2050 numFields = uregex_splitUText(re, fields, 10, &status);
2051 TEST_ASSERT_SUCCESS(status);
2052
2053 /* The TEST_ASSERT_SUCCESS call above should change too... */
2054 if(U_SUCCESS(status)) {
2055 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2056 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2057 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2058 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2059 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2060
2061 TEST_ASSERT(numFields == 5);
2062 TEST_ASSERT_UTEXT(str_first, fields[0]);
2063 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2064 TEST_ASSERT_UTEXT(str_second, fields[2]);
2065 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2066 TEST_ASSERT_UTEXT(str_third, fields[4]);
2067 TEST_ASSERT(fields[5] == NULL);
2068 }
2069 for(i = 0; i < numFields; i++) {
2070 utext_close(fields[i]);
2071 }
2072 }
2073
2074 /* Split with too few output strings available (2) */
2075 status = U_ZERO_ERROR;
2076 fields[0] = NULL;
2077 fields[1] = NULL;
2078 fields[2] = &patternText;
2079 numFields = uregex_splitUText(re, fields, 2, &status);
2080 TEST_ASSERT_SUCCESS(status);
2081
2082 /* The TEST_ASSERT_SUCCESS call above should change too... */
2083 if(U_SUCCESS(status)) {
2084 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2085 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2086 TEST_ASSERT(numFields == 2);
2087 TEST_ASSERT_UTEXT(str_first, fields[0]);
2088 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2089 TEST_ASSERT(fields[2] == &patternText);
2090 }
2091 for(i = 0; i < numFields; i++) {
2092 utext_close(fields[i]);
2093 }
2094
2095
2096 /* Split with too few output strings available (3) */
2097 status = U_ZERO_ERROR;
2098 fields[0] = NULL;
2099 fields[1] = NULL;
2100 fields[2] = NULL;
2101 fields[3] = &patternText;
2102 numFields = uregex_splitUText(re, fields, 3, &status);
2103 TEST_ASSERT_SUCCESS(status);
2104
2105 /* The TEST_ASSERT_SUCCESS call above should change too... */
2106 if(U_SUCCESS(status)) {
2107 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2108 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2109 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2110 TEST_ASSERT(numFields == 3);
2111 TEST_ASSERT_UTEXT(str_first, fields[0]);
2112 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2113 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2114 TEST_ASSERT(fields[3] == &patternText);
2115 }
2116 for(i = 0; i < numFields; i++) {
2117 utext_close(fields[i]);
2118 }
2119
2120 /* Split with just enough output strings available (5) */
2121 status = U_ZERO_ERROR;
2122 fields[0] = NULL;
2123 fields[1] = NULL;
2124 fields[2] = NULL;
2125 fields[3] = NULL;
2126 fields[4] = NULL;
2127 fields[5] = &patternText;
2128 numFields = uregex_splitUText(re, fields, 5, &status);
2129 TEST_ASSERT_SUCCESS(status);
2130
2131 /* The TEST_ASSERT_SUCCESS call above should change too... */
2132 if(U_SUCCESS(status)) {
2133 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2134 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2135 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2136 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2137 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2138
2139 TEST_ASSERT(numFields == 5);
2140 TEST_ASSERT_UTEXT(str_first, fields[0]);
2141 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2142 TEST_ASSERT_UTEXT(str_second, fields[2]);
2143 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2144 TEST_ASSERT_UTEXT(str_third, fields[4]);
2145 TEST_ASSERT(fields[5] == &patternText);
2146 }
2147 for(i = 0; i < numFields; i++) {
2148 utext_close(fields[i]);
2149 }
2150
2151 /* Split, end of text is a field delimiter. */
2152 status = U_ZERO_ERROR;
2153 uregex_setText(re, textToSplit, (int32_t)strlen("first <tag-a> second<tag-b>"), &status);
2154 TEST_ASSERT_SUCCESS(status);
2155
2156 /* The TEST_ASSERT_SUCCESS call above should change too... */
2157 if(U_SUCCESS(status)) {
2158 memset(fields, 0, sizeof(fields));
2159 fields[9] = &patternText;
2160 numFields = uregex_splitUText(re, fields, 9, &status);
2161 TEST_ASSERT_SUCCESS(status);
2162
2163 /* The TEST_ASSERT_SUCCESS call above should change too... */
2164 if(U_SUCCESS(status)) {
2165 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2166 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2167 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2168 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2169 const char str_empty[] = { 0x00 };
2170
2171 TEST_ASSERT(numFields == 5);
2172 TEST_ASSERT_UTEXT(str_first, fields[0]);
2173 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2174 TEST_ASSERT_UTEXT(str_second, fields[2]);
2175 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2176 TEST_ASSERT_UTEXT(str_empty, fields[4]);
2177 TEST_ASSERT(fields[5] == NULL);
2178 TEST_ASSERT(fields[8] == NULL);
2179 TEST_ASSERT(fields[9] == &patternText);
2180 }
2181 for(i = 0; i < numFields; i++) {
2182 utext_close(fields[i]);
2183 }
2184 }
2185
2186 uregex_close(re);
2187 }
2188 utext_close(&patternText);
2189 }
2190
2191
TestRefreshInput(void)2192 static void TestRefreshInput(void) {
2193 /*
2194 * RefreshInput changes out the input of a URegularExpression without
2195 * changing anything else in the match state. Used with Java JNI,
2196 * when Java moves the underlying string storage. This test
2197 * runs a find() loop, moving the text after the first match.
2198 * The right number of matches should still be found.
2199 */
2200 UChar testStr[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */
2201 UChar movedStr[] = { 0, 0, 0, 0, 0, 0};
2202 UErrorCode status = U_ZERO_ERROR;
2203 URegularExpression *re;
2204 UText ut1 = UTEXT_INITIALIZER;
2205 UText ut2 = UTEXT_INITIALIZER;
2206
2207 re = uregex_openC("[ABC]", 0, 0, &status);
2208 TEST_ASSERT_SUCCESS(status);
2209
2210 utext_openUChars(&ut1, testStr, -1, &status);
2211 TEST_ASSERT_SUCCESS(status);
2212 uregex_setUText(re, &ut1, &status);
2213 TEST_ASSERT_SUCCESS(status);
2214
2215 /* Find the first match "A" in the original string */
2216 TEST_ASSERT(uregex_findNext(re, &status));
2217 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2218
2219 /* Move the string, kill the original string. */
2220 u_strcpy(movedStr, testStr);
2221 u_memset(testStr, 0, u_strlen(testStr));
2222 utext_openUChars(&ut2, movedStr, -1, &status);
2223 TEST_ASSERT_SUCCESS(status);
2224 uregex_refreshUText(re, &ut2, &status);
2225 TEST_ASSERT_SUCCESS(status);
2226
2227 /* Find the following two matches, now working in the moved string. */
2228 TEST_ASSERT(uregex_findNext(re, &status));
2229 TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2230 TEST_ASSERT(uregex_findNext(re, &status));
2231 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2232 TEST_ASSERT(FALSE == uregex_findNext(re, &status));
2233
2234 uregex_close(re);
2235 }
2236
2237
TestBug8421(void)2238 static void TestBug8421(void) {
2239 /* Bug 8421: setTimeLimit on a regular expresssion before setting text to be matched
2240 * was failing.
2241 */
2242 URegularExpression *re;
2243 UErrorCode status = U_ZERO_ERROR;
2244 int32_t limit = -1;
2245
2246 re = uregex_openC("abc", 0, 0, &status);
2247 TEST_ASSERT_SUCCESS(status);
2248
2249 limit = uregex_getTimeLimit(re, &status);
2250 TEST_ASSERT_SUCCESS(status);
2251 TEST_ASSERT(limit == 0);
2252
2253 uregex_setTimeLimit(re, 100, &status);
2254 TEST_ASSERT_SUCCESS(status);
2255 limit = uregex_getTimeLimit(re, &status);
2256 TEST_ASSERT_SUCCESS(status);
2257 TEST_ASSERT(limit == 100);
2258
2259 uregex_close(re);
2260 }
2261
FindCallback(const void * context,int64_t matchIndex)2262 static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
2263 // suppress compiler warnings about unused variables
2264 (void)context;
2265 (void)matchIndex;
2266 return FALSE;
2267 }
2268
MatchCallback(const void * context,int32_t steps)2269 static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
2270 // suppress compiler warnings about unused variables
2271 (void)context;
2272 (void)steps;
2273 return FALSE;
2274 }
2275
TestBug10815()2276 static void TestBug10815() {
2277 /* Bug 10815: uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
2278 * when the callback function specified by uregex_setMatchCallback() returns FALSE
2279 */
2280 URegularExpression *re;
2281 UErrorCode status = U_ZERO_ERROR;
2282 UChar text[100];
2283
2284
2285 // findNext() with a find progress callback function.
2286
2287 re = uregex_openC(".z", 0, 0, &status);
2288 TEST_ASSERT_SUCCESS(status);
2289
2290 u_uastrncpy(text, "Hello, World.", UPRV_LENGTHOF(text));
2291 uregex_setText(re, text, -1, &status);
2292 TEST_ASSERT_SUCCESS(status);
2293
2294 uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
2295 TEST_ASSERT_SUCCESS(status);
2296
2297 uregex_findNext(re, &status);
2298 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2299
2300 uregex_close(re);
2301
2302 // findNext() with a match progress callback function.
2303
2304 status = U_ZERO_ERROR;
2305 re = uregex_openC("((xxx)*)*y", 0, 0, &status);
2306 TEST_ASSERT_SUCCESS(status);
2307
2308 // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2309 // it will appear to be stuck in a (near) infinite loop.
2310 u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", UPRV_LENGTHOF(text));
2311 uregex_setText(re, text, -1, &status);
2312 TEST_ASSERT_SUCCESS(status);
2313
2314 uregex_setMatchCallback(re, MatchCallback, NULL, &status);
2315 TEST_ASSERT_SUCCESS(status);
2316
2317 uregex_findNext(re, &status);
2318 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2319
2320 uregex_close(re);
2321 }
2322
2323
2324 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
2325