1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 2004-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /********************************************************************************
9 *
10 * File reapits.c
11 *
12 *********************************************************************************/
13 /*C API TEST FOR Regular Expressions */
14 /**
15 * This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't
16 * try to test the full functionality. It just calls each function and verifies that it
17 * works on a basic level.
18 *
19 * More complete testing of regular expression functionality is done with the C++ tests.
20 **/
21
22 #include "unicode/utypes.h"
23
24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
25
26 #include <stdbool.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include "unicode/uloc.h"
30 #include "unicode/uregex.h"
31 #include "unicode/ustring.h"
32 #include "unicode/utext.h"
33 #include "unicode/utf8.h"
34 #include "cintltst.h"
35 #include "cmemory.h"
36
/*
 * TEST_ASSERT_SUCCESS(status)
 *     Reports a data error, tagged with the invoking file and line, when
 *     "status" holds a failure code. Does nothing otherwise.
 */
#define TEST_ASSERT_SUCCESS(status) UPRV_BLOCK_MACRO_BEGIN { \
    if (U_FAILURE(status)) { \
        log_data_err( \
            "Failure at file %s:%d - error = %s (Are you missing data?)\n", \
            __FILE__, \
            __LINE__, \
            u_errorName(status)); \
    } \
} UPRV_BLOCK_MACRO_END
42
/*
 * TEST_ASSERT(expr)
 *     Logs a test failure, quoting the asserted expression together with the
 *     invoking file and line, when "expr" evaluates to false.
 */
#define TEST_ASSERT(expr) UPRV_BLOCK_MACRO_BEGIN { \
    if (!(expr)) { \
        log_err( \
            "Test Failure at file %s:%d - ASSERT(%s) failed.\n", \
            __FILE__, \
            __LINE__, \
            #expr); \
    } \
} UPRV_BLOCK_MACRO_END
48
/*
 *   TEST_SETUP and TEST_TEARDOWN
 *         macros to handle the boilerplate around setting up regex test cases.
 *         parameters to setup:
 *              pattern:     The regex pattern, a (char *) null terminated C string.
 *              testString:  The string data, also a (char *) C string.
 *              flags:       Regex flags to set when compiling the pattern
 *
 *         Put arbitrary test code between SETUP and TEARDOWN.
 *         "re" is the compiled, ready-to-go regular expression.
 *
 *         Both macros assign to variables named "status" and "re", which must
 *         already be declared in the enclosing scope.
 *
 *         TEST_SETUP opens a scope and an "if (U_SUCCESS(status))" body that
 *         TEST_TEARDOWN closes, so the two must always be used as a pair.
 *         The code between them is skipped when setup fails; teardown still
 *         closes the regex and frees the converted test string.
 */
#define TEST_SETUP(pattern, testString, flags) UPRV_BLOCK_MACRO_BEGIN { \
    UChar *srcString = NULL; \
    status = U_ZERO_ERROR; \
    re = uregex_openC(pattern, flags, NULL, &status); \
    TEST_ASSERT_SUCCESS(status); \
    int32_t testStringLen = (int32_t)strlen(testString); \
    srcString = (UChar *)malloc( (testStringLen + 2) * sizeof(UChar) ); \
    if (srcString == NULL) { \
        /* Allocation failed: record it (without hiding an earlier error) */ \
        /* so that the test body below is skipped instead of crashing.    */ \
        if (U_SUCCESS(status)) { \
            status = U_MEMORY_ALLOCATION_ERROR; \
        } \
    } else { \
        u_uastrncpy(srcString, testString, testStringLen + 1); \
        uregex_setText(re, srcString, -1, &status); \
    } \
    TEST_ASSERT_SUCCESS(status); \
    if (U_SUCCESS(status)) { \
        /* Empty block macro: swallows the ';' written after TEST_SETUP(...) */ \
        UPRV_BLOCK_MACRO_BEGIN {} UPRV_BLOCK_MACRO_END

#define TEST_TEARDOWN \
    } \
    TEST_ASSERT_SUCCESS(status); \
    uregex_close(re); \
    free(srcString); \
} UPRV_BLOCK_MACRO_END
79
80
81 /**
82 * @param expected utf-8 array of bytes to be expected
83 */
test_assert_string(const char * expected,const UChar * actual,UBool nulTerm,const char * file,int line)84 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
85 char buf_inside_macro[120];
86 int32_t len = (int32_t)strlen(expected);
87 UBool success;
88 if (nulTerm) {
89 u_austrncpy(buf_inside_macro, (actual), len+1);
90 buf_inside_macro[len+2] = 0;
91 success = (strcmp((expected), buf_inside_macro) == 0);
92 } else {
93 u_austrncpy(buf_inside_macro, (actual), len);
94 buf_inside_macro[len+1] = 0;
95 success = (strncmp((expected), buf_inside_macro, len) == 0);
96 }
97 if (success == false) {
98 log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
99 file, line, (expected), buf_inside_macro);
100 }
101 }
102
103 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
104
105
equals_utf8_utext(const char * utf8,UText * utext)106 static UBool equals_utf8_utext(const char *utf8, UText *utext) {
107 int32_t u8i = 0;
108 UChar32 u8c = 0;
109 UChar32 utc = 0;
110 UBool stringsEqual = true;
111 utext_setNativeIndex(utext, 0);
112 for (;;) {
113 U8_NEXT_UNSAFE(utf8, u8i, u8c);
114 utc = utext_next32(utext);
115 if (u8c == 0 && utc == U_SENTINEL) {
116 break;
117 }
118 if (u8c != utc || u8c == 0) {
119 stringsEqual = false;
120 break;
121 }
122 }
123 return stringsEqual;
124 }
125
126
/*
 * Check that a UText holds the expected UTF-8 string. On a mismatch, log
 * the caller's file and line, the expected string, and the actual UText
 * contents. Code points strictly between 0x20 and 0x7e are printed as
 * characters, everything else as a hex value.
 */
static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
    UChar32 cp;

    utext_setNativeIndex(actual, 0);
    if (equals_utf8_utext(expected, actual)) {
        return;
    }

    log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
    for (cp = utext_next32From(actual, 0); cp != U_SENTINEL; cp = UTEXT_NEXT32(actual)) {
        if (cp <= 0x20 || cp >= 0x7e) {
            log_err("%#x", cp);
        } else {
            log_err("%c", cp);
        }
    }
    log_err("\"\n");
}

/*
 * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
 *      Note:  Expected is a UTF-8 encoded string, _not_ the system code page.
 */
#define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
150
testUTextEqual(UText * uta,UText * utb)151 static UBool testUTextEqual(UText *uta, UText *utb) {
152 UChar32 ca = 0;
153 UChar32 cb = 0;
154 utext_setNativeIndex(uta, 0);
155 utext_setNativeIndex(utb, 0);
156 do {
157 ca = utext_next32(uta);
158 cb = utext_next32(utb);
159 if (ca != cb) {
160 break;
161 }
162 } while (ca != U_SENTINEL);
163 return ca == cb;
164 }
165
166
167
168
169 static void TestRegexCAPI(void);
170 static void TestBug4315(void);
171 static void TestUTextAPI(void);
172 static void TestRefreshInput(void);
173 static void TestBug8421(void);
174 static void TestBug10815(void);
175
176 void addURegexTest(TestNode** root);
177
/*
 * Register every regex C API test of this file under the "regex/" path
 * of the given test tree.
 */
void addURegexTest(TestNode** root)
{
    /* Test functions paired with their paths, in registration order. */
    static const struct {
        void       (*function)(void);
        const char  *path;
    } tests[] = {
        { TestRegexCAPI,    "regex/TestRegexCAPI"    },
        { TestBug4315,      "regex/TestBug4315"      },
        { TestUTextAPI,     "regex/TestUTextAPI"     },
        { TestRefreshInput, "regex/TestRefreshInput" },
        { TestBug8421,      "regex/TestBug8421"      },
        { TestBug10815,     "regex/TestBug10815"     }
    };
    size_t i;

    for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
        addTest(root, tests[i].function, tests[i].path);
    }
}
187
188 /*
189 * Call back function and context struct used for testing
190 * regular expression user callbacks. This test is mostly the same as
191 * the corresponding C++ test in intltest.
192 */
struct callBackContext {
    int32_t  maxCalls;    /* the callback returns false once numCalls reaches this value */
    int32_t  numCalls;    /* number of times the callback has been invoked               */
    int32_t  lastSteps;   /* "steps" argument received by the previous invocation        */
};
typedef struct callBackContext callBackContext;
198
199 static UBool U_EXPORT2 U_CALLCONV
TestCallbackFn(const void * context,int32_t steps)200 TestCallbackFn(const void *context, int32_t steps) {
201 callBackContext *info = (callBackContext *)context;
202 if (info->lastSteps+1 != steps) {
203 log_err("incorrect steps in callback. Expected %d, got %d\n", info->lastSteps+1, steps);
204 }
205 info->lastSteps = steps;
206 info->numCalls++;
207 return (info->numCalls < info->maxCalls);
208 }
209
210 /*
211 * Regular Expression C API Tests
212 */
TestRegexCAPI(void)213 static void TestRegexCAPI(void) {
214 UErrorCode status = U_ZERO_ERROR;
215 URegularExpression *re;
216 UChar pat[200];
217 UChar *minus1;
218
219 memset(&minus1, -1, sizeof(minus1));
220
    /* Minimalist open/close */
222 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
223 re = uregex_open(pat, -1, 0, 0, &status);
224 if (U_FAILURE(status)) {
225 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
226 return;
227 }
228 uregex_close(re);
229
230 /* Open with all flag values set */
231 status = U_ZERO_ERROR;
232 re = uregex_open(pat, -1,
233 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
234 0, &status);
235 TEST_ASSERT_SUCCESS(status);
236 uregex_close(re);
237
238 /* Open with an invalid flag */
239 status = U_ZERO_ERROR;
240 re = uregex_open(pat, -1, 0x40000000, 0, &status);
241 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
242 uregex_close(re);
243
244 /* Open with an unimplemented flag */
245 status = U_ZERO_ERROR;
246 re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
247 TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
248 uregex_close(re);
249
250 /* openC with an invalid parameter */
251 status = U_ZERO_ERROR;
252 re = uregex_openC(NULL,
253 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
254 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
255
    /* openC with an invalid parameter and a failure status already set on entry:
     * the incoming status must be left unchanged. */
257 status = U_USELESS_COLLATOR_ERROR;
258 re = uregex_openC(NULL,
259 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
260 TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
261
262 /* openC open from a C string */
263 {
264 const UChar *p;
265 int32_t len;
266 status = U_ZERO_ERROR;
267 re = uregex_openC("abc*", 0, 0, &status);
268 TEST_ASSERT_SUCCESS(status);
269 p = uregex_pattern(re, &len, &status);
270 TEST_ASSERT_SUCCESS(status);
271
272 /* The TEST_ASSERT_SUCCESS above should change too... */
273 if(U_SUCCESS(status)) {
274 u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
275 TEST_ASSERT(u_strcmp(pat, p) == 0);
276 TEST_ASSERT(len==(int32_t)strlen("abc*"));
277 }
278
279 uregex_close(re);
280
281 /* TODO: Open with ParseError parameter */
282 }
283
284 /*
285 * clone
286 */
287 {
288 URegularExpression *clone1;
289 URegularExpression *clone2;
290 URegularExpression *clone3;
291 UChar testString1[30];
292 UChar testString2[30];
293 UBool result;
294
295
296 status = U_ZERO_ERROR;
297 re = uregex_openC("abc*", 0, 0, &status);
298 TEST_ASSERT_SUCCESS(status);
299 clone1 = uregex_clone(re, &status);
300 TEST_ASSERT_SUCCESS(status);
301 TEST_ASSERT(clone1 != NULL);
302
303 status = U_ZERO_ERROR;
304 clone2 = uregex_clone(re, &status);
305 TEST_ASSERT_SUCCESS(status);
306 TEST_ASSERT(clone2 != NULL);
307 uregex_close(re);
308
309 status = U_ZERO_ERROR;
310 clone3 = uregex_clone(clone2, &status);
311 TEST_ASSERT_SUCCESS(status);
312 TEST_ASSERT(clone3 != NULL);
313
314 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
315 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
316
317 status = U_ZERO_ERROR;
318 uregex_setText(clone1, testString1, -1, &status);
319 TEST_ASSERT_SUCCESS(status);
320 result = uregex_lookingAt(clone1, 0, &status);
321 TEST_ASSERT_SUCCESS(status);
322 TEST_ASSERT(result==true);
323
324 status = U_ZERO_ERROR;
325 uregex_setText(clone2, testString2, -1, &status);
326 TEST_ASSERT_SUCCESS(status);
327 result = uregex_lookingAt(clone2, 0, &status);
328 TEST_ASSERT_SUCCESS(status);
329 TEST_ASSERT(result==false);
330 result = uregex_find(clone2, 0, &status);
331 TEST_ASSERT_SUCCESS(status);
332 TEST_ASSERT(result==true);
333
334 uregex_close(clone1);
335 uregex_close(clone2);
336 uregex_close(clone3);
337
338 }
339
340 /*
341 * pattern()
342 */
343 {
344 const UChar *resultPat;
345 int32_t resultLen;
346 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));
347 status = U_ZERO_ERROR;
348 re = uregex_open(pat, -1, 0, NULL, &status);
349 resultPat = uregex_pattern(re, &resultLen, &status);
350 TEST_ASSERT_SUCCESS(status);
351
352 /* The TEST_ASSERT_SUCCESS above should change too... */
353 if (U_SUCCESS(status)) {
354 TEST_ASSERT(resultLen == -1);
355 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
356 }
357
358 uregex_close(re);
359
360 status = U_ZERO_ERROR;
361 re = uregex_open(pat, 3, 0, NULL, &status);
362 resultPat = uregex_pattern(re, &resultLen, &status);
363 TEST_ASSERT_SUCCESS(status);
364 TEST_ASSERT_SUCCESS(status);
365
366 /* The TEST_ASSERT_SUCCESS above should change too... */
367 if (U_SUCCESS(status)) {
368 TEST_ASSERT(resultLen == 3);
369 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
370 TEST_ASSERT(u_strlen(resultPat) == 3);
371 }
372
373 uregex_close(re);
374 }
375
376 /*
377 * flags()
378 */
379 {
380 int32_t t;
381
382 status = U_ZERO_ERROR;
383 re = uregex_open(pat, -1, 0, NULL, &status);
384 t = uregex_flags(re, &status);
385 TEST_ASSERT_SUCCESS(status);
386 TEST_ASSERT(t == 0);
387 uregex_close(re);
388
389 status = U_ZERO_ERROR;
390 re = uregex_open(pat, -1, 0, NULL, &status);
391 t = uregex_flags(re, &status);
392 TEST_ASSERT_SUCCESS(status);
393 TEST_ASSERT(t == 0);
394 uregex_close(re);
395
396 status = U_ZERO_ERROR;
397 re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
398 t = uregex_flags(re, &status);
399 TEST_ASSERT_SUCCESS(status);
400 TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
401 uregex_close(re);
402 }
403
404 /*
405 * setText() and lookingAt()
406 */
407 {
408 UChar text1[50];
409 UChar text2[50];
410 UBool result;
411
412 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1));
413 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
414 status = U_ZERO_ERROR;
415 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
416 re = uregex_open(pat, -1, 0, NULL, &status);
417 TEST_ASSERT_SUCCESS(status);
418
419 /* Operation before doing a setText should fail... */
420 status = U_ZERO_ERROR;
421 uregex_lookingAt(re, 0, &status);
422 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
423
424 status = U_ZERO_ERROR;
425 uregex_setText(re, text1, -1, &status);
426 result = uregex_lookingAt(re, 0, &status);
427 TEST_ASSERT(result == true);
428 TEST_ASSERT_SUCCESS(status);
429
430 status = U_ZERO_ERROR;
431 uregex_setText(re, text2, -1, &status);
432 result = uregex_lookingAt(re, 0, &status);
433 TEST_ASSERT(result == false);
434 TEST_ASSERT_SUCCESS(status);
435
436 status = U_ZERO_ERROR;
437 uregex_setText(re, text1, -1, &status);
438 result = uregex_lookingAt(re, 0, &status);
439 TEST_ASSERT(result == true);
440 TEST_ASSERT_SUCCESS(status);
441
442 status = U_ZERO_ERROR;
443 uregex_setText(re, text1, 5, &status);
444 result = uregex_lookingAt(re, 0, &status);
445 TEST_ASSERT(result == false);
446 TEST_ASSERT_SUCCESS(status);
447
448 status = U_ZERO_ERROR;
449 uregex_setText(re, text1, 6, &status);
450 result = uregex_lookingAt(re, 0, &status);
451 TEST_ASSERT(result == true);
452 TEST_ASSERT_SUCCESS(status);
453
454 uregex_close(re);
455 }
456
457
458 /*
459 * getText()
460 */
461 {
462 UChar text1[50];
463 UChar text2[50];
464 const UChar *result;
465 int32_t textLength;
466
467 u_uastrncpy(text1, "abcccd", UPRV_LENGTHOF(text1));
468 u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
469 status = U_ZERO_ERROR;
470 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
471 re = uregex_open(pat, -1, 0, NULL, &status);
472
473 uregex_setText(re, text1, -1, &status);
474 result = uregex_getText(re, &textLength, &status);
475 TEST_ASSERT(result == text1);
476 TEST_ASSERT(textLength == -1);
477 TEST_ASSERT_SUCCESS(status);
478
479 status = U_ZERO_ERROR;
480 uregex_setText(re, text2, 7, &status);
481 result = uregex_getText(re, &textLength, &status);
482 TEST_ASSERT(result == text2);
483 TEST_ASSERT(textLength == 7);
484 TEST_ASSERT_SUCCESS(status);
485
486 status = U_ZERO_ERROR;
487 uregex_setText(re, text2, 4, &status);
488 result = uregex_getText(re, &textLength, &status);
489 TEST_ASSERT(result == text2);
490 TEST_ASSERT(textLength == 4);
491 TEST_ASSERT_SUCCESS(status);
492 uregex_close(re);
493 }
494
495 /*
496 * matches()
497 */
498 {
499 UChar text1[50];
500 UBool result;
501 int len;
502 UChar nullString[] = {0,0,0};
503
504 u_uastrncpy(text1, "abcccde", UPRV_LENGTHOF(text1));
505 status = U_ZERO_ERROR;
506 u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
507 re = uregex_open(pat, -1, 0, NULL, &status);
508
509 uregex_setText(re, text1, -1, &status);
510 result = uregex_matches(re, 0, &status);
511 TEST_ASSERT(result == false);
512 TEST_ASSERT_SUCCESS(status);
513
514 status = U_ZERO_ERROR;
515 uregex_setText(re, text1, 6, &status);
516 result = uregex_matches(re, 0, &status);
517 TEST_ASSERT(result == true);
518 TEST_ASSERT_SUCCESS(status);
519
520 status = U_ZERO_ERROR;
521 uregex_setText(re, text1, 6, &status);
522 result = uregex_matches(re, 1, &status);
523 TEST_ASSERT(result == false);
524 TEST_ASSERT_SUCCESS(status);
525 uregex_close(re);
526
527 status = U_ZERO_ERROR;
528 re = uregex_openC(".?", 0, NULL, &status);
529 uregex_setText(re, text1, -1, &status);
530 len = u_strlen(text1);
531 result = uregex_matches(re, len, &status);
532 TEST_ASSERT(result == true);
533 TEST_ASSERT_SUCCESS(status);
534
535 status = U_ZERO_ERROR;
536 uregex_setText(re, nullString, -1, &status);
537 TEST_ASSERT_SUCCESS(status);
538 result = uregex_matches(re, 0, &status);
539 TEST_ASSERT(result == true);
540 TEST_ASSERT_SUCCESS(status);
541 uregex_close(re);
542 }
543
544
545 /*
546 * lookingAt() Used in setText test.
547 */
548
549
550 /*
551 * find(), findNext, start, end, reset
552 */
553 {
554 UChar text1[50];
555 UBool result;
556 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));
557 status = U_ZERO_ERROR;
558 re = uregex_openC("rx", 0, NULL, &status);
559
560 uregex_setText(re, text1, -1, &status);
561 result = uregex_find(re, 0, &status);
562 TEST_ASSERT(result == true);
563 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
564 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
565 TEST_ASSERT_SUCCESS(status);
566
567 result = uregex_find(re, 9, &status);
568 TEST_ASSERT(result == true);
569 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
570 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
571 TEST_ASSERT_SUCCESS(status);
572
573 result = uregex_find(re, 14, &status);
574 TEST_ASSERT(result == false);
575 TEST_ASSERT_SUCCESS(status);
576
577 status = U_ZERO_ERROR;
578 uregex_reset(re, 0, &status);
579
580 result = uregex_findNext(re, &status);
581 TEST_ASSERT(result == true);
582 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
583 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
584 TEST_ASSERT_SUCCESS(status);
585
586 result = uregex_findNext(re, &status);
587 TEST_ASSERT(result == true);
588 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
589 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
590 TEST_ASSERT_SUCCESS(status);
591
592 status = U_ZERO_ERROR;
593 uregex_reset(re, 12, &status);
594
595 result = uregex_findNext(re, &status);
596 TEST_ASSERT(result == true);
597 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
598 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
599 TEST_ASSERT_SUCCESS(status);
600
601 result = uregex_findNext(re, &status);
602 TEST_ASSERT(result == false);
603 TEST_ASSERT_SUCCESS(status);
604
605 uregex_close(re);
606 }
607
608 /*
609 * groupCount
610 */
611 {
612 int32_t result;
613
614 status = U_ZERO_ERROR;
615 re = uregex_openC("abc", 0, NULL, &status);
616 result = uregex_groupCount(re, &status);
617 TEST_ASSERT_SUCCESS(status);
618 TEST_ASSERT(result == 0);
619 uregex_close(re);
620
621 status = U_ZERO_ERROR;
622 re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
623 result = uregex_groupCount(re, &status);
624 TEST_ASSERT_SUCCESS(status);
625 TEST_ASSERT(result == 3);
626 uregex_close(re);
627
628 }
629
630
631 /*
632 * group()
633 */
634 {
635 UChar text1[80];
636 UChar buf[80];
637 UBool result;
638 int32_t resultSz;
639 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
640
641 status = U_ZERO_ERROR;
642 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
643 TEST_ASSERT_SUCCESS(status);
644
645
646 uregex_setText(re, text1, -1, &status);
647 result = uregex_find(re, 0, &status);
648 TEST_ASSERT(result==true);
649
650 /* Capture Group 0, the full match. Should succeed. */
651 status = U_ZERO_ERROR;
652 resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);
653 TEST_ASSERT_SUCCESS(status);
654 TEST_ASSERT_STRING("abc interior def", buf, true);
655 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
656
657 /* Capture group #1. Should succeed. */
658 status = U_ZERO_ERROR;
659 resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);
660 TEST_ASSERT_SUCCESS(status);
661 TEST_ASSERT_STRING(" interior ", buf, true);
662 TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
663
664 /* Capture group out of range. Error. */
665 status = U_ZERO_ERROR;
666 uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);
667 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
668
669 /* NULL buffer, pure pre-flight */
670 status = U_ZERO_ERROR;
671 resultSz = uregex_group(re, 0, NULL, 0, &status);
672 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
673 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
674
675 /* Too small buffer, truncated string */
676 status = U_ZERO_ERROR;
677 memset(buf, -1, sizeof(buf));
678 resultSz = uregex_group(re, 0, buf, 5, &status);
679 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
680 TEST_ASSERT_STRING("abc i", buf, false);
681 TEST_ASSERT(buf[5] == (UChar)0xffff);
682 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
683
684 /* Output string just fits buffer, no NUL term. */
685 status = U_ZERO_ERROR;
686 resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
687 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
688 TEST_ASSERT_STRING("abc interior def", buf, false);
689 TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
690 TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
691
692 uregex_close(re);
693
694 }
695
696 /*
697 * Regions
698 */
699
700
701 /* SetRegion(), getRegion() do something */
702 TEST_SETUP(".*", "0123456789ABCDEF", 0);
703 UChar resultString[40];
704 TEST_ASSERT(uregex_regionStart(re, &status) == 0);
705 TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
706 uregex_setRegion(re, 3, 6, &status);
707 TEST_ASSERT(uregex_regionStart(re, &status) == 3);
708 TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
709 TEST_ASSERT(uregex_findNext(re, &status));
710 TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3);
711 TEST_ASSERT_STRING("345", resultString, true);
712 TEST_TEARDOWN;
713
714 /* find(start=-1) uses regions */
715 TEST_SETUP(".*", "0123456789ABCDEF", 0);
716 uregex_setRegion(re, 4, 6, &status);
717 TEST_ASSERT(uregex_find(re, -1, &status) == true);
718 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
719 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
720 TEST_TEARDOWN;
721
722 /* find (start >=0) does not use regions */
723 TEST_SETUP(".*", "0123456789ABCDEF", 0);
724 uregex_setRegion(re, 4, 6, &status);
725 TEST_ASSERT(uregex_find(re, 0, &status) == true);
726 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
727 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
728 TEST_TEARDOWN;
729
730 /* findNext() obeys regions */
731 TEST_SETUP(".", "0123456789ABCDEF", 0);
732 uregex_setRegion(re, 4, 6, &status);
733 TEST_ASSERT(uregex_findNext(re,&status) == true);
734 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
735 TEST_ASSERT(uregex_findNext(re, &status) == true);
736 TEST_ASSERT(uregex_start(re, 0, &status) == 5);
737 TEST_ASSERT(uregex_findNext(re, &status) == false);
738 TEST_TEARDOWN;
739
740 /* matches(start=-1) uses regions */
741 /* Also, verify that non-greedy *? succeeds in finding the full match. */
742 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
743 uregex_setRegion(re, 4, 6, &status);
744 TEST_ASSERT(uregex_matches(re, -1, &status) == true);
745 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
746 TEST_ASSERT(uregex_end(re, 0, &status) == 6);
747 TEST_TEARDOWN;
748
749 /* matches (start >=0) does not use regions */
750 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
751 uregex_setRegion(re, 4, 6, &status);
752 TEST_ASSERT(uregex_matches(re, 0, &status) == true);
753 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
754 TEST_ASSERT(uregex_end(re, 0, &status) == 16);
755 TEST_TEARDOWN;
756
757 /* lookingAt(start=-1) uses regions */
758 /* Also, verify that non-greedy *? finds the first (shortest) match. */
759 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
760 uregex_setRegion(re, 4, 6, &status);
761 TEST_ASSERT(uregex_lookingAt(re, -1, &status) == true);
762 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
763 TEST_ASSERT(uregex_end(re, 0, &status) == 4);
764 TEST_TEARDOWN;
765
766 /* lookingAt (start >=0) does not use regions */
767 TEST_SETUP(".*?", "0123456789ABCDEF", 0);
768 uregex_setRegion(re, 4, 6, &status);
769 TEST_ASSERT(uregex_lookingAt(re, 0, &status) == true);
770 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
771 TEST_ASSERT(uregex_end(re, 0, &status) == 0);
772 TEST_TEARDOWN;
773
774 /* hitEnd() */
775 TEST_SETUP("[a-f]*", "abcdefghij", 0);
776 TEST_ASSERT(uregex_find(re, 0, &status) == true);
777 TEST_ASSERT(uregex_hitEnd(re, &status) == false);
778 TEST_TEARDOWN;
779
780 TEST_SETUP("[a-f]*", "abcdef", 0);
781 TEST_ASSERT(uregex_find(re, 0, &status) == true);
782 TEST_ASSERT(uregex_hitEnd(re, &status) == true);
783 TEST_TEARDOWN;
784
785 /* requireEnd */
786 TEST_SETUP("abcd", "abcd", 0);
787 TEST_ASSERT(uregex_find(re, 0, &status) == true);
788 TEST_ASSERT(uregex_requireEnd(re, &status) == false);
789 TEST_TEARDOWN;
790
791 TEST_SETUP("abcd$", "abcd", 0);
792 TEST_ASSERT(uregex_find(re, 0, &status) == true);
793 TEST_ASSERT(uregex_requireEnd(re, &status) == true);
794 TEST_TEARDOWN;
795
796 /* anchoringBounds */
797 TEST_SETUP("abc$", "abcdef", 0);
798 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == true);
799 uregex_useAnchoringBounds(re, false, &status);
800 TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == false);
801
802 TEST_ASSERT(uregex_find(re, -1, &status) == false);
803 uregex_useAnchoringBounds(re, true, &status);
804 uregex_setRegion(re, 0, 3, &status);
805 TEST_ASSERT(uregex_find(re, -1, &status) == true);
806 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
807 TEST_TEARDOWN;
808
809 /* Transparent Bounds */
810 TEST_SETUP("abc(?=def)", "abcdef", 0);
811 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == false);
812 uregex_useTransparentBounds(re, true, &status);
813 TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == true);
814
815 uregex_useTransparentBounds(re, false, &status);
816 TEST_ASSERT(uregex_find(re, -1, &status) == true); /* No Region */
817 uregex_setRegion(re, 0, 3, &status);
818 TEST_ASSERT(uregex_find(re, -1, &status) == false); /* with region, opaque bounds */
819 uregex_useTransparentBounds(re, true, &status);
820 TEST_ASSERT(uregex_find(re, -1, &status) == true); /* with region, transparent bounds */
821 TEST_ASSERT(uregex_end(re, 0, &status) == 3);
822 TEST_TEARDOWN;
823
824
825 /*
826 * replaceFirst()
827 */
828 {
829 UChar text1[80];
830 UChar text2[80];
831 UChar replText[80];
832 UChar buf[80];
833 int32_t resultSz;
834 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
835 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
836 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
837
838 status = U_ZERO_ERROR;
839 re = uregex_openC("x(.*?)x", 0, NULL, &status);
840 TEST_ASSERT_SUCCESS(status);
841
842 /* Normal case, with match */
843 uregex_setText(re, text1, -1, &status);
844 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
845 TEST_ASSERT_SUCCESS(status);
846 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, true);
847 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
848
849 /* No match. Text should copy to output with no changes. */
850 status = U_ZERO_ERROR;
851 uregex_setText(re, text2, -1, &status);
852 resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
853 TEST_ASSERT_SUCCESS(status);
854 TEST_ASSERT_STRING("No match here.", buf, true);
855 TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
856
857 /* Match, output just fills buffer, no termination warning. */
858 status = U_ZERO_ERROR;
859 uregex_setText(re, text1, -1, &status);
860 memset(buf, -1, sizeof(buf));
861 resultSz = uregex_replaceFirst(re, replText, -1, buf, (int32_t)strlen("Replace <aa> x1x x...x."), &status);
862 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
863 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, false);
864 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
865 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
866
867 /* Do the replaceFirst again, without first resetting anything.
868 * Should give the same results.
869 */
870 status = U_ZERO_ERROR;
871 memset(buf, -1, sizeof(buf));
872 resultSz = uregex_replaceFirst(re, replText, -1, buf, (int32_t)strlen("Replace <aa> x1x x...x."), &status);
873 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
874 TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, false);
875 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
876 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
877
878 /* NULL buffer, zero buffer length */
879 status = U_ZERO_ERROR;
880 resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
881 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
882 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
883
884 /* Buffer too small by one */
885 status = U_ZERO_ERROR;
886 memset(buf, -1, sizeof(buf));
887 resultSz = uregex_replaceFirst(re, replText, -1, buf, (int32_t)strlen("Replace <aa> x1x x...x.")-1, &status);
888 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
889 TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, false);
890 TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
891 TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
892
893 uregex_close(re);
894 }
895
896
897 /*
898 * replaceAll()
899 */
900 {
901 UChar text1[80]; /* "Replace xaax x1x x...x." */
902 UChar text2[80]; /* "No match Here" */
903 UChar replText[80]; /* "<$1>" */
904 UChar replText2[80]; /* "<<$1>>" */
905 const char * pattern = "x(.*?)x";
906 const char * expectedResult = "Replace <aa> <1> <...>.";
907 const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
908 UChar buf[80];
909 int32_t resultSize;
910 int32_t expectedResultSize;
911 int32_t expectedResultSize2;
912 int32_t i;
913
914 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
915 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
916 u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
917 u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));
918 expectedResultSize = (int32_t)strlen(expectedResult);
919 expectedResultSize2 = (int32_t)strlen(expectedResult2);
920
921 status = U_ZERO_ERROR;
922 re = uregex_openC(pattern, 0, NULL, &status);
923 TEST_ASSERT_SUCCESS(status);
924
925 /* Normal case, with match */
926 uregex_setText(re, text1, -1, &status);
927 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
928 TEST_ASSERT_SUCCESS(status);
929 TEST_ASSERT_STRING(expectedResult, buf, true);
930 TEST_ASSERT(resultSize == expectedResultSize);
931
932 /* No match. Text should copy to output with no changes. */
933 status = U_ZERO_ERROR;
934 uregex_setText(re, text2, -1, &status);
935 resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
936 TEST_ASSERT_SUCCESS(status);
937 TEST_ASSERT_STRING("No match here.", buf, true);
938 TEST_ASSERT(resultSize == u_strlen(text2));
939
940 /* Match, output just fills buffer, no termination warning. */
941 status = U_ZERO_ERROR;
942 uregex_setText(re, text1, -1, &status);
943 memset(buf, -1, sizeof(buf));
944 resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
945 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
946 TEST_ASSERT_STRING(expectedResult, buf, false);
947 TEST_ASSERT(resultSize == expectedResultSize);
948 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
949
        /* Do the replaceAll again, without first resetting anything.
951 * Should give the same results.
952 */
953 status = U_ZERO_ERROR;
954 memset(buf, -1, sizeof(buf));
955 resultSize = uregex_replaceAll(re, replText, -1, buf, (int32_t)strlen("Replace xaax x1x x...x."), &status);
956 TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
957 TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, false);
958 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
959 TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
960
961 /* NULL buffer, zero buffer length */
962 status = U_ZERO_ERROR;
963 resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
964 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
965 TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
966
967 /* Buffer too small. Try every size, which will tickle edge cases
968 * in uregex_appendReplacement (used by replaceAll) */
969 for (i=0; i<expectedResultSize; i++) {
970 char expected[80];
971 status = U_ZERO_ERROR;
972 memset(buf, -1, sizeof(buf));
973 resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
974 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
975 strcpy(expected, expectedResult);
976 expected[i] = 0;
977 TEST_ASSERT_STRING(expected, buf, false);
978 TEST_ASSERT(resultSize == expectedResultSize);
979 TEST_ASSERT(buf[i] == (UChar)0xffff);
980 }
981
982 /* Buffer too small. Same as previous test, except this time the replacement
983 * text is longer than the match capture group, making the length of the complete
984 * replacement longer than the original string.
985 */
986 for (i=0; i<expectedResultSize2; i++) {
987 char expected[80];
988 status = U_ZERO_ERROR;
989 memset(buf, -1, sizeof(buf));
990 resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
991 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
992 strcpy(expected, expectedResult2);
993 expected[i] = 0;
994 TEST_ASSERT_STRING(expected, buf, false);
995 TEST_ASSERT(resultSize == expectedResultSize2);
996 TEST_ASSERT(buf[i] == (UChar)0xffff);
997 }
998
999
1000 uregex_close(re);
1001 }
1002
1003
1004 /*
1005 * appendReplacement()
1006 */
1007 {
1008 UChar text[100];
1009 UChar repl[100];
1010 UChar buf[100];
1011 UChar *bufPtr;
1012 int32_t bufCap;
1013
1014
1015 status = U_ZERO_ERROR;
1016 re = uregex_openC(".*", 0, 0, &status);
1017 TEST_ASSERT_SUCCESS(status);
1018
1019 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));
1020 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1021 uregex_setText(re, text, -1, &status);
1022
1023 /* match covers whole target string */
1024 uregex_find(re, 0, &status);
1025 TEST_ASSERT_SUCCESS(status);
1026 bufPtr = buf;
1027 bufCap = UPRV_LENGTHOF(buf);
1028 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1029 TEST_ASSERT_SUCCESS(status);
1030 TEST_ASSERT_STRING("some other", buf, true);
1031
1032 /* Match has \u \U escapes */
1033 uregex_find(re, 0, &status);
1034 TEST_ASSERT_SUCCESS(status);
1035 bufPtr = buf;
1036 bufCap = UPRV_LENGTHOF(buf);
1037 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1038 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1039 TEST_ASSERT_SUCCESS(status);
1040 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, true);
1041
1042 /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1043 status = U_ZERO_ERROR;
1044 uregex_find(re, 0, &status);
1045 TEST_ASSERT_SUCCESS(status);
1046 bufPtr = buf;
1047 status = U_BUFFER_OVERFLOW_ERROR;
1048 uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
1049 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1050
1051 uregex_close(re);
1052 }
1053
1054
1055 /*
1056 * appendTail(). Checked in ReplaceFirst(), replaceAll().
1057 */
1058
1059 /*
1060 * split()
1061 */
1062 {
1063 UChar textToSplit[80];
1064 UChar text2[80];
1065 UChar buf[200];
1066 UChar *fields[10];
1067 int32_t numFields;
1068 int32_t requiredCapacity;
1069 int32_t spaceNeeded;
1070 int32_t sz;
1071
1072 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit));
1073 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1074
1075 status = U_ZERO_ERROR;
1076 re = uregex_openC(":", 0, NULL, &status);
1077
1078
1079 /* Simple split */
1080
1081 uregex_setText(re, textToSplit, -1, &status);
1082 TEST_ASSERT_SUCCESS(status);
1083
1084 /* The TEST_ASSERT_SUCCESS call above should change too... */
1085 if (U_SUCCESS(status)) {
1086 memset(fields, -1, sizeof(fields));
1087 numFields =
1088 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1089 TEST_ASSERT_SUCCESS(status);
1090
1091 /* The TEST_ASSERT_SUCCESS call above should change too... */
1092 if(U_SUCCESS(status)) {
1093 TEST_ASSERT(numFields == 3);
1094 TEST_ASSERT_STRING("first ", fields[0], true);
1095 TEST_ASSERT_STRING(" second", fields[1], true);
1096 TEST_ASSERT_STRING(" third", fields[2], true);
1097 TEST_ASSERT(fields[3] == NULL);
1098
1099 spaceNeeded = u_strlen(textToSplit) -
1100 (numFields - 1) + /* Field delimiters do not appear in output */
1101 numFields; /* Each field gets a NUL terminator */
1102
1103 TEST_ASSERT(spaceNeeded == requiredCapacity);
1104 }
1105 }
1106
1107 uregex_close(re);
1108
1109
1110 /* Split with too few output strings available */
1111 status = U_ZERO_ERROR;
1112 re = uregex_openC(":", 0, NULL, &status);
1113 uregex_setText(re, textToSplit, -1, &status);
1114 TEST_ASSERT_SUCCESS(status);
1115
1116 /* The TEST_ASSERT_SUCCESS call above should change too... */
1117 if(U_SUCCESS(status)) {
1118 memset(fields, -1, sizeof(fields));
1119 numFields =
1120 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1121 TEST_ASSERT_SUCCESS(status);
1122
1123 /* The TEST_ASSERT_SUCCESS call above should change too... */
1124 if(U_SUCCESS(status)) {
1125 TEST_ASSERT(numFields == 2);
1126 TEST_ASSERT_STRING("first ", fields[0], true);
1127 TEST_ASSERT_STRING(" second: third", fields[1], true);
1128 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1129
1130 spaceNeeded = u_strlen(textToSplit) -
1131 (numFields - 1) + /* Field delimiters do not appear in output */
1132 numFields; /* Each field gets a NUL terminator */
1133
1134 TEST_ASSERT(spaceNeeded == requiredCapacity);
1135
1136 /* Split with a range of output buffer sizes. */
1137 spaceNeeded = u_strlen(textToSplit) -
1138 (numFields - 1) + /* Field delimiters do not appear in output */
1139 numFields; /* Each field gets a NUL terminator */
1140
1141 for (sz=0; sz < spaceNeeded+1; sz++) {
1142 memset(fields, -1, sizeof(fields));
1143 status = U_ZERO_ERROR;
1144 numFields =
1145 uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1146 if (sz >= spaceNeeded) {
1147 TEST_ASSERT_SUCCESS(status);
1148 TEST_ASSERT_STRING("first ", fields[0], true);
1149 TEST_ASSERT_STRING(" second", fields[1], true);
1150 TEST_ASSERT_STRING(" third", fields[2], true);
1151 } else {
1152 TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1153 }
1154 TEST_ASSERT(numFields == 3);
1155 TEST_ASSERT(fields[3] == NULL);
1156 TEST_ASSERT(spaceNeeded == requiredCapacity);
1157 }
1158 }
1159 }
1160
1161 uregex_close(re);
1162 }
1163
1164
1165
1166
1167 /* Split(), part 2. Patterns with capture groups. The capture group text
1168 * comes out as additional fields. */
1169 {
1170 UChar textToSplit[80];
1171 UChar buf[200];
1172 UChar *fields[10];
1173 int32_t numFields;
1174 int32_t requiredCapacity;
1175 int32_t spaceNeeded;
1176 int32_t sz;
1177
1178 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit));
1179
1180 status = U_ZERO_ERROR;
1181 re = uregex_openC("<(.*?)>", 0, NULL, &status);
1182
1183 uregex_setText(re, textToSplit, -1, &status);
1184 TEST_ASSERT_SUCCESS(status);
1185
1186 /* The TEST_ASSERT_SUCCESS call above should change too... */
1187 if(U_SUCCESS(status)) {
1188 memset(fields, -1, sizeof(fields));
1189 numFields =
1190 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1191 TEST_ASSERT_SUCCESS(status);
1192
1193 /* The TEST_ASSERT_SUCCESS call above should change too... */
1194 if(U_SUCCESS(status)) {
1195 TEST_ASSERT(numFields == 5);
1196 TEST_ASSERT_STRING("first ", fields[0], true);
1197 TEST_ASSERT_STRING("tag-a", fields[1], true);
1198 TEST_ASSERT_STRING(" second", fields[2], true);
1199 TEST_ASSERT_STRING("tag-b", fields[3], true);
1200 TEST_ASSERT_STRING(" third", fields[4], true);
1201 TEST_ASSERT(fields[5] == NULL);
1202 spaceNeeded = (int32_t)strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1203 TEST_ASSERT(spaceNeeded == requiredCapacity);
1204 }
1205 }
1206
1207 /* Split with too few output strings available (2) */
1208 status = U_ZERO_ERROR;
1209 memset(fields, -1, sizeof(fields));
1210 numFields =
1211 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1212 TEST_ASSERT_SUCCESS(status);
1213
1214 /* The TEST_ASSERT_SUCCESS call above should change too... */
1215 if(U_SUCCESS(status)) {
1216 TEST_ASSERT(numFields == 2);
1217 TEST_ASSERT_STRING("first ", fields[0], true);
1218 TEST_ASSERT_STRING(" second<tag-b> third", fields[1], true);
1219 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1220
1221 spaceNeeded = (int32_t)strlen("first . second<tag-b> third."); /* "." at NUL positions */
1222 TEST_ASSERT(spaceNeeded == requiredCapacity);
1223 }
1224
1225 /* Split with too few output strings available (3) */
1226 status = U_ZERO_ERROR;
1227 memset(fields, -1, sizeof(fields));
1228 numFields =
1229 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
1230 TEST_ASSERT_SUCCESS(status);
1231
1232 /* The TEST_ASSERT_SUCCESS call above should change too... */
1233 if(U_SUCCESS(status)) {
1234 TEST_ASSERT(numFields == 3);
1235 TEST_ASSERT_STRING("first ", fields[0], true);
1236 TEST_ASSERT_STRING("tag-a", fields[1], true);
1237 TEST_ASSERT_STRING(" second<tag-b> third", fields[2], true);
1238 TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1239
1240 spaceNeeded = (int32_t)strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */
1241 TEST_ASSERT(spaceNeeded == requiredCapacity);
1242 }
1243
1244 /* Split with just enough output strings available (5) */
1245 status = U_ZERO_ERROR;
1246 memset(fields, -1, sizeof(fields));
1247 numFields =
1248 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
1249 TEST_ASSERT_SUCCESS(status);
1250
1251 /* The TEST_ASSERT_SUCCESS call above should change too... */
1252 if(U_SUCCESS(status)) {
1253 TEST_ASSERT(numFields == 5);
1254 TEST_ASSERT_STRING("first ", fields[0], true);
1255 TEST_ASSERT_STRING("tag-a", fields[1], true);
1256 TEST_ASSERT_STRING(" second", fields[2], true);
1257 TEST_ASSERT_STRING("tag-b", fields[3], true);
1258 TEST_ASSERT_STRING(" third", fields[4], true);
1259 TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1260
1261 spaceNeeded = (int32_t)strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */
1262 TEST_ASSERT(spaceNeeded == requiredCapacity);
1263 }
1264
1265 /* Split, end of text is a field delimiter. */
1266 status = U_ZERO_ERROR;
1267 sz = (int32_t)strlen("first <tag-a> second<tag-b>");
1268 uregex_setText(re, textToSplit, sz, &status);
1269 TEST_ASSERT_SUCCESS(status);
1270
1271 /* The TEST_ASSERT_SUCCESS call above should change too... */
1272 if(U_SUCCESS(status)) {
1273 memset(fields, -1, sizeof(fields));
1274 numFields =
1275 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
1276 TEST_ASSERT_SUCCESS(status);
1277
1278 /* The TEST_ASSERT_SUCCESS call above should change too... */
1279 if(U_SUCCESS(status)) {
1280 TEST_ASSERT(numFields == 5);
1281 TEST_ASSERT_STRING("first ", fields[0], true);
1282 TEST_ASSERT_STRING("tag-a", fields[1], true);
1283 TEST_ASSERT_STRING(" second", fields[2], true);
1284 TEST_ASSERT_STRING("tag-b", fields[3], true);
1285 TEST_ASSERT_STRING("", fields[4], true);
1286 TEST_ASSERT(fields[5] == NULL);
1287 TEST_ASSERT(fields[8] == NULL);
1288 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1289 spaceNeeded = (int32_t)strlen("first .tag-a. second.tag-b.."); /* "." at NUL positions */
1290 TEST_ASSERT(spaceNeeded == requiredCapacity);
1291 }
1292 }
1293
1294 uregex_close(re);
1295 }
1296
1297 /*
1298 * set/getTimeLimit
1299 */
1300 TEST_SETUP("abc$", "abcdef", 0);
1301 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1302 uregex_setTimeLimit(re, 1000, &status);
1303 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1304 TEST_ASSERT_SUCCESS(status);
1305 uregex_setTimeLimit(re, -1, &status);
1306 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1307 status = U_ZERO_ERROR;
1308 TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1309 TEST_TEARDOWN;
1310
1311 /*
1312 * set/get Stack Limit
1313 */
1314 TEST_SETUP("abc$", "abcdef", 0);
1315 TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1316 uregex_setStackLimit(re, 40000, &status);
1317 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1318 TEST_ASSERT_SUCCESS(status);
1319 uregex_setStackLimit(re, -1, &status);
1320 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1321 status = U_ZERO_ERROR;
1322 TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1323 TEST_TEARDOWN;
1324
1325
1326 /*
1327 * Get/Set callback functions
1328 * This test is copied from intltest regex/Callbacks
1329 * The pattern and test data will run long enough to cause the callback
1330 * to be invoked. The nested '+' operators give exponential time
1331 * behavior with increasing string length.
1332 */
1333 TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0);
1334 callBackContext cbInfo = {4, 0, 0};
1335 const void *pContext = &cbInfo;
1336 URegexMatchCallback *returnedFn = &TestCallbackFn;
1337
1338 /* Getting the callback fn when it hasn't been set must return NULL */
1339 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1340 TEST_ASSERT_SUCCESS(status);
1341 TEST_ASSERT(returnedFn == NULL);
1342 TEST_ASSERT(pContext == NULL);
1343
1344 /* Set the callback and do a match. */
1345 /* The callback function should record that it has been called. */
1346 uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1347 TEST_ASSERT_SUCCESS(status);
1348 TEST_ASSERT(cbInfo.numCalls == 0);
1349 TEST_ASSERT(uregex_matches(re, -1, &status) == false);
1350 TEST_ASSERT_SUCCESS(status);
1351 TEST_ASSERT(cbInfo.numCalls > 0);
1352
1353 /* Getting the callback should return the values that were set above. */
1354 uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1355 TEST_ASSERT(returnedFn == &TestCallbackFn);
1356 TEST_ASSERT(pContext == &cbInfo);
1357
1358 TEST_TEARDOWN;
1359 }
1360
1361
1362
TestBug4315(void)1363 static void TestBug4315(void) {
1364 UErrorCode theICUError = U_ZERO_ERROR;
1365 URegularExpression *theRegEx;
1366 UChar *textBuff;
1367 const char *thePattern;
1368 UChar theString[100];
1369 UChar *destFields[24];
1370 int32_t neededLength1;
1371 int32_t neededLength2;
1372
1373 int32_t wordCount = 0;
1374 int32_t destFieldsSize = 24;
1375
1376 thePattern = "ck ";
1377 u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1378
1379 /* open a regex */
1380 theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1381 TEST_ASSERT_SUCCESS(theICUError);
1382
1383 /* set the input string */
1384 uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1385 TEST_ASSERT_SUCCESS(theICUError);
1386
1387 /* split */
1388 /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1389 * error occurs! */
1390 wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1391 destFieldsSize, &theICUError);
1392
1393 TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1394 TEST_ASSERT(wordCount==3);
1395
1396 if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1397 {
1398 theICUError = U_ZERO_ERROR;
1399 textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1400 wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1401 destFields, destFieldsSize, &theICUError);
1402 TEST_ASSERT(wordCount==3);
1403 TEST_ASSERT_SUCCESS(theICUError);
1404 TEST_ASSERT(neededLength1 == neededLength2);
1405 TEST_ASSERT_STRING("The qui", destFields[0], true);
1406 TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], true);
1407 TEST_ASSERT_STRING("turtle.", destFields[2], true);
1408 TEST_ASSERT(destFields[3] == NULL);
1409 free(textBuff);
1410 }
1411 uregex_close(theRegEx);
1412 }
1413
1414 /* Based on TestRegexCAPI() */
TestUTextAPI(void)1415 static void TestUTextAPI(void) {
1416 UErrorCode status = U_ZERO_ERROR;
1417 URegularExpression *re;
1418 UText patternText = UTEXT_INITIALIZER;
1419 UChar pat[200];
1420 const char patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1421
1422 /* Minimalist open/close */
1423 utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1424 re = uregex_openUText(&patternText, 0, 0, &status);
1425 if (U_FAILURE(status)) {
1426 log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1427 utext_close(&patternText);
1428 return;
1429 }
1430 uregex_close(re);
1431
1432 /* Open with all flag values set */
1433 status = U_ZERO_ERROR;
1434 re = uregex_openUText(&patternText,
1435 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1436 0, &status);
1437 TEST_ASSERT_SUCCESS(status);
1438 uregex_close(re);
1439
1440 /* Open with an invalid flag */
1441 status = U_ZERO_ERROR;
1442 re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1443 TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1444 uregex_close(re);
1445
1446 /* open with an invalid parameter */
1447 status = U_ZERO_ERROR;
1448 re = uregex_openUText(NULL,
1449 UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1450 TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1451
1452 /*
1453 * clone
1454 */
1455 {
1456 URegularExpression *clone1;
1457 URegularExpression *clone2;
1458 URegularExpression *clone3;
1459 UChar testString1[30];
1460 UChar testString2[30];
1461 UBool result;
1462
1463
1464 status = U_ZERO_ERROR;
1465 re = uregex_openUText(&patternText, 0, 0, &status);
1466 TEST_ASSERT_SUCCESS(status);
1467 clone1 = uregex_clone(re, &status);
1468 TEST_ASSERT_SUCCESS(status);
1469 TEST_ASSERT(clone1 != NULL);
1470
1471 status = U_ZERO_ERROR;
1472 clone2 = uregex_clone(re, &status);
1473 TEST_ASSERT_SUCCESS(status);
1474 TEST_ASSERT(clone2 != NULL);
1475 uregex_close(re);
1476
1477 status = U_ZERO_ERROR;
1478 clone3 = uregex_clone(clone2, &status);
1479 TEST_ASSERT_SUCCESS(status);
1480 TEST_ASSERT(clone3 != NULL);
1481
1482 u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
1483 u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
1484
1485 status = U_ZERO_ERROR;
1486 uregex_setText(clone1, testString1, -1, &status);
1487 TEST_ASSERT_SUCCESS(status);
1488 result = uregex_lookingAt(clone1, 0, &status);
1489 TEST_ASSERT_SUCCESS(status);
1490 TEST_ASSERT(result==true);
1491
1492 status = U_ZERO_ERROR;
1493 uregex_setText(clone2, testString2, -1, &status);
1494 TEST_ASSERT_SUCCESS(status);
1495 result = uregex_lookingAt(clone2, 0, &status);
1496 TEST_ASSERT_SUCCESS(status);
1497 TEST_ASSERT(result==false);
1498 result = uregex_find(clone2, 0, &status);
1499 TEST_ASSERT_SUCCESS(status);
1500 TEST_ASSERT(result==true);
1501
1502 uregex_close(clone1);
1503 uregex_close(clone2);
1504 uregex_close(clone3);
1505
1506 }
1507
1508 /*
1509 * pattern() and patternText()
1510 */
1511 {
1512 const UChar *resultPat;
1513 int32_t resultLen;
1514 UText *resultText;
1515 const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1516 const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1517 u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
1518 status = U_ZERO_ERROR;
1519
1520 utext_openUTF8(&patternText, str_hello, -1, &status);
1521 re = uregex_open(pat, -1, 0, NULL, &status);
1522 resultPat = uregex_pattern(re, &resultLen, &status);
1523 TEST_ASSERT_SUCCESS(status);
1524
1525 /* The TEST_ASSERT_SUCCESS above should change too... */
1526 if (U_SUCCESS(status)) {
1527 TEST_ASSERT(resultLen == -1);
1528 TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1529 }
1530
1531 resultText = uregex_patternUText(re, &status);
1532 TEST_ASSERT_SUCCESS(status);
1533 TEST_ASSERT_UTEXT(str_hello, resultText);
1534
1535 uregex_close(re);
1536
1537 status = U_ZERO_ERROR;
1538 re = uregex_open(pat, 3, 0, NULL, &status);
1539 resultPat = uregex_pattern(re, &resultLen, &status);
1540 TEST_ASSERT_SUCCESS(status);
1541
1542 /* The TEST_ASSERT_SUCCESS above should change too... */
1543 if (U_SUCCESS(status)) {
1544 TEST_ASSERT(resultLen == 3);
1545 TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1546 TEST_ASSERT(u_strlen(resultPat) == 3);
1547 }
1548
1549 resultText = uregex_patternUText(re, &status);
1550 TEST_ASSERT_SUCCESS(status);
1551 TEST_ASSERT_UTEXT(str_hel, resultText);
1552
1553 uregex_close(re);
1554 }
1555
1556 /*
1557 * setUText() and lookingAt()
1558 */
1559 {
1560 UText text1 = UTEXT_INITIALIZER;
1561 UText text2 = UTEXT_INITIALIZER;
1562 UBool result;
1563 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1564 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1565 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1566 status = U_ZERO_ERROR;
1567 utext_openUTF8(&text1, str_abcccd, -1, &status);
1568 utext_openUTF8(&text2, str_abcccxd, -1, &status);
1569
1570 utext_openUTF8(&patternText, str_abcd, -1, &status);
1571 re = uregex_openUText(&patternText, 0, NULL, &status);
1572 TEST_ASSERT_SUCCESS(status);
1573
1574 /* Operation before doing a setText should fail... */
1575 status = U_ZERO_ERROR;
1576 uregex_lookingAt(re, 0, &status);
1577 TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1578
1579 status = U_ZERO_ERROR;
1580 uregex_setUText(re, &text1, &status);
1581 result = uregex_lookingAt(re, 0, &status);
1582 TEST_ASSERT(result == true);
1583 TEST_ASSERT_SUCCESS(status);
1584
1585 status = U_ZERO_ERROR;
1586 uregex_setUText(re, &text2, &status);
1587 result = uregex_lookingAt(re, 0, &status);
1588 TEST_ASSERT(result == false);
1589 TEST_ASSERT_SUCCESS(status);
1590
1591 status = U_ZERO_ERROR;
1592 uregex_setUText(re, &text1, &status);
1593 result = uregex_lookingAt(re, 0, &status);
1594 TEST_ASSERT(result == true);
1595 TEST_ASSERT_SUCCESS(status);
1596
1597 uregex_close(re);
1598 utext_close(&text1);
1599 utext_close(&text2);
1600 }
1601
1602
1603 /*
1604 * getText() and getUText()
1605 */
1606 {
1607 UText text1 = UTEXT_INITIALIZER;
1608 UText text2 = UTEXT_INITIALIZER;
1609 UChar text2Chars[20];
1610 UText *resultText;
1611 const UChar *result;
1612 int32_t textLength;
1613 const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1614 const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1615 const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1616
1617
1618 status = U_ZERO_ERROR;
1619 utext_openUTF8(&text1, str_abcccd, -1, &status);
1620 u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
1621 utext_openUChars(&text2, text2Chars, -1, &status);
1622
1623 utext_openUTF8(&patternText, str_abcd, -1, &status);
1624 re = uregex_openUText(&patternText, 0, NULL, &status);
1625
1626 /* First set a UText */
1627 uregex_setUText(re, &text1, &status);
1628 resultText = uregex_getUText(re, NULL, &status);
1629 TEST_ASSERT_SUCCESS(status);
1630 TEST_ASSERT(resultText != &text1);
1631 utext_setNativeIndex(resultText, 0);
1632 utext_setNativeIndex(&text1, 0);
1633 TEST_ASSERT(testUTextEqual(resultText, &text1));
1634 utext_close(resultText);
1635
1636 result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1637 (void)result; /* Suppress set but not used warning. */
1638 TEST_ASSERT(textLength == -1 || textLength == 6);
1639 resultText = uregex_getUText(re, NULL, &status);
1640 TEST_ASSERT_SUCCESS(status);
1641 TEST_ASSERT(resultText != &text1);
1642 utext_setNativeIndex(resultText, 0);
1643 utext_setNativeIndex(&text1, 0);
1644 TEST_ASSERT(testUTextEqual(resultText, &text1));
1645 utext_close(resultText);
1646
1647 /* Then set a UChar * */
1648 uregex_setText(re, text2Chars, 7, &status);
1649 resultText = uregex_getUText(re, NULL, &status);
1650 TEST_ASSERT_SUCCESS(status);
1651 utext_setNativeIndex(resultText, 0);
1652 utext_setNativeIndex(&text2, 0);
1653 TEST_ASSERT(testUTextEqual(resultText, &text2));
1654 utext_close(resultText);
1655 result = uregex_getText(re, &textLength, &status);
1656 TEST_ASSERT(textLength == 7);
1657
1658 uregex_close(re);
1659 utext_close(&text1);
1660 utext_close(&text2);
1661 }
1662
1663 /*
1664 * matches()
1665 */
1666 {
1667 UText text1 = UTEXT_INITIALIZER;
1668 UBool result;
1669 UText nullText = UTEXT_INITIALIZER;
1670 const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1671 const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1672
1673 status = U_ZERO_ERROR;
1674 utext_openUTF8(&text1, str_abcccde, -1, &status);
1675 utext_openUTF8(&patternText, str_abcd, -1, &status);
1676 re = uregex_openUText(&patternText, 0, NULL, &status);
1677
1678 uregex_setUText(re, &text1, &status);
1679 result = uregex_matches(re, 0, &status);
1680 TEST_ASSERT(result == false);
1681 TEST_ASSERT_SUCCESS(status);
1682 uregex_close(re);
1683
1684 status = U_ZERO_ERROR;
1685 re = uregex_openC(".?", 0, NULL, &status);
1686 uregex_setUText(re, &text1, &status);
1687 result = uregex_matches(re, 7, &status);
1688 TEST_ASSERT(result == true);
1689 TEST_ASSERT_SUCCESS(status);
1690
1691 status = U_ZERO_ERROR;
1692 utext_openUTF8(&nullText, "", -1, &status);
1693 uregex_setUText(re, &nullText, &status);
1694 TEST_ASSERT_SUCCESS(status);
1695 result = uregex_matches(re, 0, &status);
1696 TEST_ASSERT(result == true);
1697 TEST_ASSERT_SUCCESS(status);
1698
1699 uregex_close(re);
1700 utext_close(&text1);
1701 utext_close(&nullText);
1702 }
1703
1704
1705 /*
1706 * lookingAt() Used in setText test.
1707 */
1708
1709
1710 /*
1711 * find(), findNext, start, end, reset
1712 */
1713 {
1714 UChar text1[50];
1715 UBool result;
1716 u_uastrncpy(text1, "012rx5rx890rxrx...", UPRV_LENGTHOF(text1));
1717 status = U_ZERO_ERROR;
1718 re = uregex_openC("rx", 0, NULL, &status);
1719
1720 uregex_setText(re, text1, -1, &status);
1721 result = uregex_find(re, 0, &status);
1722 TEST_ASSERT(result == true);
1723 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1724 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1725 TEST_ASSERT_SUCCESS(status);
1726
1727 result = uregex_find(re, 9, &status);
1728 TEST_ASSERT(result == true);
1729 TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1730 TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1731 TEST_ASSERT_SUCCESS(status);
1732
1733 result = uregex_find(re, 14, &status);
1734 TEST_ASSERT(result == false);
1735 TEST_ASSERT_SUCCESS(status);
1736
1737 status = U_ZERO_ERROR;
1738 uregex_reset(re, 0, &status);
1739
1740 result = uregex_findNext(re, &status);
1741 TEST_ASSERT(result == true);
1742 TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1743 TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1744 TEST_ASSERT_SUCCESS(status);
1745
1746 result = uregex_findNext(re, &status);
1747 TEST_ASSERT(result == true);
1748 TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1749 TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1750 TEST_ASSERT_SUCCESS(status);
1751
1752 status = U_ZERO_ERROR;
1753 uregex_reset(re, 12, &status);
1754
1755 result = uregex_findNext(re, &status);
1756 TEST_ASSERT(result == true);
1757 TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1758 TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1759 TEST_ASSERT_SUCCESS(status);
1760
1761 result = uregex_findNext(re, &status);
1762 TEST_ASSERT(result == false);
1763 TEST_ASSERT_SUCCESS(status);
1764
1765 uregex_close(re);
1766 }
1767
1768 /*
1769 * groupUText()
1770 */
1771 {
1772 UChar text1[80];
1773 UText *actual;
1774 UBool result;
1775 int64_t groupLen = 0;
1776 UChar groupBuf[20];
1777
1778 u_uastrncpy(text1, "noise abc interior def, and this is off the end", UPRV_LENGTHOF(text1));
1779
1780 status = U_ZERO_ERROR;
1781 re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1782 TEST_ASSERT_SUCCESS(status);
1783
1784 uregex_setText(re, text1, -1, &status);
1785 result = uregex_find(re, 0, &status);
1786 TEST_ASSERT(result==true);
1787
1788 /* Capture Group 0 with shallow clone API. Should succeed. */
1789 status = U_ZERO_ERROR;
1790 actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
1791 TEST_ASSERT_SUCCESS(status);
1792
1793 TEST_ASSERT(utext_getNativeIndex(actual) == 6); /* index of "abc " within "noise abc ..." */
1794 TEST_ASSERT(groupLen == 16); /* length of "abc interior def" */
1795 utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1796
1797 TEST_ASSERT_STRING("abc interior def", groupBuf, true);
1798 utext_close(actual);
1799
1800 /* Capture group #1. Should succeed. */
1801 status = U_ZERO_ERROR;
1802
1803 actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
1804 TEST_ASSERT_SUCCESS(status);
1805 TEST_ASSERT(9 == utext_getNativeIndex(actual)); /* index of " interior " within "noise abc interior def ... " */
1806 /* (within the string text1) */
1807 TEST_ASSERT(10 == groupLen); /* length of " interior " */
1808 utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1809 TEST_ASSERT_STRING(" interior ", groupBuf, true);
1810
1811 utext_close(actual);
1812
1813 /* Capture group out of range. Error. */
1814 status = U_ZERO_ERROR;
1815 actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
1816 TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1817 utext_close(actual);
1818
1819 uregex_close(re);
1820 }
1821
1822 /*
1823 * replaceFirst()
1824 */
1825 {
1826 UChar text1[80];
1827 UChar text2[80];
1828 UText replText = UTEXT_INITIALIZER;
1829 UText *result;
1830 const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1831 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1832 const char str_u00411U00000042a[] = { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1833 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
1834 const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1835 const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1836 status = U_ZERO_ERROR;
1837 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
1838 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1839 utext_openUTF8(&replText, str_1x, -1, &status);
1840
1841 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1842 TEST_ASSERT_SUCCESS(status);
1843
1844 /* Normal case, with match */
1845 uregex_setText(re, text1, -1, &status);
1846 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1847 TEST_ASSERT_SUCCESS(status);
1848 TEST_ASSERT_UTEXT(str_Replxxx, result);
1849 utext_close(result);
1850
1851 /* No match. Text should copy to output with no changes. */
1852 uregex_setText(re, text2, -1, &status);
1853 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1854 TEST_ASSERT_SUCCESS(status);
1855 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1856 utext_close(result);
1857
1858 /* Unicode escapes */
1859 uregex_setText(re, text1, -1, &status);
1860 utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1861 result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1862 TEST_ASSERT_SUCCESS(status);
1863 TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1864 utext_close(result);
1865
1866 uregex_close(re);
1867 utext_close(&replText);
1868 }
1869
1870
1871 /*
1872 * replaceAll()
1873 */
1874 {
1875 UChar text1[80];
1876 UChar text2[80];
1877 UText replText = UTEXT_INITIALIZER;
1878 UText *result;
1879 const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1880 const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1881 const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1882 status = U_ZERO_ERROR;
1883 u_uastrncpy(text1, "Replace xaax x1x x...x.", UPRV_LENGTHOF(text1));
1884 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1885 utext_openUTF8(&replText, str_1, -1, &status);
1886
1887 re = uregex_openC("x(.*?)x", 0, NULL, &status);
1888 TEST_ASSERT_SUCCESS(status);
1889
1890 /* Normal case, with match */
1891 uregex_setText(re, text1, -1, &status);
1892 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1893 TEST_ASSERT_SUCCESS(status);
1894 TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1895 utext_close(result);
1896
1897 /* No match. Text should copy to output with no changes. */
1898 uregex_setText(re, text2, -1, &status);
1899 result = uregex_replaceAllUText(re, &replText, NULL, &status);
1900 TEST_ASSERT_SUCCESS(status);
1901 TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1902 utext_close(result);
1903
1904 uregex_close(re);
1905 utext_close(&replText);
1906 }
1907
1908
1909 /*
1910 * appendReplacement()
1911 */
1912 {
1913 UChar text[100];
1914 UChar repl[100];
1915 UChar buf[100];
1916 UChar *bufPtr;
1917 int32_t bufCap;
1918
1919 status = U_ZERO_ERROR;
1920 re = uregex_openC(".*", 0, 0, &status);
1921 TEST_ASSERT_SUCCESS(status);
1922
1923 u_uastrncpy(text, "whatever", UPRV_LENGTHOF(text));
1924 u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1925 uregex_setText(re, text, -1, &status);
1926
1927 /* match covers whole target string */
1928 uregex_find(re, 0, &status);
1929 TEST_ASSERT_SUCCESS(status);
1930 bufPtr = buf;
1931 bufCap = UPRV_LENGTHOF(buf);
1932 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1933 TEST_ASSERT_SUCCESS(status);
1934 TEST_ASSERT_STRING("some other", buf, true);
1935
1936 /* Match has \u \U escapes */
1937 uregex_find(re, 0, &status);
1938 TEST_ASSERT_SUCCESS(status);
1939 bufPtr = buf;
1940 bufCap = UPRV_LENGTHOF(buf);
1941 u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1942 uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1943 TEST_ASSERT_SUCCESS(status);
1944 TEST_ASSERT_STRING("abcAB \\ $ abc", buf, true);
1945
1946 uregex_close(re);
1947 }
1948
1949
1950 /*
1951 * appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1952 */
1953
1954 /*
1955 * splitUText()
1956 */
1957 {
1958 UChar textToSplit[80];
1959 UChar text2[80];
1960 UText *fields[10];
1961 int32_t numFields;
1962 int32_t i;
1963
1964 u_uastrncpy(textToSplit, "first : second: third", UPRV_LENGTHOF(textToSplit));
1965 u_uastrncpy(text2, "No match here.", UPRV_LENGTHOF(text2));
1966
1967 status = U_ZERO_ERROR;
1968 re = uregex_openC(":", 0, NULL, &status);
1969
1970
1971 /* Simple split */
1972
1973 uregex_setText(re, textToSplit, -1, &status);
1974 TEST_ASSERT_SUCCESS(status);
1975
1976 /* The TEST_ASSERT_SUCCESS call above should change too... */
1977 if (U_SUCCESS(status)) {
1978 memset(fields, 0, sizeof(fields));
1979 numFields = uregex_splitUText(re, fields, 10, &status);
1980 TEST_ASSERT_SUCCESS(status);
1981
1982 /* The TEST_ASSERT_SUCCESS call above should change too... */
1983 if(U_SUCCESS(status)) {
1984 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1985 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
1986 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* ' third' */
1987 TEST_ASSERT(numFields == 3);
1988 TEST_ASSERT_UTEXT(str_first, fields[0]);
1989 TEST_ASSERT_UTEXT(str_second, fields[1]);
1990 TEST_ASSERT_UTEXT(str_third, fields[2]);
1991 TEST_ASSERT(fields[3] == NULL);
1992 }
1993 for(i = 0; i < numFields; i++) {
1994 utext_close(fields[i]);
1995 }
1996 }
1997
1998 uregex_close(re);
1999
2000
2001 /* Split with too few output strings available */
2002 status = U_ZERO_ERROR;
2003 re = uregex_openC(":", 0, NULL, &status);
2004 uregex_setText(re, textToSplit, -1, &status);
2005 TEST_ASSERT_SUCCESS(status);
2006
2007 /* The TEST_ASSERT_SUCCESS call above should change too... */
2008 if(U_SUCCESS(status)) {
2009 fields[0] = NULL;
2010 fields[1] = NULL;
2011 fields[2] = &patternText;
2012 numFields = uregex_splitUText(re, fields, 2, &status);
2013 TEST_ASSERT_SUCCESS(status);
2014
2015 /* The TEST_ASSERT_SUCCESS call above should change too... */
2016 if(U_SUCCESS(status)) {
2017 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2018 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second: third */
2019 TEST_ASSERT(numFields == 2);
2020 TEST_ASSERT_UTEXT(str_first, fields[0]);
2021 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
2022 TEST_ASSERT(fields[2] == &patternText);
2023 }
2024 for(i = 0; i < numFields; i++) {
2025 utext_close(fields[i]);
2026 }
2027 }
2028
2029 uregex_close(re);
2030 }
2031
2032 /* splitUText(), part 2. Patterns with capture groups. The capture group text
2033 * comes out as additional fields. */
2034 {
2035 UChar textToSplit[80];
2036 UText *fields[10];
2037 int32_t numFields;
2038 int32_t i;
2039
2040 u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", UPRV_LENGTHOF(textToSplit));
2041
2042 status = U_ZERO_ERROR;
2043 re = uregex_openC("<(.*?)>", 0, NULL, &status);
2044
2045 uregex_setText(re, textToSplit, -1, &status);
2046 TEST_ASSERT_SUCCESS(status);
2047
2048 /* The TEST_ASSERT_SUCCESS call above should change too... */
2049 if(U_SUCCESS(status)) {
2050 memset(fields, 0, sizeof(fields));
2051 numFields = uregex_splitUText(re, fields, 10, &status);
2052 TEST_ASSERT_SUCCESS(status);
2053
2054 /* The TEST_ASSERT_SUCCESS call above should change too... */
2055 if(U_SUCCESS(status)) {
2056 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2057 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2058 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2059 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2060 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2061
2062 TEST_ASSERT(numFields == 5);
2063 TEST_ASSERT_UTEXT(str_first, fields[0]);
2064 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2065 TEST_ASSERT_UTEXT(str_second, fields[2]);
2066 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2067 TEST_ASSERT_UTEXT(str_third, fields[4]);
2068 TEST_ASSERT(fields[5] == NULL);
2069 }
2070 for(i = 0; i < numFields; i++) {
2071 utext_close(fields[i]);
2072 }
2073 }
2074
2075 /* Split with too few output strings available (2) */
2076 status = U_ZERO_ERROR;
2077 fields[0] = NULL;
2078 fields[1] = NULL;
2079 fields[2] = &patternText;
2080 numFields = uregex_splitUText(re, fields, 2, &status);
2081 TEST_ASSERT_SUCCESS(status);
2082
2083 /* The TEST_ASSERT_SUCCESS call above should change too... */
2084 if(U_SUCCESS(status)) {
2085 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2086 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2087 TEST_ASSERT(numFields == 2);
2088 TEST_ASSERT_UTEXT(str_first, fields[0]);
2089 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2090 TEST_ASSERT(fields[2] == &patternText);
2091 }
2092 for(i = 0; i < numFields; i++) {
2093 utext_close(fields[i]);
2094 }
2095
2096
2097 /* Split with too few output strings available (3) */
2098 status = U_ZERO_ERROR;
2099 fields[0] = NULL;
2100 fields[1] = NULL;
2101 fields[2] = NULL;
2102 fields[3] = &patternText;
2103 numFields = uregex_splitUText(re, fields, 3, &status);
2104 TEST_ASSERT_SUCCESS(status);
2105
2106 /* The TEST_ASSERT_SUCCESS call above should change too... */
2107 if(U_SUCCESS(status)) {
2108 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2109 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2110 const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* second<tag-b> third */
2111 TEST_ASSERT(numFields == 3);
2112 TEST_ASSERT_UTEXT(str_first, fields[0]);
2113 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2114 TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2115 TEST_ASSERT(fields[3] == &patternText);
2116 }
2117 for(i = 0; i < numFields; i++) {
2118 utext_close(fields[i]);
2119 }
2120
2121 /* Split with just enough output strings available (5) */
2122 status = U_ZERO_ERROR;
2123 fields[0] = NULL;
2124 fields[1] = NULL;
2125 fields[2] = NULL;
2126 fields[3] = NULL;
2127 fields[4] = NULL;
2128 fields[5] = &patternText;
2129 numFields = uregex_splitUText(re, fields, 5, &status);
2130 TEST_ASSERT_SUCCESS(status);
2131
2132 /* The TEST_ASSERT_SUCCESS call above should change too... */
2133 if(U_SUCCESS(status)) {
2134 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2135 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2136 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2137 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2138 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* third */
2139
2140 TEST_ASSERT(numFields == 5);
2141 TEST_ASSERT_UTEXT(str_first, fields[0]);
2142 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2143 TEST_ASSERT_UTEXT(str_second, fields[2]);
2144 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2145 TEST_ASSERT_UTEXT(str_third, fields[4]);
2146 TEST_ASSERT(fields[5] == &patternText);
2147 }
2148 for(i = 0; i < numFields; i++) {
2149 utext_close(fields[i]);
2150 }
2151
2152 /* Split, end of text is a field delimiter. */
2153 status = U_ZERO_ERROR;
2154 uregex_setText(re, textToSplit, (int32_t)strlen("first <tag-a> second<tag-b>"), &status);
2155 TEST_ASSERT_SUCCESS(status);
2156
2157 /* The TEST_ASSERT_SUCCESS call above should change too... */
2158 if(U_SUCCESS(status)) {
2159 memset(fields, 0, sizeof(fields));
2160 fields[9] = &patternText;
2161 numFields = uregex_splitUText(re, fields, 9, &status);
2162 TEST_ASSERT_SUCCESS(status);
2163
2164 /* The TEST_ASSERT_SUCCESS call above should change too... */
2165 if(U_SUCCESS(status)) {
2166 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first */
2167 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2168 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* second */
2169 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2170 const char str_empty[] = { 0x00 };
2171
2172 TEST_ASSERT(numFields == 5);
2173 TEST_ASSERT_UTEXT(str_first, fields[0]);
2174 TEST_ASSERT_UTEXT(str_taga, fields[1]);
2175 TEST_ASSERT_UTEXT(str_second, fields[2]);
2176 TEST_ASSERT_UTEXT(str_tagb, fields[3]);
2177 TEST_ASSERT_UTEXT(str_empty, fields[4]);
2178 TEST_ASSERT(fields[5] == NULL);
2179 TEST_ASSERT(fields[8] == NULL);
2180 TEST_ASSERT(fields[9] == &patternText);
2181 }
2182 for(i = 0; i < numFields; i++) {
2183 utext_close(fields[i]);
2184 }
2185 }
2186
2187 uregex_close(re);
2188 }
2189 utext_close(&patternText);
2190 }
2191
2192
TestRefreshInput(void)2193 static void TestRefreshInput(void) {
2194 /*
2195 * RefreshInput changes out the input of a URegularExpression without
2196 * changing anything else in the match state. Used with Java JNI,
2197 * when Java moves the underlying string storage. This test
2198 * runs a find() loop, moving the text after the first match.
2199 * The right number of matches should still be found.
2200 */
2201 UChar testStr[] = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0}; /* = "A B C" */
2202 UChar movedStr[] = { 0, 0, 0, 0, 0, 0};
2203 UErrorCode status = U_ZERO_ERROR;
2204 URegularExpression *re;
2205 UText ut1 = UTEXT_INITIALIZER;
2206 UText ut2 = UTEXT_INITIALIZER;
2207
2208 re = uregex_openC("[ABC]", 0, 0, &status);
2209 TEST_ASSERT_SUCCESS(status);
2210
2211 utext_openUChars(&ut1, testStr, -1, &status);
2212 TEST_ASSERT_SUCCESS(status);
2213 uregex_setUText(re, &ut1, &status);
2214 TEST_ASSERT_SUCCESS(status);
2215
2216 /* Find the first match "A" in the original string */
2217 TEST_ASSERT(uregex_findNext(re, &status));
2218 TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2219
2220 /* Move the string, kill the original string. */
2221 u_strcpy(movedStr, testStr);
2222 u_memset(testStr, 0, u_strlen(testStr));
2223 utext_openUChars(&ut2, movedStr, -1, &status);
2224 TEST_ASSERT_SUCCESS(status);
2225 uregex_refreshUText(re, &ut2, &status);
2226 TEST_ASSERT_SUCCESS(status);
2227
2228 /* Find the following two matches, now working in the moved string. */
2229 TEST_ASSERT(uregex_findNext(re, &status));
2230 TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2231 TEST_ASSERT(uregex_findNext(re, &status));
2232 TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2233 TEST_ASSERT(false == uregex_findNext(re, &status));
2234
2235 uregex_close(re);
2236 }
2237
2238
TestBug8421(void)2239 static void TestBug8421(void) {
2240 /* Bug 8421: setTimeLimit on a regular expression before setting text to be matched
2241 * was failing.
2242 */
2243 URegularExpression *re;
2244 UErrorCode status = U_ZERO_ERROR;
2245 int32_t limit = -1;
2246
2247 re = uregex_openC("abc", 0, 0, &status);
2248 TEST_ASSERT_SUCCESS(status);
2249
2250 limit = uregex_getTimeLimit(re, &status);
2251 TEST_ASSERT_SUCCESS(status);
2252 TEST_ASSERT(limit == 0);
2253
2254 uregex_setTimeLimit(re, 100, &status);
2255 TEST_ASSERT_SUCCESS(status);
2256 limit = uregex_getTimeLimit(re, &status);
2257 TEST_ASSERT_SUCCESS(status);
2258 TEST_ASSERT(limit == 100);
2259
2260 uregex_close(re);
2261 }
2262
FindCallback(const void * context,int64_t matchIndex)2263 static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
2264 // suppress compiler warnings about unused variables
2265 (void)context;
2266 (void)matchIndex;
2267 return false;
2268 }
2269
MatchCallback(const void * context,int32_t steps)2270 static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
2271 // suppress compiler warnings about unused variables
2272 (void)context;
2273 (void)steps;
2274 return false;
2275 }
2276
TestBug10815()2277 static void TestBug10815() {
2278 /* Bug 10815: uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
2279 * when the callback function specified by uregex_setMatchCallback() returns false
2280 */
2281 URegularExpression *re;
2282 UErrorCode status = U_ZERO_ERROR;
2283 UChar text[100];
2284
2285
2286 // findNext() with a find progress callback function.
2287
2288 re = uregex_openC(".z", 0, 0, &status);
2289 TEST_ASSERT_SUCCESS(status);
2290
2291 u_uastrncpy(text, "Hello, World.", UPRV_LENGTHOF(text));
2292 uregex_setText(re, text, -1, &status);
2293 TEST_ASSERT_SUCCESS(status);
2294
2295 uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
2296 TEST_ASSERT_SUCCESS(status);
2297
2298 uregex_findNext(re, &status);
2299 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2300
2301 uregex_close(re);
2302
2303 // findNext() with a match progress callback function.
2304
2305 status = U_ZERO_ERROR;
2306 re = uregex_openC("((xxx)*)*y", 0, 0, &status);
2307 TEST_ASSERT_SUCCESS(status);
2308
2309 // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2310 // it will appear to be stuck in a (near) infinite loop.
2311 u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", UPRV_LENGTHOF(text));
2312 uregex_setText(re, text, -1, &status);
2313 TEST_ASSERT_SUCCESS(status);
2314
2315 uregex_setMatchCallback(re, MatchCallback, NULL, &status);
2316 TEST_ASSERT_SUCCESS(status);
2317
2318 uregex_findNext(re, &status);
2319 TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2320
2321 uregex_close(re);
2322 }
2323
2324
2325 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
2326