• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 2004-2015, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /********************************************************************************
9 *
10 * File reapits.c
11 *
12 *********************************************************************************/
13 /*C API TEST FOR Regular Expressions */
14 /**
15 *   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
16 *   try to test the full functionality.  It just calls each function and verifies that it
17 *   works on a basic level.
18 *
19 *   More complete testing of regular expression functionality is done with the C++ tests.
20 **/
21 
22 #include "unicode/utypes.h"
23 
24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
25 
26 #include <stdlib.h>
27 #include <string.h>
28 #include "unicode/uloc.h"
29 #include "unicode/uregex.h"
30 #include "unicode/ustring.h"
31 #include "unicode/utext.h"
32 #include "unicode/utf8.h"
33 #include "cintltst.h"
34 #include "cmemory.h"
35 
/*
 * TEST_ASSERT_SUCCESS(status)
 *     Calls log_data_err(), tagged with the invoking file and line and the
 *     name of the error code, when `status` satisfies U_FAILURE().
 *     Nothing is returned and control simply continues afterwards.
 *
 *     The expansion is a plain brace block, so the macro can be followed by
 *     a semicolon or used without one.
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
}
38 
/*
 * TEST_ASSERT(expr)
 *     Calls log_err() with the invoking file, line and the source text of
 *     `expr` when `expr` compares equal to FALSE.  Control continues either way.
 *
 *     The expansion is a plain brace block, so the macro can be followed by
 *     a semicolon or used without one.
 */
#define TEST_ASSERT(expr) { \
    if ((expr)==FALSE) { \
        log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", \
                __FILE__, __LINE__, #expr); \
    } \
}
41 
/*
 *   TEST_SETUP and TEST_TEARDOWN
 *         macros to handle the boilerplate around setting up regex test cases.
 *         parameters to setup:
 *              pattern:     The regex pattern, a (char *) null terminated C string.
 *              testString:  The string data, also a (char *) C string.
 *              flags:       Regex flags to set when compiling the pattern
 *
 *         Put arbitrary test code between SETUP and TEARDOWN.
 *         "re" is the compiled, ready-to-go regular expression.
 *
 *         The two macros must always be used as a pair: TEST_SETUP opens a
 *         brace block (and an inner "if (U_SUCCESS(status))" block) that
 *         TEST_TEARDOWN closes.  The enclosing function must declare
 *         "status" (UErrorCode) and "re" (URegularExpression *); both are
 *         assigned by TEST_SETUP.  "srcString" is a heap-allocated UChar copy
 *         of testString, local to the block and released by TEST_TEARDOWN.
 *
 *         The test code between the macros runs only when both the pattern
 *         compile and the text setup succeeded.  If the allocation of
 *         srcString fails, status is set to U_MEMORY_ALLOCATION_ERROR and
 *         the copy and uregex_setText() are skipped, so the test body does
 *         not run.
 */
#define TEST_SETUP(pattern, testString, flags) {  \
    UChar   *srcString = NULL;  \
    status = U_ZERO_ERROR; \
    re = uregex_openC(pattern, flags, NULL, &status);  \
    TEST_ASSERT_SUCCESS(status);   \
    srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
    if (srcString == NULL) { \
        status = U_MEMORY_ALLOCATION_ERROR; \
    } else { \
        u_uastrncpy(srcString, testString,  strlen(testString)+1); \
        uregex_setText(re, srcString, -1, &status); \
    } \
    TEST_ASSERT_SUCCESS(status);  \
    if (U_SUCCESS(status)) {

#define TEST_TEARDOWN  \
    }  \
    TEST_ASSERT_SUCCESS(status);  \
    uregex_close(re);  \
    free(srcString);   \
    }
70 
71 
72 /**
73  * @param expected utf-8 array of bytes to be expected
74  */
test_assert_string(const char * expected,const UChar * actual,UBool nulTerm,const char * file,int line)75 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
76      char     buf_inside_macro[120];
77      int32_t  len = (int32_t)strlen(expected);
78      UBool    success;
79      if (nulTerm) {
80          u_austrncpy(buf_inside_macro, (actual), len+1);
81          buf_inside_macro[len+2] = 0;
82          success = (strcmp((expected), buf_inside_macro) == 0);
83      } else {
84          u_austrncpy(buf_inside_macro, (actual), len);
85          buf_inside_macro[len+1] = 0;
86          success = (strncmp((expected), buf_inside_macro, len) == 0);
87      }
88      if (success == FALSE) {
89          log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
90              file, line, (expected), buf_inside_macro);
91      }
92 }
93 
94 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
95 
96 
equals_utf8_utext(const char * utf8,UText * utext)97 static UBool equals_utf8_utext(const char *utf8, UText *utext) {
98     int32_t u8i = 0;
99     UChar32 u8c = 0;
100     UChar32 utc = 0;
101     UBool   stringsEqual = TRUE;
102     utext_setNativeIndex(utext, 0);
103     for (;;) {
104         U8_NEXT_UNSAFE(utf8, u8i, u8c);
105         utc = utext_next32(utext);
106         if (u8c == 0 && utc == U_SENTINEL) {
107             break;
108         }
109         if (u8c != utc || u8c == 0) {
110             stringsEqual = FALSE;
111             break;
112         }
113     }
114     return stringsEqual;
115 }
116 
117 
/**
 * Check that a UText holds the expected text and log a failure, tagged with
 * the caller's file and line, when it does not.
 *
 * On a mismatch the actual contents are appended to the message one code
 * point at a time: code points strictly between 0x20 and 0x7e are written
 * as characters, everything else as a hexadecimal number.
 *
 * @param expected the expected text, compared via equals_utf8_utext()
 * @param actual   the UText to check; its iteration position is moved
 * @param file     source file name reported in the failure message
 * @param line     source line number reported in the failure message
 */
static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
    UChar32 cp;

    utext_setNativeIndex(actual, 0);
    if (equals_utf8_utext(expected, actual)) {
        return;
    }

    log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
    for (cp = utext_next32From(actual, 0); cp != U_SENTINEL; cp = UTEXT_NEXT32(actual)) {
        if (cp <= 0x20 || cp >= 0x7e) {
            log_err("%#x", cp);
        } else {
            log_err("%c", cp);
        }
    }
    log_err("\"\n");
}

/*
 * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
 *     Note:  Expected is a UTF-8 encoded string, _not_ the system code page.
 *     Supplies the invoking file and line to test_assert_utext().
 */
#define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
141 
testUTextEqual(UText * uta,UText * utb)142 static UBool testUTextEqual(UText *uta, UText *utb) {
143     UChar32 ca = 0;
144     UChar32 cb = 0;
145     utext_setNativeIndex(uta, 0);
146     utext_setNativeIndex(utb, 0);
147     do {
148         ca = utext_next32(uta);
149         cb = utext_next32(utb);
150         if (ca != cb) {
151             break;
152         }
153     } while (ca != U_SENTINEL);
154     return ca == cb;
155 }
156 
157 
158 
159 
160 static void TestRegexCAPI(void);
161 static void TestBug4315(void);
162 static void TestUTextAPI(void);
163 static void TestRefreshInput(void);
164 static void TestBug8421(void);
165 static void TestBug10815(void);
166 
167 void addURegexTest(TestNode** root);
168 
/**
 * Register every regex C API test in this file with the test tree.
 * The tests are added in table order under the "regex/" path.
 *
 * @param root root of the test tree, passed through to addTest()
 */
void addURegexTest(TestNode** root)
{
    static const struct {
        void       (*run)(void);
        const char  *path;
    } regexTests[] = {
        { TestRegexCAPI,    "regex/TestRegexCAPI" },
        { TestBug4315,      "regex/TestBug4315" },
        { TestUTextAPI,     "regex/TestUTextAPI" },
        { TestRefreshInput, "regex/TestRefreshInput" },
        { TestBug8421,      "regex/TestBug8421" },
        { TestBug10815,     "regex/TestBug10815" }
    };
    int32_t i;

    for (i = 0; i < (int32_t)(sizeof(regexTests) / sizeof(regexTests[0])); i++) {
        addTest(root, regexTests[i].run, regexTests[i].path);
    }
}
178 
/*
 * Context struct handed to TestCallbackFn, the call back function used for
 *   testing regular expression user callbacks.  This test is mostly the
 *   same as the corresponding C++ test in intltest.
 */
typedef struct callBackContext {
    int32_t maxCalls;    /* TestCallbackFn returns FALSE once numCalls reaches this value */
    int32_t numCalls;    /* incremented on every invocation of TestCallbackFn            */
    int32_t lastSteps;   /* "steps" argument received by the most recent invocation      */
} callBackContext;
189 
190 static UBool U_EXPORT2 U_CALLCONV
TestCallbackFn(const void * context,int32_t steps)191 TestCallbackFn(const void *context, int32_t steps) {
192   callBackContext  *info = (callBackContext *)context;
193   if (info->lastSteps+1 != steps) {
194       log_err("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
195   }
196   info->lastSteps = steps;
197   info->numCalls++;
198   return (info->numCalls < info->maxCalls);
199 }
200 
201 /*
202  *   Regular Expression C API Tests
203  */
TestRegexCAPI(void)204 static void TestRegexCAPI(void) {
205     UErrorCode           status = U_ZERO_ERROR;
206     URegularExpression  *re;
207     UChar                pat[200];
208     UChar               *minus1;
209 
210     memset(&minus1, -1, sizeof(minus1));
211 
212     /* Minimalist open/close */
213     u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
214     re = uregex_open(pat, -1, 0, 0, &status);
215     if (U_FAILURE(status)) {
216          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
217          return;
218     }
219     uregex_close(re);
220 
221     /* Open with all flag values set */
222     status = U_ZERO_ERROR;
223     re = uregex_open(pat, -1,
224         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
225         0, &status);
226     TEST_ASSERT_SUCCESS(status);
227     uregex_close(re);
228 
229     /* Open with an invalid flag */
230     status = U_ZERO_ERROR;
231     re = uregex_open(pat, -1, 0x40000000, 0, &status);
232     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
233     uregex_close(re);
234 
235     /* Open with an unimplemented flag */
236     status = U_ZERO_ERROR;
237     re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
238     TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
239     uregex_close(re);
240 
241     /* openC with an invalid parameter */
242     status = U_ZERO_ERROR;
243     re = uregex_openC(NULL,
244         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
245     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
246 
247     /* openC with an invalid parameter */
248     status = U_USELESS_COLLATOR_ERROR;
249     re = uregex_openC(NULL,
250         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
251     TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
252 
253     /* openC   open from a C string */
254     {
255         const UChar   *p;
256         int32_t  len;
257         status = U_ZERO_ERROR;
258         re = uregex_openC("abc*", 0, 0, &status);
259         TEST_ASSERT_SUCCESS(status);
260         p = uregex_pattern(re, &len, &status);
261         TEST_ASSERT_SUCCESS(status);
262 
263         /* The TEST_ASSERT_SUCCESS above should change too... */
264         if(U_SUCCESS(status)) {
265             u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
266             TEST_ASSERT(u_strcmp(pat, p) == 0);
267             TEST_ASSERT(len==(int32_t)strlen("abc*"));
268         }
269 
270         uregex_close(re);
271 
272         /*  TODO:  Open with ParseError parameter */
273     }
274 
275     /*
276      *  clone
277      */
278     {
279         URegularExpression *clone1;
280         URegularExpression *clone2;
281         URegularExpression *clone3;
282         UChar  testString1[30];
283         UChar  testString2[30];
284         UBool  result;
285 
286 
287         status = U_ZERO_ERROR;
288         re = uregex_openC("abc*", 0, 0, &status);
289         TEST_ASSERT_SUCCESS(status);
290         clone1 = uregex_clone(re, &status);
291         TEST_ASSERT_SUCCESS(status);
292         TEST_ASSERT(clone1 != NULL);
293 
294         status = U_ZERO_ERROR;
295         clone2 = uregex_clone(re, &status);
296         TEST_ASSERT_SUCCESS(status);
297         TEST_ASSERT(clone2 != NULL);
298         uregex_close(re);
299 
300         status = U_ZERO_ERROR;
301         clone3 = uregex_clone(clone2, &status);
302         TEST_ASSERT_SUCCESS(status);
303         TEST_ASSERT(clone3 != NULL);
304 
305         u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
306         u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
307 
308         status = U_ZERO_ERROR;
309         uregex_setText(clone1, testString1, -1, &status);
310         TEST_ASSERT_SUCCESS(status);
311         result = uregex_lookingAt(clone1, 0, &status);
312         TEST_ASSERT_SUCCESS(status);
313         TEST_ASSERT(result==TRUE);
314 
315         status = U_ZERO_ERROR;
316         uregex_setText(clone2, testString2, -1, &status);
317         TEST_ASSERT_SUCCESS(status);
318         result = uregex_lookingAt(clone2, 0, &status);
319         TEST_ASSERT_SUCCESS(status);
320         TEST_ASSERT(result==FALSE);
321         result = uregex_find(clone2, 0, &status);
322         TEST_ASSERT_SUCCESS(status);
323         TEST_ASSERT(result==TRUE);
324 
325         uregex_close(clone1);
326         uregex_close(clone2);
327         uregex_close(clone3);
328 
329     }
330 
331     /*
332      *  pattern()
333     */
334     {
335         const UChar  *resultPat;
336         int32_t       resultLen;
337         u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));
338         status = U_ZERO_ERROR;
339         re = uregex_open(pat, -1, 0, NULL, &status);
340         resultPat = uregex_pattern(re, &resultLen, &status);
341         TEST_ASSERT_SUCCESS(status);
342 
343         /* The TEST_ASSERT_SUCCESS above should change too... */
344         if (U_SUCCESS(status)) {
345             TEST_ASSERT(resultLen == -1);
346             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
347         }
348 
349         uregex_close(re);
350 
351         status = U_ZERO_ERROR;
352         re = uregex_open(pat, 3, 0, NULL, &status);
353         resultPat = uregex_pattern(re, &resultLen, &status);
354         TEST_ASSERT_SUCCESS(status);
355         TEST_ASSERT_SUCCESS(status);
356 
357         /* The TEST_ASSERT_SUCCESS above should change too... */
358         if (U_SUCCESS(status)) {
359             TEST_ASSERT(resultLen == 3);
360             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
361             TEST_ASSERT(u_strlen(resultPat) == 3);
362         }
363 
364         uregex_close(re);
365     }
366 
367     /*
368      *  flags()
369      */
370     {
371         int32_t  t;
372 
373         status = U_ZERO_ERROR;
374         re = uregex_open(pat, -1, 0, NULL, &status);
375         t  = uregex_flags(re, &status);
376         TEST_ASSERT_SUCCESS(status);
377         TEST_ASSERT(t == 0);
378         uregex_close(re);
379 
380         status = U_ZERO_ERROR;
381         re = uregex_open(pat, -1, 0, NULL, &status);
382         t  = uregex_flags(re, &status);
383         TEST_ASSERT_SUCCESS(status);
384         TEST_ASSERT(t == 0);
385         uregex_close(re);
386 
387         status = U_ZERO_ERROR;
388         re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
389         t  = uregex_flags(re, &status);
390         TEST_ASSERT_SUCCESS(status);
391         TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
392         uregex_close(re);
393     }
394 
395     /*
396      *  setText() and lookingAt()
397      */
398     {
399         UChar  text1[50];
400         UChar  text2[50];
401         UBool  result;
402 
403         u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
404         u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
405         status = U_ZERO_ERROR;
406         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
407         re = uregex_open(pat, -1, 0, NULL, &status);
408         TEST_ASSERT_SUCCESS(status);
409 
410         /* Operation before doing a setText should fail... */
411         status = U_ZERO_ERROR;
412         uregex_lookingAt(re, 0, &status);
413         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
414 
415         status = U_ZERO_ERROR;
416         uregex_setText(re, text1, -1, &status);
417         result = uregex_lookingAt(re, 0, &status);
418         TEST_ASSERT(result == TRUE);
419         TEST_ASSERT_SUCCESS(status);
420 
421         status = U_ZERO_ERROR;
422         uregex_setText(re, text2, -1, &status);
423         result = uregex_lookingAt(re, 0, &status);
424         TEST_ASSERT(result == FALSE);
425         TEST_ASSERT_SUCCESS(status);
426 
427         status = U_ZERO_ERROR;
428         uregex_setText(re, text1, -1, &status);
429         result = uregex_lookingAt(re, 0, &status);
430         TEST_ASSERT(result == TRUE);
431         TEST_ASSERT_SUCCESS(status);
432 
433         status = U_ZERO_ERROR;
434         uregex_setText(re, text1, 5, &status);
435         result = uregex_lookingAt(re, 0, &status);
436         TEST_ASSERT(result == FALSE);
437         TEST_ASSERT_SUCCESS(status);
438 
439         status = U_ZERO_ERROR;
440         uregex_setText(re, text1, 6, &status);
441         result = uregex_lookingAt(re, 0, &status);
442         TEST_ASSERT(result == TRUE);
443         TEST_ASSERT_SUCCESS(status);
444 
445         uregex_close(re);
446     }
447 
448 
449     /*
450      *  getText()
451      */
452     {
453         UChar    text1[50];
454         UChar    text2[50];
455         const UChar   *result;
456         int32_t  textLength;
457 
458         u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
459         u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
460         status = U_ZERO_ERROR;
461         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
462         re = uregex_open(pat, -1, 0, NULL, &status);
463 
464         uregex_setText(re, text1, -1, &status);
465         result = uregex_getText(re, &textLength, &status);
466         TEST_ASSERT(result == text1);
467         TEST_ASSERT(textLength == -1);
468         TEST_ASSERT_SUCCESS(status);
469 
470         status = U_ZERO_ERROR;
471         uregex_setText(re, text2, 7, &status);
472         result = uregex_getText(re, &textLength, &status);
473         TEST_ASSERT(result == text2);
474         TEST_ASSERT(textLength == 7);
475         TEST_ASSERT_SUCCESS(status);
476 
477         status = U_ZERO_ERROR;
478         uregex_setText(re, text2, 4, &status);
479         result = uregex_getText(re, &textLength, &status);
480         TEST_ASSERT(result == text2);
481         TEST_ASSERT(textLength == 4);
482         TEST_ASSERT_SUCCESS(status);
483         uregex_close(re);
484     }
485 
486     /*
487      *  matches()
488      */
489     {
490         UChar   text1[50];
491         UBool   result;
492         int     len;
493         UChar   nullString[] = {0,0,0};
494 
495         u_uastrncpy(text1, "abcccde",  UPRV_LENGTHOF(text1));
496         status = U_ZERO_ERROR;
497         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
498         re = uregex_open(pat, -1, 0, NULL, &status);
499 
500         uregex_setText(re, text1, -1, &status);
501         result = uregex_matches(re, 0, &status);
502         TEST_ASSERT(result == FALSE);
503         TEST_ASSERT_SUCCESS(status);
504 
505         status = U_ZERO_ERROR;
506         uregex_setText(re, text1, 6, &status);
507         result = uregex_matches(re, 0, &status);
508         TEST_ASSERT(result == TRUE);
509         TEST_ASSERT_SUCCESS(status);
510 
511         status = U_ZERO_ERROR;
512         uregex_setText(re, text1, 6, &status);
513         result = uregex_matches(re, 1, &status);
514         TEST_ASSERT(result == FALSE);
515         TEST_ASSERT_SUCCESS(status);
516         uregex_close(re);
517 
518         status = U_ZERO_ERROR;
519         re = uregex_openC(".?", 0, NULL, &status);
520         uregex_setText(re, text1, -1, &status);
521         len = u_strlen(text1);
522         result = uregex_matches(re, len, &status);
523         TEST_ASSERT(result == TRUE);
524         TEST_ASSERT_SUCCESS(status);
525 
526         status = U_ZERO_ERROR;
527         uregex_setText(re, nullString, -1, &status);
528         TEST_ASSERT_SUCCESS(status);
529         result = uregex_matches(re, 0, &status);
530         TEST_ASSERT(result == TRUE);
531         TEST_ASSERT_SUCCESS(status);
532         uregex_close(re);
533     }
534 
535 
536     /*
537      *  lookingAt()    Used in setText test.
538      */
539 
540 
541     /*
542      *  find(), findNext, start, end, reset
543      */
544     {
545         UChar    text1[50];
546         UBool    result;
547         u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
548         status = U_ZERO_ERROR;
549         re = uregex_openC("rx", 0, NULL, &status);
550 
551         uregex_setText(re, text1, -1, &status);
552         result = uregex_find(re, 0, &status);
553         TEST_ASSERT(result == TRUE);
554         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
555         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
556         TEST_ASSERT_SUCCESS(status);
557 
558         result = uregex_find(re, 9, &status);
559         TEST_ASSERT(result == TRUE);
560         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
561         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
562         TEST_ASSERT_SUCCESS(status);
563 
564         result = uregex_find(re, 14, &status);
565         TEST_ASSERT(result == FALSE);
566         TEST_ASSERT_SUCCESS(status);
567 
568         status = U_ZERO_ERROR;
569         uregex_reset(re, 0, &status);
570 
571         result = uregex_findNext(re, &status);
572         TEST_ASSERT(result == TRUE);
573         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
574         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
575         TEST_ASSERT_SUCCESS(status);
576 
577         result = uregex_findNext(re, &status);
578         TEST_ASSERT(result == TRUE);
579         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
580         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
581         TEST_ASSERT_SUCCESS(status);
582 
583         status = U_ZERO_ERROR;
584         uregex_reset(re, 12, &status);
585 
586         result = uregex_findNext(re, &status);
587         TEST_ASSERT(result == TRUE);
588         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
589         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
590         TEST_ASSERT_SUCCESS(status);
591 
592         result = uregex_findNext(re, &status);
593         TEST_ASSERT(result == FALSE);
594         TEST_ASSERT_SUCCESS(status);
595 
596         uregex_close(re);
597     }
598 
599     /*
600      *  groupCount
601      */
602     {
603         int32_t result;
604 
605         status = U_ZERO_ERROR;
606         re = uregex_openC("abc", 0, NULL, &status);
607         result = uregex_groupCount(re, &status);
608         TEST_ASSERT_SUCCESS(status);
609         TEST_ASSERT(result == 0);
610         uregex_close(re);
611 
612         status = U_ZERO_ERROR;
613         re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
614         result = uregex_groupCount(re, &status);
615         TEST_ASSERT_SUCCESS(status);
616         TEST_ASSERT(result == 3);
617         uregex_close(re);
618 
619     }
620 
621 
622     /*
623      *  group()
624      */
625     {
626         UChar    text1[80];
627         UChar    buf[80];
628         UBool    result;
629         int32_t  resultSz;
630         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
631 
632         status = U_ZERO_ERROR;
633         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
634         TEST_ASSERT_SUCCESS(status);
635 
636 
637         uregex_setText(re, text1, -1, &status);
638         result = uregex_find(re, 0, &status);
639         TEST_ASSERT(result==TRUE);
640 
641         /*  Capture Group 0, the full match.  Should succeed.  */
642         status = U_ZERO_ERROR;
643         resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);
644         TEST_ASSERT_SUCCESS(status);
645         TEST_ASSERT_STRING("abc interior def", buf, TRUE);
646         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
647 
648         /*  Capture group #1.  Should succeed. */
649         status = U_ZERO_ERROR;
650         resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);
651         TEST_ASSERT_SUCCESS(status);
652         TEST_ASSERT_STRING(" interior ", buf, TRUE);
653         TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
654 
655         /*  Capture group out of range.  Error. */
656         status = U_ZERO_ERROR;
657         uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);
658         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
659 
660         /* NULL buffer, pure pre-flight */
661         status = U_ZERO_ERROR;
662         resultSz = uregex_group(re, 0, NULL, 0, &status);
663         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
664         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
665 
666         /* Too small buffer, truncated string */
667         status = U_ZERO_ERROR;
668         memset(buf, -1, sizeof(buf));
669         resultSz = uregex_group(re, 0, buf, 5, &status);
670         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
671         TEST_ASSERT_STRING("abc i", buf, FALSE);
672         TEST_ASSERT(buf[5] == (UChar)0xffff);
673         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
674 
675         /* Output string just fits buffer, no NUL term. */
676         status = U_ZERO_ERROR;
677         resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
678         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
679         TEST_ASSERT_STRING("abc interior def", buf, FALSE);
680         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
681         TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
682 
683         uregex_close(re);
684 
685     }
686 
687     /*
688      *  Regions
689      */
690 
691 
692         /* SetRegion(), getRegion() do something  */
693         TEST_SETUP(".*", "0123456789ABCDEF", 0)
694         UChar resultString[40];
695         TEST_ASSERT(uregex_regionStart(re, &status) == 0);
696         TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
697         uregex_setRegion(re, 3, 6, &status);
698         TEST_ASSERT(uregex_regionStart(re, &status) == 3);
699         TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
700         TEST_ASSERT(uregex_findNext(re, &status));
701         TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3)
702         TEST_ASSERT_STRING("345", resultString, TRUE);
703         TEST_TEARDOWN;
704 
705         /* find(start=-1) uses regions   */
706         TEST_SETUP(".*", "0123456789ABCDEF", 0);
707         uregex_setRegion(re, 4, 6, &status);
708         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
709         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
710         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
711         TEST_TEARDOWN;
712 
713         /* find (start >=0) does not use regions   */
714         TEST_SETUP(".*", "0123456789ABCDEF", 0);
715         uregex_setRegion(re, 4, 6, &status);
716         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
717         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
718         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
719         TEST_TEARDOWN;
720 
721         /* findNext() obeys regions    */
722         TEST_SETUP(".", "0123456789ABCDEF", 0);
723         uregex_setRegion(re, 4, 6, &status);
724         TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
725         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
726         TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
727         TEST_ASSERT(uregex_start(re, 0, &status) == 5);
728         TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
729         TEST_TEARDOWN;
730 
731         /* matches(start=-1) uses regions                                           */
732         /*    Also, verify that non-greedy *? succeeds in finding the full match.   */
733         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
734         uregex_setRegion(re, 4, 6, &status);
735         TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
736         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
737         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
738         TEST_TEARDOWN;
739 
740         /* matches (start >=0) does not use regions       */
741         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
742         uregex_setRegion(re, 4, 6, &status);
743         TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
744         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
745         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
746         TEST_TEARDOWN;
747 
748         /* lookingAt(start=-1) uses regions                                         */
749         /*    Also, verify that non-greedy *? finds the first (shortest) match.     */
750         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
751         uregex_setRegion(re, 4, 6, &status);
752         TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
753         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
754         TEST_ASSERT(uregex_end(re, 0, &status) == 4);
755         TEST_TEARDOWN;
756 
757         /* lookingAt (start >=0) does not use regions  */
758         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
759         uregex_setRegion(re, 4, 6, &status);
760         TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
761         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
762         TEST_ASSERT(uregex_end(re, 0, &status) == 0);
763         TEST_TEARDOWN;
764 
765         /* hitEnd()       */
766         TEST_SETUP("[a-f]*", "abcdefghij", 0);
767         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
768         TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
769         TEST_TEARDOWN;
770 
771         TEST_SETUP("[a-f]*", "abcdef", 0);
772         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
773         TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
774         TEST_TEARDOWN;
775 
776         /* requireEnd   */
777         TEST_SETUP("abcd", "abcd", 0);
778         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
779         TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
780         TEST_TEARDOWN;
781 
782         TEST_SETUP("abcd$", "abcd", 0);
783         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
784         TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
785         TEST_TEARDOWN;
786 
787         /* anchoringBounds        */
788         TEST_SETUP("abc$", "abcdef", 0);
789         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
790         uregex_useAnchoringBounds(re, FALSE, &status);
791         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
792 
793         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
794         uregex_useAnchoringBounds(re, TRUE, &status);
795         uregex_setRegion(re, 0, 3, &status);
796         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
797         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
798         TEST_TEARDOWN;
799 
800         /* Transparent Bounds      */
801         TEST_SETUP("abc(?=def)", "abcdef", 0);
802         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
803         uregex_useTransparentBounds(re, TRUE, &status);
804         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
805 
806         uregex_useTransparentBounds(re, FALSE, &status);
807         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* No Region */
808         uregex_setRegion(re, 0, 3, &status);
809         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);   /* with region, opaque bounds */
810         uregex_useTransparentBounds(re, TRUE, &status);
811         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* with region, transparent bounds */
812         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
813         TEST_TEARDOWN;
814 
815 
816     /*
817      *  replaceFirst()
818      */
819     {
820         UChar    text1[80];
821         UChar    text2[80];
822         UChar    replText[80];
823         UChar    buf[80];
824         int32_t  resultSz;
825         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
826         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
827         u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
828 
829         status = U_ZERO_ERROR;
830         re = uregex_openC("x(.*?)x", 0, NULL, &status);
831         TEST_ASSERT_SUCCESS(status);
832 
833         /*  Normal case, with match */
834         uregex_setText(re, text1, -1, &status);
835         resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
836         TEST_ASSERT_SUCCESS(status);
837         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
838         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
839 
840         /* No match.  Text should copy to output with no changes.  */
841         status = U_ZERO_ERROR;
842         uregex_setText(re, text2, -1, &status);
843         resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
844         TEST_ASSERT_SUCCESS(status);
845         TEST_ASSERT_STRING("No match here.", buf, TRUE);
846         TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
847 
848         /*  Match, output just fills buffer, no termination warning. */
849         status = U_ZERO_ERROR;
850         uregex_setText(re, text1, -1, &status);
851         memset(buf, -1, sizeof(buf));
852         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
853         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
854         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
855         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
856         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
857 
858         /* Do the replaceFirst again, without first resetting anything.
859          *  Should give the same results.
860          */
861         status = U_ZERO_ERROR;
862         memset(buf, -1, sizeof(buf));
863         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
864         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
865         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
866         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
867         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
868 
869         /* NULL buffer, zero buffer length */
870         status = U_ZERO_ERROR;
871         resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
872         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
873         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
874 
875         /* Buffer too small by one */
876         status = U_ZERO_ERROR;
877         memset(buf, -1, sizeof(buf));
878         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
879         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
880         TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
881         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
882         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
883 
884         uregex_close(re);
885     }
886 
887 
888     /*
889      *  replaceAll()
890      */
891     {
892         UChar    text1[80];          /*  "Replace xaax x1x x...x." */
893         UChar    text2[80];          /*  "No match Here"           */
894         UChar    replText[80];       /*  "<$1>"                    */
895         UChar    replText2[80];      /*  "<<$1>>"                  */
896         const char * pattern = "x(.*?)x";
897         const char * expectedResult = "Replace <aa> <1> <...>.";
898         const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
899         UChar    buf[80];
900         int32_t  resultSize;
901         int32_t  expectedResultSize;
902         int32_t  expectedResultSize2;
903         int32_t  i;
904 
905         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
906         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
907         u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
908         u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));
909         expectedResultSize = strlen(expectedResult);
910         expectedResultSize2 = strlen(expectedResult2);
911 
912         status = U_ZERO_ERROR;
913         re = uregex_openC(pattern, 0, NULL, &status);
914         TEST_ASSERT_SUCCESS(status);
915 
916         /*  Normal case, with match */
917         uregex_setText(re, text1, -1, &status);
918         resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
919         TEST_ASSERT_SUCCESS(status);
920         TEST_ASSERT_STRING(expectedResult, buf, TRUE);
921         TEST_ASSERT(resultSize == expectedResultSize);
922 
923         /* No match.  Text should copy to output with no changes.  */
924         status = U_ZERO_ERROR;
925         uregex_setText(re, text2, -1, &status);
926         resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
927         TEST_ASSERT_SUCCESS(status);
928         TEST_ASSERT_STRING("No match here.", buf, TRUE);
929         TEST_ASSERT(resultSize == u_strlen(text2));
930 
931         /*  Match, output just fills buffer, no termination warning. */
932         status = U_ZERO_ERROR;
933         uregex_setText(re, text1, -1, &status);
934         memset(buf, -1, sizeof(buf));
935         resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
936         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
937         TEST_ASSERT_STRING(expectedResult, buf, FALSE);
938         TEST_ASSERT(resultSize == expectedResultSize);
939         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
940 
941         /* Do the replaceFirst again, without first resetting anything.
942          *  Should give the same results.
943          */
944         status = U_ZERO_ERROR;
945         memset(buf, -1, sizeof(buf));
946         resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
947         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
948         TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
949         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
950         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
951 
952         /* NULL buffer, zero buffer length */
953         status = U_ZERO_ERROR;
954         resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
955         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
956         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
957 
958         /* Buffer too small.  Try every size, which will tickle edge cases
959          * in uregex_appendReplacement (used by replaceAll)   */
960         for (i=0; i<expectedResultSize; i++) {
961             char  expected[80];
962             status = U_ZERO_ERROR;
963             memset(buf, -1, sizeof(buf));
964             resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
965             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
966             strcpy(expected, expectedResult);
967             expected[i] = 0;
968             TEST_ASSERT_STRING(expected, buf, FALSE);
969             TEST_ASSERT(resultSize == expectedResultSize);
970             TEST_ASSERT(buf[i] == (UChar)0xffff);
971         }
972 
973         /* Buffer too small.  Same as previous test, except this time the replacement
974          * text is longer than the match capture group, making the length of the complete
975          * replacement longer than the original string.
976          */
977         for (i=0; i<expectedResultSize2; i++) {
978             char  expected[80];
979             status = U_ZERO_ERROR;
980             memset(buf, -1, sizeof(buf));
981             resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
982             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
983             strcpy(expected, expectedResult2);
984             expected[i] = 0;
985             TEST_ASSERT_STRING(expected, buf, FALSE);
986             TEST_ASSERT(resultSize == expectedResultSize2);
987             TEST_ASSERT(buf[i] == (UChar)0xffff);
988         }
989 
990 
991         uregex_close(re);
992     }
993 
994 
995     /*
996      *  appendReplacement()
997      */
998     {
999         UChar    text[100];
1000         UChar    repl[100];
1001         UChar    buf[100];
1002         UChar   *bufPtr;
1003         int32_t  bufCap;
1004 
1005 
1006         status = U_ZERO_ERROR;
1007         re = uregex_openC(".*", 0, 0, &status);
1008         TEST_ASSERT_SUCCESS(status);
1009 
1010         u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
1011         u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1012         uregex_setText(re, text, -1, &status);
1013 
1014         /* match covers whole target string */
1015         uregex_find(re, 0, &status);
1016         TEST_ASSERT_SUCCESS(status);
1017         bufPtr = buf;
1018         bufCap = UPRV_LENGTHOF(buf);
1019         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1020         TEST_ASSERT_SUCCESS(status);
1021         TEST_ASSERT_STRING("some other", buf, TRUE);
1022 
1023         /* Match has \u \U escapes */
1024         uregex_find(re, 0, &status);
1025         TEST_ASSERT_SUCCESS(status);
1026         bufPtr = buf;
1027         bufCap = UPRV_LENGTHOF(buf);
1028         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1029         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1030         TEST_ASSERT_SUCCESS(status);
1031         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1032 
1033         /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1034         status = U_ZERO_ERROR;
1035         uregex_find(re, 0, &status);
1036         TEST_ASSERT_SUCCESS(status);
1037         bufPtr = buf;
1038         status = U_BUFFER_OVERFLOW_ERROR;
1039         uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
1040         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1041 
1042         uregex_close(re);
1043     }
1044 
1045 
1046     /*
1047      *  appendTail().   Checked in ReplaceFirst(), replaceAll().
1048      */
1049 
1050     /*
1051      *  split()
1052      */
1053     {
1054         UChar    textToSplit[80];
1055         UChar    text2[80];
1056         UChar    buf[200];
1057         UChar    *fields[10];
1058         int32_t  numFields;
1059         int32_t  requiredCapacity;
1060         int32_t  spaceNeeded;
1061         int32_t  sz;
1062 
1063         u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
1064         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1065 
1066         status = U_ZERO_ERROR;
1067         re = uregex_openC(":", 0, NULL, &status);
1068 
1069 
1070         /*  Simple split */
1071 
1072         uregex_setText(re, textToSplit, -1, &status);
1073         TEST_ASSERT_SUCCESS(status);
1074 
1075         /* The TEST_ASSERT_SUCCESS call above should change too... */
1076         if (U_SUCCESS(status)) {
1077             memset(fields, -1, sizeof(fields));
1078             numFields =
1079                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1080             TEST_ASSERT_SUCCESS(status);
1081 
1082             /* The TEST_ASSERT_SUCCESS call above should change too... */
1083             if(U_SUCCESS(status)) {
1084                 TEST_ASSERT(numFields == 3);
1085                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1086                 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1087                 TEST_ASSERT_STRING("  third", fields[2], TRUE);
1088                 TEST_ASSERT(fields[3] == NULL);
1089 
1090                 spaceNeeded = u_strlen(textToSplit) -
1091                             (numFields - 1)  +  /* Field delimiters do not appear in output */
1092                             numFields;          /* Each field gets a NUL terminator */
1093 
1094                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1095             }
1096         }
1097 
1098         uregex_close(re);
1099 
1100 
1101         /*  Split with too few output strings available */
1102         status = U_ZERO_ERROR;
1103         re = uregex_openC(":", 0, NULL, &status);
1104         uregex_setText(re, textToSplit, -1, &status);
1105         TEST_ASSERT_SUCCESS(status);
1106 
1107         /* The TEST_ASSERT_SUCCESS call above should change too... */
1108         if(U_SUCCESS(status)) {
1109             memset(fields, -1, sizeof(fields));
1110             numFields =
1111                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1112             TEST_ASSERT_SUCCESS(status);
1113 
1114             /* The TEST_ASSERT_SUCCESS call above should change too... */
1115             if(U_SUCCESS(status)) {
1116                 TEST_ASSERT(numFields == 2);
1117                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1118                 TEST_ASSERT_STRING(" second:  third", fields[1], TRUE);
1119                 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1120 
1121                 spaceNeeded = u_strlen(textToSplit) -
1122                             (numFields - 1)  +  /* Field delimiters do not appear in output */
1123                             numFields;          /* Each field gets a NUL terminator */
1124 
1125                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1126 
1127                 /* Split with a range of output buffer sizes.  */
1128                 spaceNeeded = u_strlen(textToSplit) -
1129                     (numFields - 1)  +  /* Field delimiters do not appear in output */
1130                     numFields;          /* Each field gets a NUL terminator */
1131 
1132                 for (sz=0; sz < spaceNeeded+1; sz++) {
1133                     memset(fields, -1, sizeof(fields));
1134                     status = U_ZERO_ERROR;
1135                     numFields =
1136                         uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1137                     if (sz >= spaceNeeded) {
1138                         TEST_ASSERT_SUCCESS(status);
1139                         TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1140                         TEST_ASSERT_STRING(" second", fields[1], TRUE);
1141                         TEST_ASSERT_STRING("  third", fields[2], TRUE);
1142                     } else {
1143                         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1144                     }
1145                     TEST_ASSERT(numFields == 3);
1146                     TEST_ASSERT(fields[3] == NULL);
1147                     TEST_ASSERT(spaceNeeded == requiredCapacity);
1148                 }
1149             }
1150         }
1151 
1152         uregex_close(re);
1153     }
1154 
1155 
1156 
1157 
1158     /* Split(), part 2.  Patterns with capture groups.  The capture group text
1159      *                   comes out as additional fields.  */
1160     {
1161         UChar    textToSplit[80];
1162         UChar    buf[200];
1163         UChar    *fields[10];
1164         int32_t  numFields;
1165         int32_t  requiredCapacity;
1166         int32_t  spaceNeeded;
1167         int32_t  sz;
1168 
1169         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
1170 
1171         status = U_ZERO_ERROR;
1172         re = uregex_openC("<(.*?)>", 0, NULL, &status);
1173 
1174         uregex_setText(re, textToSplit, -1, &status);
1175         TEST_ASSERT_SUCCESS(status);
1176 
1177         /* The TEST_ASSERT_SUCCESS call above should change too... */
1178         if(U_SUCCESS(status)) {
1179             memset(fields, -1, sizeof(fields));
1180             numFields =
1181                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1182             TEST_ASSERT_SUCCESS(status);
1183 
1184             /* The TEST_ASSERT_SUCCESS call above should change too... */
1185             if(U_SUCCESS(status)) {
1186                 TEST_ASSERT(numFields == 5);
1187                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1188                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1189                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1190                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1191                 TEST_ASSERT_STRING("  third", fields[4], TRUE);
1192                 TEST_ASSERT(fields[5] == NULL);
1193                 spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1194                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1195             }
1196         }
1197 
1198         /*  Split with too few output strings available (2) */
1199         status = U_ZERO_ERROR;
1200         memset(fields, -1, sizeof(fields));
1201         numFields =
1202             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1203         TEST_ASSERT_SUCCESS(status);
1204 
1205         /* The TEST_ASSERT_SUCCESS call above should change too... */
1206         if(U_SUCCESS(status)) {
1207             TEST_ASSERT(numFields == 2);
1208             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1209             TEST_ASSERT_STRING(" second<tag-b>  third", fields[1], TRUE);
1210             TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1211 
1212             spaceNeeded = strlen("first . second<tag-b>  third.");  /* "." at NUL positions */
1213             TEST_ASSERT(spaceNeeded == requiredCapacity);
1214         }
1215 
1216         /*  Split with too few output strings available (3) */
1217         status = U_ZERO_ERROR;
1218         memset(fields, -1, sizeof(fields));
1219         numFields =
1220             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
1221         TEST_ASSERT_SUCCESS(status);
1222 
1223         /* The TEST_ASSERT_SUCCESS call above should change too... */
1224         if(U_SUCCESS(status)) {
1225             TEST_ASSERT(numFields == 3);
1226             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1227             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1228             TEST_ASSERT_STRING(" second<tag-b>  third", fields[2], TRUE);
1229             TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1230 
1231             spaceNeeded = strlen("first .tag-a. second<tag-b>  third.");  /* "." at NUL positions */
1232             TEST_ASSERT(spaceNeeded == requiredCapacity);
1233         }
1234 
1235         /*  Split with just enough output strings available (5) */
1236         status = U_ZERO_ERROR;
1237         memset(fields, -1, sizeof(fields));
1238         numFields =
1239             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
1240         TEST_ASSERT_SUCCESS(status);
1241 
1242         /* The TEST_ASSERT_SUCCESS call above should change too... */
1243         if(U_SUCCESS(status)) {
1244             TEST_ASSERT(numFields == 5);
1245             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1246             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1247             TEST_ASSERT_STRING(" second", fields[2], TRUE);
1248             TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1249             TEST_ASSERT_STRING("  third", fields[4], TRUE);
1250             TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1251 
1252             spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1253             TEST_ASSERT(spaceNeeded == requiredCapacity);
1254         }
1255 
1256         /* Split, end of text is a field delimiter.   */
1257         status = U_ZERO_ERROR;
1258         sz = strlen("first <tag-a> second<tag-b>");
1259         uregex_setText(re, textToSplit, sz, &status);
1260         TEST_ASSERT_SUCCESS(status);
1261 
1262         /* The TEST_ASSERT_SUCCESS call above should change too... */
1263         if(U_SUCCESS(status)) {
1264             memset(fields, -1, sizeof(fields));
1265             numFields =
1266                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
1267             TEST_ASSERT_SUCCESS(status);
1268 
1269             /* The TEST_ASSERT_SUCCESS call above should change too... */
1270             if(U_SUCCESS(status)) {
1271                 TEST_ASSERT(numFields == 5);
1272                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1273                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1274                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1275                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1276                 TEST_ASSERT_STRING("",        fields[4], TRUE);
1277                 TEST_ASSERT(fields[5] == NULL);
1278                 TEST_ASSERT(fields[8] == NULL);
1279                 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1280                 spaceNeeded = strlen("first .tag-a. second.tag-b..");  /* "." at NUL positions */
1281                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1282             }
1283         }
1284 
1285         uregex_close(re);
1286     }
1287 
1288     /*
1289      * set/getTimeLimit
1290      */
1291      TEST_SETUP("abc$", "abcdef", 0);
1292      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1293      uregex_setTimeLimit(re, 1000, &status);
1294      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1295      TEST_ASSERT_SUCCESS(status);
1296      uregex_setTimeLimit(re, -1, &status);
1297      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1298      status = U_ZERO_ERROR;
1299      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1300      TEST_TEARDOWN;
1301 
1302      /*
1303       * set/get Stack Limit
1304       */
1305      TEST_SETUP("abc$", "abcdef", 0);
1306      TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1307      uregex_setStackLimit(re, 40000, &status);
1308      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1309      TEST_ASSERT_SUCCESS(status);
1310      uregex_setStackLimit(re, -1, &status);
1311      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1312      status = U_ZERO_ERROR;
1313      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1314      TEST_TEARDOWN;
1315 
1316 
1317      /*
1318       * Get/Set callback functions
1319       *     This test is copied from intltest regex/Callbacks
1320       *     The pattern and test data will run long enough to cause the callback
1321       *       to be invoked.  The nested '+' operators give exponential time
1322       *       behavior with increasing string length.
1323       */
1324      TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1325      callBackContext cbInfo = {4, 0, 0};
1326      const void     *pContext   = &cbInfo;
1327      URegexMatchCallback    *returnedFn = &TestCallbackFn;
1328 
1329      /*  Getting the callback fn when it hasn't been set must return NULL  */
1330      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1331      TEST_ASSERT_SUCCESS(status);
1332      TEST_ASSERT(returnedFn == NULL);
1333      TEST_ASSERT(pContext == NULL);
1334 
1335      /* Set thecallback and do a match.                                   */
1336      /* The callback function should record that it has been called.      */
1337      uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1338      TEST_ASSERT_SUCCESS(status);
1339      TEST_ASSERT(cbInfo.numCalls == 0);
1340      TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1341      TEST_ASSERT_SUCCESS(status);
1342      TEST_ASSERT(cbInfo.numCalls > 0);
1343 
1344      /* Getting the callback should return the values that were set above.  */
1345      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1346      TEST_ASSERT(returnedFn == &TestCallbackFn);
1347      TEST_ASSERT(pContext == &cbInfo);
1348 
1349      TEST_TEARDOWN;
1350 }
1351 
1352 
1353 
TestBug4315(void)1354 static void TestBug4315(void) {
1355     UErrorCode      theICUError = U_ZERO_ERROR;
1356     URegularExpression *theRegEx;
1357     UChar           *textBuff;
1358     const char      *thePattern;
1359     UChar            theString[100];
1360     UChar           *destFields[24];
1361     int32_t         neededLength1;
1362     int32_t         neededLength2;
1363 
1364     int32_t         wordCount = 0;
1365     int32_t         destFieldsSize = 24;
1366 
1367     thePattern  = "ck ";
1368     u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1369 
1370     /* open a regex */
1371     theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1372     TEST_ASSERT_SUCCESS(theICUError);
1373 
1374     /* set the input string */
1375     uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1376     TEST_ASSERT_SUCCESS(theICUError);
1377 
1378     /* split */
1379     /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1380      *  error occurs! */
1381     wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1382         destFieldsSize, &theICUError);
1383 
1384     TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1385     TEST_ASSERT(wordCount==3);
1386 
1387     if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1388     {
1389         theICUError = U_ZERO_ERROR;
1390         textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1391         wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1392             destFields, destFieldsSize, &theICUError);
1393         TEST_ASSERT(wordCount==3);
1394         TEST_ASSERT_SUCCESS(theICUError);
1395         TEST_ASSERT(neededLength1 == neededLength2);
1396         TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1397         TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1398         TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1399         TEST_ASSERT(destFields[3] == NULL);
1400         free(textBuff);
1401     }
1402     uregex_close(theRegEx);
1403 }
1404 
1405 /* Based on TestRegexCAPI() */
TestUTextAPI(void)1406 static void TestUTextAPI(void) {
1407     UErrorCode           status = U_ZERO_ERROR;
1408     URegularExpression  *re;
1409     UText                patternText = UTEXT_INITIALIZER;
1410     UChar                pat[200];
1411     const char           patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1412 
1413     /* Mimimalist open/close */
1414     utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1415     re = uregex_openUText(&patternText, 0, 0, &status);
1416     if (U_FAILURE(status)) {
1417          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1418          utext_close(&patternText);
1419          return;
1420     }
1421     uregex_close(re);
1422 
1423     /* Open with all flag values set */
1424     status = U_ZERO_ERROR;
1425     re = uregex_openUText(&patternText,
1426         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1427         0, &status);
1428     TEST_ASSERT_SUCCESS(status);
1429     uregex_close(re);
1430 
1431     /* Open with an invalid flag */
1432     status = U_ZERO_ERROR;
1433     re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1434     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1435     uregex_close(re);
1436 
1437     /* open with an invalid parameter */
1438     status = U_ZERO_ERROR;
1439     re = uregex_openUText(NULL,
1440         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1441     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1442 
1443     /*
1444      *  clone
1445      */
1446     {
1447         URegularExpression *clone1;
1448         URegularExpression *clone2;
1449         URegularExpression *clone3;
1450         UChar  testString1[30];
1451         UChar  testString2[30];
1452         UBool  result;
1453 
1454 
1455         status = U_ZERO_ERROR;
1456         re = uregex_openUText(&patternText, 0, 0, &status);
1457         TEST_ASSERT_SUCCESS(status);
1458         clone1 = uregex_clone(re, &status);
1459         TEST_ASSERT_SUCCESS(status);
1460         TEST_ASSERT(clone1 != NULL);
1461 
1462         status = U_ZERO_ERROR;
1463         clone2 = uregex_clone(re, &status);
1464         TEST_ASSERT_SUCCESS(status);
1465         TEST_ASSERT(clone2 != NULL);
1466         uregex_close(re);
1467 
1468         status = U_ZERO_ERROR;
1469         clone3 = uregex_clone(clone2, &status);
1470         TEST_ASSERT_SUCCESS(status);
1471         TEST_ASSERT(clone3 != NULL);
1472 
1473         u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
1474         u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
1475 
1476         status = U_ZERO_ERROR;
1477         uregex_setText(clone1, testString1, -1, &status);
1478         TEST_ASSERT_SUCCESS(status);
1479         result = uregex_lookingAt(clone1, 0, &status);
1480         TEST_ASSERT_SUCCESS(status);
1481         TEST_ASSERT(result==TRUE);
1482 
1483         status = U_ZERO_ERROR;
1484         uregex_setText(clone2, testString2, -1, &status);
1485         TEST_ASSERT_SUCCESS(status);
1486         result = uregex_lookingAt(clone2, 0, &status);
1487         TEST_ASSERT_SUCCESS(status);
1488         TEST_ASSERT(result==FALSE);
1489         result = uregex_find(clone2, 0, &status);
1490         TEST_ASSERT_SUCCESS(status);
1491         TEST_ASSERT(result==TRUE);
1492 
1493         uregex_close(clone1);
1494         uregex_close(clone2);
1495         uregex_close(clone3);
1496 
1497     }
1498 
1499     /*
1500      *  pattern() and patternText()
1501      */
1502     {
1503         const UChar  *resultPat;
1504         int32_t       resultLen;
1505         UText        *resultText;
1506         const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1507         const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1508         u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
1509         status = U_ZERO_ERROR;
1510 
1511         utext_openUTF8(&patternText, str_hello, -1, &status);
1512         re = uregex_open(pat, -1, 0, NULL, &status);
1513         resultPat = uregex_pattern(re, &resultLen, &status);
1514         TEST_ASSERT_SUCCESS(status);
1515 
1516         /* The TEST_ASSERT_SUCCESS above should change too... */
1517         if (U_SUCCESS(status)) {
1518             TEST_ASSERT(resultLen == -1);
1519             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1520         }
1521 
1522         resultText = uregex_patternUText(re, &status);
1523         TEST_ASSERT_SUCCESS(status);
1524         TEST_ASSERT_UTEXT(str_hello, resultText);
1525 
1526         uregex_close(re);
1527 
1528         status = U_ZERO_ERROR;
1529         re = uregex_open(pat, 3, 0, NULL, &status);
1530         resultPat = uregex_pattern(re, &resultLen, &status);
1531         TEST_ASSERT_SUCCESS(status);
1532 
1533         /* The TEST_ASSERT_SUCCESS above should change too... */
1534         if (U_SUCCESS(status)) {
1535             TEST_ASSERT(resultLen == 3);
1536             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1537             TEST_ASSERT(u_strlen(resultPat) == 3);
1538         }
1539 
1540         resultText = uregex_patternUText(re, &status);
1541         TEST_ASSERT_SUCCESS(status);
1542         TEST_ASSERT_UTEXT(str_hel, resultText);
1543 
1544         uregex_close(re);
1545     }
1546 
1547     /*
1548      *  setUText() and lookingAt()
1549      */
1550     {
1551         UText  text1 = UTEXT_INITIALIZER;
1552         UText  text2 = UTEXT_INITIALIZER;
1553         UBool  result;
1554         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1555         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1556         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1557         status = U_ZERO_ERROR;
1558         utext_openUTF8(&text1, str_abcccd, -1, &status);
1559         utext_openUTF8(&text2, str_abcccxd, -1, &status);
1560 
1561         utext_openUTF8(&patternText, str_abcd, -1, &status);
1562         re = uregex_openUText(&patternText, 0, NULL, &status);
1563         TEST_ASSERT_SUCCESS(status);
1564 
1565         /* Operation before doing a setText should fail... */
1566         status = U_ZERO_ERROR;
1567         uregex_lookingAt(re, 0, &status);
1568         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1569 
1570         status = U_ZERO_ERROR;
1571         uregex_setUText(re, &text1, &status);
1572         result = uregex_lookingAt(re, 0, &status);
1573         TEST_ASSERT(result == TRUE);
1574         TEST_ASSERT_SUCCESS(status);
1575 
1576         status = U_ZERO_ERROR;
1577         uregex_setUText(re, &text2, &status);
1578         result = uregex_lookingAt(re, 0, &status);
1579         TEST_ASSERT(result == FALSE);
1580         TEST_ASSERT_SUCCESS(status);
1581 
1582         status = U_ZERO_ERROR;
1583         uregex_setUText(re, &text1, &status);
1584         result = uregex_lookingAt(re, 0, &status);
1585         TEST_ASSERT(result == TRUE);
1586         TEST_ASSERT_SUCCESS(status);
1587 
1588         uregex_close(re);
1589         utext_close(&text1);
1590         utext_close(&text2);
1591     }
1592 
1593 
1594     /*
1595      *  getText() and getUText()
1596      */
1597     {
1598         UText  text1 = UTEXT_INITIALIZER;
1599         UText  text2 = UTEXT_INITIALIZER;
1600         UChar  text2Chars[20];
1601         UText  *resultText;
1602         const UChar   *result;
1603         int32_t  textLength;
1604         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1605         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1606         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1607 
1608 
1609         status = U_ZERO_ERROR;
1610         utext_openUTF8(&text1, str_abcccd, -1, &status);
1611         u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
1612         utext_openUChars(&text2, text2Chars, -1, &status);
1613 
1614         utext_openUTF8(&patternText, str_abcd, -1, &status);
1615         re = uregex_openUText(&patternText, 0, NULL, &status);
1616 
1617         /* First set a UText */
1618         uregex_setUText(re, &text1, &status);
1619         resultText = uregex_getUText(re, NULL, &status);
1620         TEST_ASSERT_SUCCESS(status);
1621         TEST_ASSERT(resultText != &text1);
1622         utext_setNativeIndex(resultText, 0);
1623         utext_setNativeIndex(&text1, 0);
1624         TEST_ASSERT(testUTextEqual(resultText, &text1));
1625         utext_close(resultText);
1626 
1627         result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1628         (void)result;    /* Suppress set but not used warning. */
1629         TEST_ASSERT(textLength == -1 || textLength == 6);
1630         resultText = uregex_getUText(re, NULL, &status);
1631         TEST_ASSERT_SUCCESS(status);
1632         TEST_ASSERT(resultText != &text1);
1633         utext_setNativeIndex(resultText, 0);
1634         utext_setNativeIndex(&text1, 0);
1635         TEST_ASSERT(testUTextEqual(resultText, &text1));
1636         utext_close(resultText);
1637 
1638         /* Then set a UChar * */
1639         uregex_setText(re, text2Chars, 7, &status);
1640         resultText = uregex_getUText(re, NULL, &status);
1641         TEST_ASSERT_SUCCESS(status);
1642         utext_setNativeIndex(resultText, 0);
1643         utext_setNativeIndex(&text2, 0);
1644         TEST_ASSERT(testUTextEqual(resultText, &text2));
1645         utext_close(resultText);
1646         result = uregex_getText(re, &textLength, &status);
1647         TEST_ASSERT(textLength == 7);
1648 
1649         uregex_close(re);
1650         utext_close(&text1);
1651         utext_close(&text2);
1652     }
1653 
1654     /*
1655      *  matches()
1656      */
1657     {
1658         UText   text1 = UTEXT_INITIALIZER;
1659         UBool   result;
1660         UText   nullText = UTEXT_INITIALIZER;
1661         const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1662         const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1663 
1664         status = U_ZERO_ERROR;
1665         utext_openUTF8(&text1, str_abcccde, -1, &status);
1666         utext_openUTF8(&patternText, str_abcd, -1, &status);
1667         re = uregex_openUText(&patternText, 0, NULL, &status);
1668 
1669         uregex_setUText(re, &text1, &status);
1670         result = uregex_matches(re, 0, &status);
1671         TEST_ASSERT(result == FALSE);
1672         TEST_ASSERT_SUCCESS(status);
1673         uregex_close(re);
1674 
1675         status = U_ZERO_ERROR;
1676         re = uregex_openC(".?", 0, NULL, &status);
1677         uregex_setUText(re, &text1, &status);
1678         result = uregex_matches(re, 7, &status);
1679         TEST_ASSERT(result == TRUE);
1680         TEST_ASSERT_SUCCESS(status);
1681 
1682         status = U_ZERO_ERROR;
1683         utext_openUTF8(&nullText, "", -1, &status);
1684         uregex_setUText(re, &nullText, &status);
1685         TEST_ASSERT_SUCCESS(status);
1686         result = uregex_matches(re, 0, &status);
1687         TEST_ASSERT(result == TRUE);
1688         TEST_ASSERT_SUCCESS(status);
1689 
1690         uregex_close(re);
1691         utext_close(&text1);
1692         utext_close(&nullText);
1693     }
1694 
1695 
1696     /*
1697      *  lookingAt()    Used in setText test.
1698      */
1699 
1700 
1701     /*
1702      *  find(), findNext, start, end, reset
1703      */
1704     {
1705         UChar    text1[50];
1706         UBool    result;
1707         u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
1708         status = U_ZERO_ERROR;
1709         re = uregex_openC("rx", 0, NULL, &status);
1710 
1711         uregex_setText(re, text1, -1, &status);
1712         result = uregex_find(re, 0, &status);
1713         TEST_ASSERT(result == TRUE);
1714         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1715         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1716         TEST_ASSERT_SUCCESS(status);
1717 
1718         result = uregex_find(re, 9, &status);
1719         TEST_ASSERT(result == TRUE);
1720         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1721         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1722         TEST_ASSERT_SUCCESS(status);
1723 
1724         result = uregex_find(re, 14, &status);
1725         TEST_ASSERT(result == FALSE);
1726         TEST_ASSERT_SUCCESS(status);
1727 
1728         status = U_ZERO_ERROR;
1729         uregex_reset(re, 0, &status);
1730 
1731         result = uregex_findNext(re, &status);
1732         TEST_ASSERT(result == TRUE);
1733         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1734         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1735         TEST_ASSERT_SUCCESS(status);
1736 
1737         result = uregex_findNext(re, &status);
1738         TEST_ASSERT(result == TRUE);
1739         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1740         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1741         TEST_ASSERT_SUCCESS(status);
1742 
1743         status = U_ZERO_ERROR;
1744         uregex_reset(re, 12, &status);
1745 
1746         result = uregex_findNext(re, &status);
1747         TEST_ASSERT(result == TRUE);
1748         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1749         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1750         TEST_ASSERT_SUCCESS(status);
1751 
1752         result = uregex_findNext(re, &status);
1753         TEST_ASSERT(result == FALSE);
1754         TEST_ASSERT_SUCCESS(status);
1755 
1756         uregex_close(re);
1757     }
1758 
1759     /*
1760      *  groupUText()
1761      */
1762     {
1763         UChar    text1[80];
1764         UText   *actual;
1765         UBool    result;
1766         int64_t  groupLen = 0;
1767         UChar    groupBuf[20];
1768 
1769         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
1770 
1771         status = U_ZERO_ERROR;
1772         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1773         TEST_ASSERT_SUCCESS(status);
1774 
1775         uregex_setText(re, text1, -1, &status);
1776         result = uregex_find(re, 0, &status);
1777         TEST_ASSERT(result==TRUE);
1778 
1779         /*  Capture Group 0 with shallow clone API.  Should succeed.  */
1780         status = U_ZERO_ERROR;
1781         actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
1782         TEST_ASSERT_SUCCESS(status);
1783 
1784         TEST_ASSERT(utext_getNativeIndex(actual) == 6);  /* index of "abc " within "noise abc ..." */
1785         TEST_ASSERT(groupLen == 16);   /* length of "abc interior def"  */
1786         utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1787 
1788         TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
1789         utext_close(actual);
1790 
1791         /*  Capture group #1.  Should succeed. */
1792         status = U_ZERO_ERROR;
1793 
1794         actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
1795         TEST_ASSERT_SUCCESS(status);
1796         TEST_ASSERT(9 == utext_getNativeIndex(actual));    /* index of " interior " within "noise abc interior def ... " */
1797                                                            /*    (within the string text1)           */
1798         TEST_ASSERT(10 == groupLen);                       /* length of " interior " */
1799         utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1800         TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
1801 
1802         utext_close(actual);
1803 
1804         /*  Capture group out of range.  Error. */
1805         status = U_ZERO_ERROR;
1806         actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
1807         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1808         utext_close(actual);
1809 
1810         uregex_close(re);
1811     }
1812 
1813     /*
1814      *  replaceFirst()
1815      */
1816     {
1817         UChar    text1[80];
1818         UChar    text2[80];
1819         UText    replText = UTEXT_INITIALIZER;
1820         UText   *result;
1821         const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1822         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1823         const char str_u00411U00000042a[] =  { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1824                0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
1825         const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1826         const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1827         status = U_ZERO_ERROR;
1828         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
1829         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1830         utext_openUTF8(&replText, str_1x, -1, &status);
1831 
1832         re = uregex_openC("x(.*?)x", 0, NULL, &status);
1833         TEST_ASSERT_SUCCESS(status);
1834 
1835         /*  Normal case, with match */
1836         uregex_setText(re, text1, -1, &status);
1837         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1838         TEST_ASSERT_SUCCESS(status);
1839         TEST_ASSERT_UTEXT(str_Replxxx, result);
1840         utext_close(result);
1841 
1842         /* No match.  Text should copy to output with no changes.  */
1843         uregex_setText(re, text2, -1, &status);
1844         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1845         TEST_ASSERT_SUCCESS(status);
1846         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1847         utext_close(result);
1848 
1849         /* Unicode escapes */
1850         uregex_setText(re, text1, -1, &status);
1851         utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1852         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1853         TEST_ASSERT_SUCCESS(status);
1854         TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1855         utext_close(result);
1856 
1857         uregex_close(re);
1858         utext_close(&replText);
1859     }
1860 
1861 
1862     /*
1863      *  replaceAll()
1864      */
1865     {
1866         UChar    text1[80];
1867         UChar    text2[80];
1868         UText    replText = UTEXT_INITIALIZER;
1869         UText   *result;
1870         const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1871         const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1872         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1873         status = U_ZERO_ERROR;
1874         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
1875         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1876         utext_openUTF8(&replText, str_1, -1, &status);
1877 
1878         re = uregex_openC("x(.*?)x", 0, NULL, &status);
1879         TEST_ASSERT_SUCCESS(status);
1880 
1881         /*  Normal case, with match */
1882         uregex_setText(re, text1, -1, &status);
1883         result = uregex_replaceAllUText(re, &replText, NULL, &status);
1884         TEST_ASSERT_SUCCESS(status);
1885         TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1886         utext_close(result);
1887 
1888         /* No match.  Text should copy to output with no changes.  */
1889         uregex_setText(re, text2, -1, &status);
1890         result = uregex_replaceAllUText(re, &replText, NULL, &status);
1891         TEST_ASSERT_SUCCESS(status);
1892         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1893         utext_close(result);
1894 
1895         uregex_close(re);
1896         utext_close(&replText);
1897     }
1898 
1899 
1900     /*
1901      *  appendReplacement()
1902      */
1903     {
1904         UChar    text[100];
1905         UChar    repl[100];
1906         UChar    buf[100];
1907         UChar   *bufPtr;
1908         int32_t  bufCap;
1909 
1910         status = U_ZERO_ERROR;
1911         re = uregex_openC(".*", 0, 0, &status);
1912         TEST_ASSERT_SUCCESS(status);
1913 
1914         u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
1915         u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1916         uregex_setText(re, text, -1, &status);
1917 
1918         /* match covers whole target string */
1919         uregex_find(re, 0, &status);
1920         TEST_ASSERT_SUCCESS(status);
1921         bufPtr = buf;
1922         bufCap = UPRV_LENGTHOF(buf);
1923         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1924         TEST_ASSERT_SUCCESS(status);
1925         TEST_ASSERT_STRING("some other", buf, TRUE);
1926 
1927         /* Match has \u \U escapes */
1928         uregex_find(re, 0, &status);
1929         TEST_ASSERT_SUCCESS(status);
1930         bufPtr = buf;
1931         bufCap = UPRV_LENGTHOF(buf);
1932         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1933         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1934         TEST_ASSERT_SUCCESS(status);
1935         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1936 
1937         uregex_close(re);
1938     }
1939 
1940 
1941     /*
1942      *  appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1943      */
1944 
1945     /*
1946      *  splitUText()
1947      */
1948     {
1949         UChar    textToSplit[80];
1950         UChar    text2[80];
1951         UText    *fields[10];
1952         int32_t  numFields;
1953         int32_t i;
1954 
1955         u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
1956         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1957 
1958         status = U_ZERO_ERROR;
1959         re = uregex_openC(":", 0, NULL, &status);
1960 
1961 
1962         /*  Simple split */
1963 
1964         uregex_setText(re, textToSplit, -1, &status);
1965         TEST_ASSERT_SUCCESS(status);
1966 
1967         /* The TEST_ASSERT_SUCCESS call above should change too... */
1968         if (U_SUCCESS(status)) {
1969             memset(fields, 0, sizeof(fields));
1970             numFields = uregex_splitUText(re, fields, 10, &status);
1971             TEST_ASSERT_SUCCESS(status);
1972 
1973             /* The TEST_ASSERT_SUCCESS call above should change too... */
1974             if(U_SUCCESS(status)) {
1975               const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1976               const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* '  second' */
1977               const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
1978                 TEST_ASSERT(numFields == 3);
1979                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
1980                 TEST_ASSERT_UTEXT(str_second, fields[1]);
1981                 TEST_ASSERT_UTEXT(str_third, fields[2]);
1982                 TEST_ASSERT(fields[3] == NULL);
1983             }
1984             for(i = 0; i < numFields; i++) {
1985                 utext_close(fields[i]);
1986             }
1987         }
1988 
1989         uregex_close(re);
1990 
1991 
1992         /*  Split with too few output strings available */
1993         status = U_ZERO_ERROR;
1994         re = uregex_openC(":", 0, NULL, &status);
1995         uregex_setText(re, textToSplit, -1, &status);
1996         TEST_ASSERT_SUCCESS(status);
1997 
1998         /* The TEST_ASSERT_SUCCESS call above should change too... */
1999         if(U_SUCCESS(status)) {
2000             fields[0] = NULL;
2001             fields[1] = NULL;
2002             fields[2] = &patternText;
2003             numFields = uregex_splitUText(re, fields, 2, &status);
2004             TEST_ASSERT_SUCCESS(status);
2005 
2006             /* The TEST_ASSERT_SUCCESS call above should change too... */
2007             if(U_SUCCESS(status)) {
2008                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2009                 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second:  third */
2010                 TEST_ASSERT(numFields == 2);
2011                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2012                 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
2013                 TEST_ASSERT(fields[2] == &patternText);
2014             }
2015             for(i = 0; i < numFields; i++) {
2016                 utext_close(fields[i]);
2017             }
2018         }
2019 
2020         uregex_close(re);
2021     }
2022 
2023     /* splitUText(), part 2.  Patterns with capture groups.  The capture group text
2024      *                   comes out as additional fields.  */
2025     {
2026         UChar    textToSplit[80];
2027         UText    *fields[10];
2028         int32_t  numFields;
2029         int32_t i;
2030 
2031         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
2032 
2033         status = U_ZERO_ERROR;
2034         re = uregex_openC("<(.*?)>", 0, NULL, &status);
2035 
2036         uregex_setText(re, textToSplit, -1, &status);
2037         TEST_ASSERT_SUCCESS(status);
2038 
2039         /* The TEST_ASSERT_SUCCESS call above should change too... */
2040         if(U_SUCCESS(status)) {
2041             memset(fields, 0, sizeof(fields));
2042             numFields = uregex_splitUText(re, fields, 10, &status);
2043             TEST_ASSERT_SUCCESS(status);
2044 
2045             /* The TEST_ASSERT_SUCCESS call above should change too... */
2046             if(U_SUCCESS(status)) {
2047                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2048                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2049                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2050                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2051                 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2052 
2053                 TEST_ASSERT(numFields == 5);
2054                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2055                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2056                 TEST_ASSERT_UTEXT(str_second, fields[2]);
2057                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2058                 TEST_ASSERT_UTEXT(str_third, fields[4]);
2059                 TEST_ASSERT(fields[5] == NULL);
2060             }
2061             for(i = 0; i < numFields; i++) {
2062                 utext_close(fields[i]);
2063             }
2064         }
2065 
2066         /*  Split with too few output strings available (2) */
2067         status = U_ZERO_ERROR;
2068         fields[0] = NULL;
2069         fields[1] = NULL;
2070         fields[2] = &patternText;
2071         numFields = uregex_splitUText(re, fields, 2, &status);
2072         TEST_ASSERT_SUCCESS(status);
2073 
2074         /* The TEST_ASSERT_SUCCESS call above should change too... */
2075         if(U_SUCCESS(status)) {
2076             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2077             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2078             TEST_ASSERT(numFields == 2);
2079             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2080             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2081             TEST_ASSERT(fields[2] == &patternText);
2082         }
2083         for(i = 0; i < numFields; i++) {
2084             utext_close(fields[i]);
2085         }
2086 
2087 
2088         /*  Split with too few output strings available (3) */
2089         status = U_ZERO_ERROR;
2090         fields[0] = NULL;
2091         fields[1] = NULL;
2092         fields[2] = NULL;
2093         fields[3] = &patternText;
2094         numFields = uregex_splitUText(re, fields, 3, &status);
2095         TEST_ASSERT_SUCCESS(status);
2096 
2097         /* The TEST_ASSERT_SUCCESS call above should change too... */
2098         if(U_SUCCESS(status)) {
2099             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2100             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2101             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2102             TEST_ASSERT(numFields == 3);
2103             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2104             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2105             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2106             TEST_ASSERT(fields[3] == &patternText);
2107         }
2108         for(i = 0; i < numFields; i++) {
2109             utext_close(fields[i]);
2110         }
2111 
2112         /*  Split with just enough output strings available (5) */
2113         status = U_ZERO_ERROR;
2114         fields[0] = NULL;
2115         fields[1] = NULL;
2116         fields[2] = NULL;
2117         fields[3] = NULL;
2118         fields[4] = NULL;
2119         fields[5] = &patternText;
2120         numFields = uregex_splitUText(re, fields, 5, &status);
2121         TEST_ASSERT_SUCCESS(status);
2122 
2123         /* The TEST_ASSERT_SUCCESS call above should change too... */
2124         if(U_SUCCESS(status)) {
2125             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2126             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2127             const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2128             const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2129             const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2130 
2131             TEST_ASSERT(numFields == 5);
2132             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2133             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2134             TEST_ASSERT_UTEXT(str_second, fields[2]);
2135             TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2136             TEST_ASSERT_UTEXT(str_third, fields[4]);
2137             TEST_ASSERT(fields[5] == &patternText);
2138         }
2139         for(i = 0; i < numFields; i++) {
2140             utext_close(fields[i]);
2141         }
2142 
2143         /* Split, end of text is a field delimiter.   */
2144         status = U_ZERO_ERROR;
2145         uregex_setText(re, textToSplit, strlen("first <tag-a> second<tag-b>"), &status);
2146         TEST_ASSERT_SUCCESS(status);
2147 
2148         /* The TEST_ASSERT_SUCCESS call above should change too... */
2149         if(U_SUCCESS(status)) {
2150             memset(fields, 0, sizeof(fields));
2151             fields[9] = &patternText;
2152             numFields = uregex_splitUText(re, fields, 9, &status);
2153             TEST_ASSERT_SUCCESS(status);
2154 
2155             /* The TEST_ASSERT_SUCCESS call above should change too... */
2156             if(U_SUCCESS(status)) {
2157                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2158                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2159                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2160                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2161                 const char str_empty[] = { 0x00 };
2162 
2163                 TEST_ASSERT(numFields == 5);
2164                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2165                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2166                 TEST_ASSERT_UTEXT(str_second, fields[2]);
2167                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2168                 TEST_ASSERT_UTEXT(str_empty,  fields[4]);
2169                 TEST_ASSERT(fields[5] == NULL);
2170                 TEST_ASSERT(fields[8] == NULL);
2171                 TEST_ASSERT(fields[9] == &patternText);
2172             }
2173             for(i = 0; i < numFields; i++) {
2174                 utext_close(fields[i]);
2175             }
2176         }
2177 
2178         uregex_close(re);
2179     }
2180     utext_close(&patternText);
2181 }
2182 
2183 
TestRefreshInput(void)2184 static void TestRefreshInput(void) {
2185     /*
2186      *  RefreshInput changes out the input of a URegularExpression without
2187      *    changing anything else in the match state.  Used with Java JNI,
2188      *    when Java moves the underlying string storage.   This test
2189      *    runs a find() loop, moving the text after the first match.
2190      *    The right number of matches should still be found.
2191      */
2192     UChar testStr[]  = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0};  /* = "A B C"  */
2193     UChar movedStr[] = {   0,    0,    0,    0,    0,   0};
2194     UErrorCode status = U_ZERO_ERROR;
2195     URegularExpression *re;
2196     UText ut1 = UTEXT_INITIALIZER;
2197     UText ut2 = UTEXT_INITIALIZER;
2198 
2199     re = uregex_openC("[ABC]", 0, 0, &status);
2200     TEST_ASSERT_SUCCESS(status);
2201 
2202     utext_openUChars(&ut1, testStr, -1, &status);
2203     TEST_ASSERT_SUCCESS(status);
2204     uregex_setUText(re, &ut1, &status);
2205     TEST_ASSERT_SUCCESS(status);
2206 
2207     /* Find the first match "A" in the original string */
2208     TEST_ASSERT(uregex_findNext(re, &status));
2209     TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2210 
2211     /* Move the string, kill the original string.  */
2212     u_strcpy(movedStr, testStr);
2213     u_memset(testStr, 0, u_strlen(testStr));
2214     utext_openUChars(&ut2, movedStr, -1, &status);
2215     TEST_ASSERT_SUCCESS(status);
2216     uregex_refreshUText(re, &ut2, &status);
2217     TEST_ASSERT_SUCCESS(status);
2218 
2219     /* Find the following two matches, now working in the moved string. */
2220     TEST_ASSERT(uregex_findNext(re, &status));
2221     TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2222     TEST_ASSERT(uregex_findNext(re, &status));
2223     TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2224     TEST_ASSERT(FALSE == uregex_findNext(re, &status));
2225 
2226     uregex_close(re);
2227 }
2228 
2229 
TestBug8421(void)2230 static void TestBug8421(void) {
2231     /* Bug 8421:  setTimeLimit on a regular expresssion before setting text to be matched
2232      *             was failing.
2233      */
2234     URegularExpression *re;
2235     UErrorCode status = U_ZERO_ERROR;
2236     int32_t  limit = -1;
2237 
2238     re = uregex_openC("abc", 0, 0, &status);
2239     TEST_ASSERT_SUCCESS(status);
2240 
2241     limit = uregex_getTimeLimit(re, &status);
2242     TEST_ASSERT_SUCCESS(status);
2243     TEST_ASSERT(limit == 0);
2244 
2245     uregex_setTimeLimit(re, 100, &status);
2246     TEST_ASSERT_SUCCESS(status);
2247     limit = uregex_getTimeLimit(re, &status);
2248     TEST_ASSERT_SUCCESS(status);
2249     TEST_ASSERT(limit == 100);
2250 
2251     uregex_close(re);
2252 }
2253 
FindCallback(const void * context,int64_t matchIndex)2254 static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
2255     return FALSE;
2256 }
2257 
MatchCallback(const void * context,int32_t steps)2258 static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
2259     return FALSE;
2260 }
2261 
TestBug10815()2262 static void TestBug10815() {
2263   /* Bug 10815:   uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
2264    *              when the callback function specified by uregex_setMatchCallback() returns FALSE
2265    */
2266     URegularExpression *re;
2267     UErrorCode status = U_ZERO_ERROR;
2268     UChar    text[100];
2269 
2270 
2271     // findNext() with a find progress callback function.
2272 
2273     re = uregex_openC(".z", 0, 0, &status);
2274     TEST_ASSERT_SUCCESS(status);
2275 
2276     u_uastrncpy(text, "Hello, World.",  UPRV_LENGTHOF(text));
2277     uregex_setText(re, text, -1, &status);
2278     TEST_ASSERT_SUCCESS(status);
2279 
2280     uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
2281     TEST_ASSERT_SUCCESS(status);
2282 
2283     uregex_findNext(re, &status);
2284     TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2285 
2286     uregex_close(re);
2287 
2288     // findNext() with a match progress callback function.
2289 
2290     status = U_ZERO_ERROR;
2291     re = uregex_openC("((xxx)*)*y", 0, 0, &status);
2292     TEST_ASSERT_SUCCESS(status);
2293 
2294     // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2295     // it will appear to be stuck in a (near) infinite loop.
2296     u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",  UPRV_LENGTHOF(text));
2297     uregex_setText(re, text, -1, &status);
2298     TEST_ASSERT_SUCCESS(status);
2299 
2300     uregex_setMatchCallback(re, MatchCallback, NULL, &status);
2301     TEST_ASSERT_SUCCESS(status);
2302 
2303     uregex_findNext(re, &status);
2304     TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2305 
2306     uregex_close(re);
2307 }
2308 
2309 
2310 #endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */
2311