• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 2004-2015, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /********************************************************************************
9 *
10 * File reapits.c
11 *
12 *********************************************************************************/
13 /*C API TEST FOR Regular Expressions */
14 /**
15 *   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
16 *   try to test the full functionality.  It just calls each function and verifies that it
17 *   works on a basic level.
18 *
19 *   More complete testing of regular expression functionality is done with the C++ tests.
20 **/
21 
22 #include "unicode/utypes.h"
23 
24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
25 
26 #include <stdbool.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include "unicode/uloc.h"
30 #include "unicode/uregex.h"
31 #include "unicode/ustring.h"
32 #include "unicode/utext.h"
33 #include "unicode/utf8.h"
34 #include "cintltst.h"
35 #include "cmemory.h"
36 
/*
 * TEST_ASSERT_SUCCESS(status)
 *     Logs a data error, tagged with the current file and line and the ICU
 *     name of the error code, when `status` holds a failure code.
 *     The macro only logs; control continues with the next statement.
 */
#define TEST_ASSERT_SUCCESS(status) UPRV_BLOCK_MACRO_BEGIN { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
} UPRV_BLOCK_MACRO_END
42 
/*
 * TEST_ASSERT(expr)
 *     Logs a test error, tagged with the current file and line and the text of
 *     the expression, when `expr` evaluates to zero/false.
 *     The macro only logs; control continues with the next statement.
 */
#define TEST_ASSERT(expr) UPRV_BLOCK_MACRO_BEGIN { \
    if (!(expr)) { \
        log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", \
                __FILE__, __LINE__, #expr); \
    } \
} UPRV_BLOCK_MACRO_END
48 
/*
 *   TEST_SETUP and TEST_TEARDOWN
 *         Macros to handle the boilerplate around setting up regex test cases.
 *         Parameters to TEST_SETUP:
 *              pattern:     The regex pattern, a (char *) null terminated C string.
 *              testString:  The string data, also a (char *) C string.
 *              flags:       Regex flags to set when compiling the pattern
 *
 *         Put arbitrary test code between SETUP and TEARDOWN.
 *         "re" is the compiled, ready-to-go regular expression.
 *
 *         The two macros must always be used as a pair: TEST_SETUP opens a block
 *         and an inner "if (U_SUCCESS(status))" that only TEST_TEARDOWN closes, so
 *         the code between them runs only when setup succeeded.
 *         Both macros use variables named "status" and "re" from the enclosing scope.
 *
 *         TEST_SETUP heap-allocates a UChar copy of testString (srcString);
 *         TEST_TEARDOWN closes "re" and frees that copy.  If the allocation
 *         fails, status is set to U_MEMORY_ALLOCATION_ERROR (unless an earlier
 *         error is already recorded) and the test body is skipped.
 */
#define TEST_SETUP(pattern, testString, flags) UPRV_BLOCK_MACRO_BEGIN { \
    UChar   *srcString = NULL;  \
    status = U_ZERO_ERROR; \
    re = uregex_openC(pattern, flags, NULL, &status);  \
    TEST_ASSERT_SUCCESS(status);   \
    int32_t testStringLen = (int32_t)strlen(testString); \
    srcString = (UChar *)malloc( (testStringLen + 2) * sizeof(UChar) ); \
    if (srcString == NULL) { \
        /* Never hand a NULL buffer to u_uastrncpy(); report the failure instead. */ \
        if (U_SUCCESS(status)) { \
            status = U_MEMORY_ALLOCATION_ERROR; \
        } \
    } else { \
        /* testStringLen + 1 so that the terminating NUL is copied as well. */ \
        u_uastrncpy(srcString, testString, testStringLen + 1); \
        uregex_setText(re, srcString, -1, &status); \
    } \
    TEST_ASSERT_SUCCESS(status);  \
    if (U_SUCCESS(status)) { \
        UPRV_BLOCK_MACRO_BEGIN {} UPRV_BLOCK_MACRO_END

#define TEST_TEARDOWN  \
    }  \
    TEST_ASSERT_SUCCESS(status);  \
    uregex_close(re);  \
    free(srcString);   \
} UPRV_BLOCK_MACRO_END
79 
80 
81 /**
82  * @param expected utf-8 array of bytes to be expected
83  */
test_assert_string(const char * expected,const UChar * actual,UBool nulTerm,const char * file,int line)84 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
85      char     buf_inside_macro[120];
86      int32_t  len = (int32_t)strlen(expected);
87      UBool    success;
88      if (nulTerm) {
89          u_austrncpy(buf_inside_macro, (actual), len+1);
90          buf_inside_macro[len+2] = 0;
91          success = (strcmp((expected), buf_inside_macro) == 0);
92      } else {
93          u_austrncpy(buf_inside_macro, (actual), len);
94          buf_inside_macro[len+1] = 0;
95          success = (strncmp((expected), buf_inside_macro, len) == 0);
96      }
97      if (success == false) {
98          log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
99              file, line, (expected), buf_inside_macro);
100      }
101 }
102 
103 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
104 
105 
equals_utf8_utext(const char * utf8,UText * utext)106 static UBool equals_utf8_utext(const char *utf8, UText *utext) {
107     int32_t u8i = 0;
108     UChar32 u8c = 0;
109     UChar32 utc = 0;
110     UBool   stringsEqual = true;
111     utext_setNativeIndex(utext, 0);
112     for (;;) {
113         U8_NEXT_UNSAFE(utf8, u8i, u8c);
114         utc = utext_next32(utext);
115         if (u8c == 0 && utc == U_SENTINEL) {
116             break;
117         }
118         if (u8c != utc || u8c == 0) {
119             stringsEqual = false;
120             break;
121         }
122     }
123     return stringsEqual;
124 }
125 
126 
/**
 * Check that a UText holds the expected UTF-8 text; on a mismatch, log a test
 * error with the caller's file and line followed by a dump of the actual text.
 * In the dump, code points strictly between 0x20 and 0x7e are written as
 * characters and everything else as a hexadecimal value.
 *
 * @param expected  the expected text, NUL-terminated UTF-8
 * @param actual    the UText to check; its iteration position is changed
 * @param file      source file name reported in the failure message
 * @param line      source line number reported in the failure message
 */
static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
    UChar32 ch;

    utext_setNativeIndex(actual, 0);
    if (equals_utf8_utext(expected, actual)) {
        return;
    }

    log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
    for (ch = utext_next32From(actual, 0); ch != U_SENTINEL; ch = UTEXT_NEXT32(actual)) {
        if (ch > 0x20 && ch < 0x7e) {
            log_err("%c", ch);
        } else {
            log_err("%#x", ch);
        }
    }
    log_err("\"\n");
}

/*
 * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
 *     Note:  Expected is a UTF-8 encoded string, _not_ the system code page.
 *     Supplies the call site's file and line to test_assert_utext().
 */
#define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
150 
testUTextEqual(UText * uta,UText * utb)151 static UBool testUTextEqual(UText *uta, UText *utb) {
152     UChar32 ca = 0;
153     UChar32 cb = 0;
154     utext_setNativeIndex(uta, 0);
155     utext_setNativeIndex(utb, 0);
156     do {
157         ca = utext_next32(uta);
158         cb = utext_next32(utb);
159         if (ca != cb) {
160             break;
161         }
162     } while (ca != U_SENTINEL);
163     return ca == cb;
164 }
165 
166 
167 
168 
169 static void TestRegexCAPI(void);
170 static void TestBug4315(void);
171 static void TestUTextAPI(void);
172 static void TestRefreshInput(void);
173 static void TestBug8421(void);
174 static void TestBug10815(void);
175 
176 void addURegexTest(TestNode** root);
177 
addURegexTest(TestNode ** root)178 void addURegexTest(TestNode** root)
179 {
180     addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
181     addTest(root, &TestBug4315,   "regex/TestBug4315");
182     addTest(root, &TestUTextAPI,  "regex/TestUTextAPI");
183     addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
184     addTest(root, &TestBug8421,   "regex/TestBug8421");
185     addTest(root, &TestBug10815,   "regex/TestBug10815");
186 }
187 
/*
 * Context struct used together with the call back function TestCallbackFn
 *    for testing regular expression user callbacks.  This test is mostly
 *    the same as the corresponding C++ test in intltest.
 */
typedef struct callBackContext {
    int32_t maxCalls;    /* TestCallbackFn returns false once numCalls reaches this value */
    int32_t numCalls;    /* incremented on every invocation of TestCallbackFn */
    int32_t lastSteps;   /* the "steps" argument seen by the most recent invocation */
} callBackContext;
198 
199 static UBool U_EXPORT2 U_CALLCONV
TestCallbackFn(const void * context,int32_t steps)200 TestCallbackFn(const void *context, int32_t steps) {
201   callBackContext  *info = (callBackContext *)context;
202   if (info->lastSteps+1 != steps) {
203       log_err("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
204   }
205   info->lastSteps = steps;
206   info->numCalls++;
207   return (info->numCalls < info->maxCalls);
208 }
209 
210 /*
211  *   Regular Expression C API Tests
212  */
TestRegexCAPI(void)213 static void TestRegexCAPI(void) {
214     UErrorCode           status = U_ZERO_ERROR;
215     URegularExpression  *re;
216     UChar                pat[200];
217     UChar               *minus1;
218 
219     memset(&minus1, -1, sizeof(minus1));
220 
221     /* Mimimalist open/close */
222     u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
223     re = uregex_open(pat, -1, 0, 0, &status);
224     if (U_FAILURE(status)) {
225          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
226          return;
227     }
228     uregex_close(re);
229 
230     /* Open with all flag values set */
231     status = U_ZERO_ERROR;
232     re = uregex_open(pat, -1,
233         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
234         0, &status);
235     TEST_ASSERT_SUCCESS(status);
236     uregex_close(re);
237 
238     /* Open with an invalid flag */
239     status = U_ZERO_ERROR;
240     re = uregex_open(pat, -1, 0x40000000, 0, &status);
241     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
242     uregex_close(re);
243 
244     /* Open with an unimplemented flag */
245     status = U_ZERO_ERROR;
246     re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
247     TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
248     uregex_close(re);
249 
250     /* openC with an invalid parameter */
251     status = U_ZERO_ERROR;
252     re = uregex_openC(NULL,
253         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
254     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
255 
256     /* openC with an invalid parameter */
257     status = U_USELESS_COLLATOR_ERROR;
258     re = uregex_openC(NULL,
259         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
260     TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
261 
262     /* openC   open from a C string */
263     {
264         const UChar   *p;
265         int32_t  len;
266         status = U_ZERO_ERROR;
267         re = uregex_openC("abc*", 0, 0, &status);
268         TEST_ASSERT_SUCCESS(status);
269         p = uregex_pattern(re, &len, &status);
270         TEST_ASSERT_SUCCESS(status);
271 
272         /* The TEST_ASSERT_SUCCESS above should change too... */
273         if(U_SUCCESS(status)) {
274             u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
275             TEST_ASSERT(u_strcmp(pat, p) == 0);
276             TEST_ASSERT(len==(int32_t)strlen("abc*"));
277         }
278 
279         uregex_close(re);
280 
281         /*  TODO:  Open with ParseError parameter */
282     }
283 
284     /*
285      *  clone
286      */
287     {
288         URegularExpression *clone1;
289         URegularExpression *clone2;
290         URegularExpression *clone3;
291         UChar  testString1[30];
292         UChar  testString2[30];
293         UBool  result;
294 
295 
296         status = U_ZERO_ERROR;
297         re = uregex_openC("abc*", 0, 0, &status);
298         TEST_ASSERT_SUCCESS(status);
299         clone1 = uregex_clone(re, &status);
300         TEST_ASSERT_SUCCESS(status);
301         TEST_ASSERT(clone1 != NULL);
302 
303         status = U_ZERO_ERROR;
304         clone2 = uregex_clone(re, &status);
305         TEST_ASSERT_SUCCESS(status);
306         TEST_ASSERT(clone2 != NULL);
307         uregex_close(re);
308 
309         status = U_ZERO_ERROR;
310         clone3 = uregex_clone(clone2, &status);
311         TEST_ASSERT_SUCCESS(status);
312         TEST_ASSERT(clone3 != NULL);
313 
314         u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
315         u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
316 
317         status = U_ZERO_ERROR;
318         uregex_setText(clone1, testString1, -1, &status);
319         TEST_ASSERT_SUCCESS(status);
320         result = uregex_lookingAt(clone1, 0, &status);
321         TEST_ASSERT_SUCCESS(status);
322         TEST_ASSERT(result==true);
323 
324         status = U_ZERO_ERROR;
325         uregex_setText(clone2, testString2, -1, &status);
326         TEST_ASSERT_SUCCESS(status);
327         result = uregex_lookingAt(clone2, 0, &status);
328         TEST_ASSERT_SUCCESS(status);
329         TEST_ASSERT(result==false);
330         result = uregex_find(clone2, 0, &status);
331         TEST_ASSERT_SUCCESS(status);
332         TEST_ASSERT(result==true);
333 
334         uregex_close(clone1);
335         uregex_close(clone2);
336         uregex_close(clone3);
337 
338     }
339 
340     /*
341      *  pattern()
342     */
343     {
344         const UChar  *resultPat;
345         int32_t       resultLen;
346         u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));
347         status = U_ZERO_ERROR;
348         re = uregex_open(pat, -1, 0, NULL, &status);
349         resultPat = uregex_pattern(re, &resultLen, &status);
350         TEST_ASSERT_SUCCESS(status);
351 
352         /* The TEST_ASSERT_SUCCESS above should change too... */
353         if (U_SUCCESS(status)) {
354             TEST_ASSERT(resultLen == -1);
355             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
356         }
357 
358         uregex_close(re);
359 
360         status = U_ZERO_ERROR;
361         re = uregex_open(pat, 3, 0, NULL, &status);
362         resultPat = uregex_pattern(re, &resultLen, &status);
363         TEST_ASSERT_SUCCESS(status);
364         TEST_ASSERT_SUCCESS(status);
365 
366         /* The TEST_ASSERT_SUCCESS above should change too... */
367         if (U_SUCCESS(status)) {
368             TEST_ASSERT(resultLen == 3);
369             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
370             TEST_ASSERT(u_strlen(resultPat) == 3);
371         }
372 
373         uregex_close(re);
374     }
375 
376     /*
377      *  flags()
378      */
379     {
380         int32_t  t;
381 
382         status = U_ZERO_ERROR;
383         re = uregex_open(pat, -1, 0, NULL, &status);
384         t  = uregex_flags(re, &status);
385         TEST_ASSERT_SUCCESS(status);
386         TEST_ASSERT(t == 0);
387         uregex_close(re);
388 
389         status = U_ZERO_ERROR;
390         re = uregex_open(pat, -1, 0, NULL, &status);
391         t  = uregex_flags(re, &status);
392         TEST_ASSERT_SUCCESS(status);
393         TEST_ASSERT(t == 0);
394         uregex_close(re);
395 
396         status = U_ZERO_ERROR;
397         re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
398         t  = uregex_flags(re, &status);
399         TEST_ASSERT_SUCCESS(status);
400         TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
401         uregex_close(re);
402     }
403 
404     /*
405      *  setText() and lookingAt()
406      */
407     {
408         UChar  text1[50];
409         UChar  text2[50];
410         UBool  result;
411 
412         u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
413         u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
414         status = U_ZERO_ERROR;
415         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
416         re = uregex_open(pat, -1, 0, NULL, &status);
417         TEST_ASSERT_SUCCESS(status);
418 
419         /* Operation before doing a setText should fail... */
420         status = U_ZERO_ERROR;
421         uregex_lookingAt(re, 0, &status);
422         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
423 
424         status = U_ZERO_ERROR;
425         uregex_setText(re, text1, -1, &status);
426         result = uregex_lookingAt(re, 0, &status);
427         TEST_ASSERT(result == true);
428         TEST_ASSERT_SUCCESS(status);
429 
430         status = U_ZERO_ERROR;
431         uregex_setText(re, text2, -1, &status);
432         result = uregex_lookingAt(re, 0, &status);
433         TEST_ASSERT(result == false);
434         TEST_ASSERT_SUCCESS(status);
435 
436         status = U_ZERO_ERROR;
437         uregex_setText(re, text1, -1, &status);
438         result = uregex_lookingAt(re, 0, &status);
439         TEST_ASSERT(result == true);
440         TEST_ASSERT_SUCCESS(status);
441 
442         status = U_ZERO_ERROR;
443         uregex_setText(re, text1, 5, &status);
444         result = uregex_lookingAt(re, 0, &status);
445         TEST_ASSERT(result == false);
446         TEST_ASSERT_SUCCESS(status);
447 
448         status = U_ZERO_ERROR;
449         uregex_setText(re, text1, 6, &status);
450         result = uregex_lookingAt(re, 0, &status);
451         TEST_ASSERT(result == true);
452         TEST_ASSERT_SUCCESS(status);
453 
454         uregex_close(re);
455     }
456 
457 
458     /*
459      *  getText()
460      */
461     {
462         UChar    text1[50];
463         UChar    text2[50];
464         const UChar   *result;
465         int32_t  textLength;
466 
467         u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
468         u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
469         status = U_ZERO_ERROR;
470         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
471         re = uregex_open(pat, -1, 0, NULL, &status);
472 
473         uregex_setText(re, text1, -1, &status);
474         result = uregex_getText(re, &textLength, &status);
475         TEST_ASSERT(result == text1);
476         TEST_ASSERT(textLength == -1);
477         TEST_ASSERT_SUCCESS(status);
478 
479         status = U_ZERO_ERROR;
480         uregex_setText(re, text2, 7, &status);
481         result = uregex_getText(re, &textLength, &status);
482         TEST_ASSERT(result == text2);
483         TEST_ASSERT(textLength == 7);
484         TEST_ASSERT_SUCCESS(status);
485 
486         status = U_ZERO_ERROR;
487         uregex_setText(re, text2, 4, &status);
488         result = uregex_getText(re, &textLength, &status);
489         TEST_ASSERT(result == text2);
490         TEST_ASSERT(textLength == 4);
491         TEST_ASSERT_SUCCESS(status);
492         uregex_close(re);
493     }
494 
495     /*
496      *  matches()
497      */
498     {
499         UChar   text1[50];
500         UBool   result;
501         int     len;
502         UChar   nullString[] = {0,0,0};
503 
504         u_uastrncpy(text1, "abcccde",  UPRV_LENGTHOF(text1));
505         status = U_ZERO_ERROR;
506         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
507         re = uregex_open(pat, -1, 0, NULL, &status);
508 
509         uregex_setText(re, text1, -1, &status);
510         result = uregex_matches(re, 0, &status);
511         TEST_ASSERT(result == false);
512         TEST_ASSERT_SUCCESS(status);
513 
514         status = U_ZERO_ERROR;
515         uregex_setText(re, text1, 6, &status);
516         result = uregex_matches(re, 0, &status);
517         TEST_ASSERT(result == true);
518         TEST_ASSERT_SUCCESS(status);
519 
520         status = U_ZERO_ERROR;
521         uregex_setText(re, text1, 6, &status);
522         result = uregex_matches(re, 1, &status);
523         TEST_ASSERT(result == false);
524         TEST_ASSERT_SUCCESS(status);
525         uregex_close(re);
526 
527         status = U_ZERO_ERROR;
528         re = uregex_openC(".?", 0, NULL, &status);
529         uregex_setText(re, text1, -1, &status);
530         len = u_strlen(text1);
531         result = uregex_matches(re, len, &status);
532         TEST_ASSERT(result == true);
533         TEST_ASSERT_SUCCESS(status);
534 
535         status = U_ZERO_ERROR;
536         uregex_setText(re, nullString, -1, &status);
537         TEST_ASSERT_SUCCESS(status);
538         result = uregex_matches(re, 0, &status);
539         TEST_ASSERT(result == true);
540         TEST_ASSERT_SUCCESS(status);
541         uregex_close(re);
542     }
543 
544 
545     /*
546      *  lookingAt()    Used in setText test.
547      */
548 
549 
550     /*
551      *  find(), findNext, start, end, reset
552      */
553     {
554         UChar    text1[50];
555         UBool    result;
556         u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
557         status = U_ZERO_ERROR;
558         re = uregex_openC("rx", 0, NULL, &status);
559 
560         uregex_setText(re, text1, -1, &status);
561         result = uregex_find(re, 0, &status);
562         TEST_ASSERT(result == true);
563         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
564         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
565         TEST_ASSERT_SUCCESS(status);
566 
567         result = uregex_find(re, 9, &status);
568         TEST_ASSERT(result == true);
569         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
570         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
571         TEST_ASSERT_SUCCESS(status);
572 
573         result = uregex_find(re, 14, &status);
574         TEST_ASSERT(result == false);
575         TEST_ASSERT_SUCCESS(status);
576 
577         status = U_ZERO_ERROR;
578         uregex_reset(re, 0, &status);
579 
580         result = uregex_findNext(re, &status);
581         TEST_ASSERT(result == true);
582         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
583         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
584         TEST_ASSERT_SUCCESS(status);
585 
586         result = uregex_findNext(re, &status);
587         TEST_ASSERT(result == true);
588         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
589         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
590         TEST_ASSERT_SUCCESS(status);
591 
592         status = U_ZERO_ERROR;
593         uregex_reset(re, 12, &status);
594 
595         result = uregex_findNext(re, &status);
596         TEST_ASSERT(result == true);
597         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
598         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
599         TEST_ASSERT_SUCCESS(status);
600 
601         result = uregex_findNext(re, &status);
602         TEST_ASSERT(result == false);
603         TEST_ASSERT_SUCCESS(status);
604 
605         uregex_close(re);
606     }
607 
608     /*
609      *  groupCount
610      */
611     {
612         int32_t result;
613 
614         status = U_ZERO_ERROR;
615         re = uregex_openC("abc", 0, NULL, &status);
616         result = uregex_groupCount(re, &status);
617         TEST_ASSERT_SUCCESS(status);
618         TEST_ASSERT(result == 0);
619         uregex_close(re);
620 
621         status = U_ZERO_ERROR;
622         re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
623         result = uregex_groupCount(re, &status);
624         TEST_ASSERT_SUCCESS(status);
625         TEST_ASSERT(result == 3);
626         uregex_close(re);
627 
628     }
629 
630 
631     /*
632      *  group()
633      */
634     {
635         UChar    text1[80];
636         UChar    buf[80];
637         UBool    result;
638         int32_t  resultSz;
639         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
640 
641         status = U_ZERO_ERROR;
642         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
643         TEST_ASSERT_SUCCESS(status);
644 
645 
646         uregex_setText(re, text1, -1, &status);
647         result = uregex_find(re, 0, &status);
648         TEST_ASSERT(result==true);
649 
650         /*  Capture Group 0, the full match.  Should succeed.  */
651         status = U_ZERO_ERROR;
652         resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);
653         TEST_ASSERT_SUCCESS(status);
654         TEST_ASSERT_STRING("abc interior def", buf, true);
655         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
656 
657         /*  Capture group #1.  Should succeed. */
658         status = U_ZERO_ERROR;
659         resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);
660         TEST_ASSERT_SUCCESS(status);
661         TEST_ASSERT_STRING(" interior ", buf, true);
662         TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
663 
664         /*  Capture group out of range.  Error. */
665         status = U_ZERO_ERROR;
666         uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);
667         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
668 
669         /* NULL buffer, pure pre-flight */
670         status = U_ZERO_ERROR;
671         resultSz = uregex_group(re, 0, NULL, 0, &status);
672         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
673         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
674 
675         /* Too small buffer, truncated string */
676         status = U_ZERO_ERROR;
677         memset(buf, -1, sizeof(buf));
678         resultSz = uregex_group(re, 0, buf, 5, &status);
679         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
680         TEST_ASSERT_STRING("abc i", buf, false);
681         TEST_ASSERT(buf[5] == (UChar)0xffff);
682         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
683 
684         /* Output string just fits buffer, no NUL term. */
685         status = U_ZERO_ERROR;
686         resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
687         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
688         TEST_ASSERT_STRING("abc interior def", buf, false);
689         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
690         TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
691 
692         uregex_close(re);
693 
694     }
695 
696     /*
697      *  Regions
698      */
699 
700 
701         /* SetRegion(), getRegion() do something  */
702         TEST_SETUP(".*", "0123456789ABCDEF", 0);
703         UChar resultString[40];
704         TEST_ASSERT(uregex_regionStart(re, &status) == 0);
705         TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
706         uregex_setRegion(re, 3, 6, &status);
707         TEST_ASSERT(uregex_regionStart(re, &status) == 3);
708         TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
709         TEST_ASSERT(uregex_findNext(re, &status));
710         TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3);
711         TEST_ASSERT_STRING("345", resultString, true);
712         TEST_TEARDOWN;
713 
714         /* find(start=-1) uses regions   */
715         TEST_SETUP(".*", "0123456789ABCDEF", 0);
716         uregex_setRegion(re, 4, 6, &status);
717         TEST_ASSERT(uregex_find(re, -1, &status) == true);
718         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
719         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
720         TEST_TEARDOWN;
721 
722         /* find (start >=0) does not use regions   */
723         TEST_SETUP(".*", "0123456789ABCDEF", 0);
724         uregex_setRegion(re, 4, 6, &status);
725         TEST_ASSERT(uregex_find(re, 0, &status) == true);
726         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
727         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
728         TEST_TEARDOWN;
729 
730         /* findNext() obeys regions    */
731         TEST_SETUP(".", "0123456789ABCDEF", 0);
732         uregex_setRegion(re, 4, 6, &status);
733         TEST_ASSERT(uregex_findNext(re,&status) == true);
734         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
735         TEST_ASSERT(uregex_findNext(re, &status) == true);
736         TEST_ASSERT(uregex_start(re, 0, &status) == 5);
737         TEST_ASSERT(uregex_findNext(re, &status) == false);
738         TEST_TEARDOWN;
739 
740         /* matches(start=-1) uses regions                                           */
741         /*    Also, verify that non-greedy *? succeeds in finding the full match.   */
742         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
743         uregex_setRegion(re, 4, 6, &status);
744         TEST_ASSERT(uregex_matches(re, -1, &status) == true);
745         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
746         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
747         TEST_TEARDOWN;
748 
749         /* matches (start >=0) does not use regions       */
750         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
751         uregex_setRegion(re, 4, 6, &status);
752         TEST_ASSERT(uregex_matches(re, 0, &status) == true);
753         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
754         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
755         TEST_TEARDOWN;
756 
757         /* lookingAt(start=-1) uses regions                                         */
758         /*    Also, verify that non-greedy *? finds the first (shortest) match.     */
759         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
760         uregex_setRegion(re, 4, 6, &status);
761         TEST_ASSERT(uregex_lookingAt(re, -1, &status) == true);
762         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
763         TEST_ASSERT(uregex_end(re, 0, &status) == 4);
764         TEST_TEARDOWN;
765 
766         /* lookingAt (start >=0) does not use regions  */
767         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
768         uregex_setRegion(re, 4, 6, &status);
769         TEST_ASSERT(uregex_lookingAt(re, 0, &status) == true);
770         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
771         TEST_ASSERT(uregex_end(re, 0, &status) == 0);
772         TEST_TEARDOWN;
773 
774         /* hitEnd()       */
775         TEST_SETUP("[a-f]*", "abcdefghij", 0);
776         TEST_ASSERT(uregex_find(re, 0, &status) == true);
777         TEST_ASSERT(uregex_hitEnd(re, &status) == false);
778         TEST_TEARDOWN;
779 
780         TEST_SETUP("[a-f]*", "abcdef", 0);
781         TEST_ASSERT(uregex_find(re, 0, &status) == true);
782         TEST_ASSERT(uregex_hitEnd(re, &status) == true);
783         TEST_TEARDOWN;
784 
785         /* requireEnd   */
786         TEST_SETUP("abcd", "abcd", 0);
787         TEST_ASSERT(uregex_find(re, 0, &status) == true);
788         TEST_ASSERT(uregex_requireEnd(re, &status) == false);
789         TEST_TEARDOWN;
790 
791         TEST_SETUP("abcd$", "abcd", 0);
792         TEST_ASSERT(uregex_find(re, 0, &status) == true);
793         TEST_ASSERT(uregex_requireEnd(re, &status) == true);
794         TEST_TEARDOWN;
795 
796         /* anchoringBounds        */
797         TEST_SETUP("abc$", "abcdef", 0);
798         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == true);
799         uregex_useAnchoringBounds(re, false, &status);
800         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == false);
801 
802         TEST_ASSERT(uregex_find(re, -1, &status) == false);
803         uregex_useAnchoringBounds(re, true, &status);
804         uregex_setRegion(re, 0, 3, &status);
805         TEST_ASSERT(uregex_find(re, -1, &status) == true);
806         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
807         TEST_TEARDOWN;
808 
809         /* Transparent Bounds      */
810         TEST_SETUP("abc(?=def)", "abcdef", 0);
811         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == false);
812         uregex_useTransparentBounds(re, true, &status);
813         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == true);
814 
815         uregex_useTransparentBounds(re, false, &status);
816         TEST_ASSERT(uregex_find(re, -1, &status) == true);    /* No Region */
817         uregex_setRegion(re, 0, 3, &status);
818         TEST_ASSERT(uregex_find(re, -1, &status) == false);   /* with region, opaque bounds */
819         uregex_useTransparentBounds(re, true, &status);
820         TEST_ASSERT(uregex_find(re, -1, &status) == true);    /* with region, transparent bounds */
821         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
822         TEST_TEARDOWN;
823 
824 
825     /*
826      *  replaceFirst()
827      */
828     {
829         UChar    text1[80];
830         UChar    text2[80];
831         UChar    replText[80];
832         UChar    buf[80];
833         int32_t  resultSz;
834         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
835         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
836         u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
837 
838         status = U_ZERO_ERROR;
839         re = uregex_openC("x(.*?)x", 0, NULL, &status);
840         TEST_ASSERT_SUCCESS(status);
841 
842         /*  Normal case, with match */
843         uregex_setText(re, text1, -1, &status);
844         resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
845         TEST_ASSERT_SUCCESS(status);
846         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, true);
847         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
848 
849         /* No match.  Text should copy to output with no changes.  */
850         status = U_ZERO_ERROR;
851         uregex_setText(re, text2, -1, &status);
852         resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
853         TEST_ASSERT_SUCCESS(status);
854         TEST_ASSERT_STRING("No match here.", buf, true);
855         TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
856 
857         /*  Match, output just fills buffer, no termination warning. */
858         status = U_ZERO_ERROR;
859         uregex_setText(re, text1, -1, &status);
860         memset(buf, -1, sizeof(buf));
861         resultSz = uregex_replaceFirst(re, replText, -1, buf, (int32_t)strlen("Replace <aa> x1x x...x."), &status);
862         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
863         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, false);
864         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
865         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
866 
867         /* Do the replaceFirst again, without first resetting anything.
868          *  Should give the same results.
869          */
870         status = U_ZERO_ERROR;
871         memset(buf, -1, sizeof(buf));
872         resultSz = uregex_replaceFirst(re, replText, -1, buf, (int32_t)strlen("Replace <aa> x1x x...x."), &status);
873         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
874         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, false);
875         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
876         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
877 
878         /* NULL buffer, zero buffer length */
879         status = U_ZERO_ERROR;
880         resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
881         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
882         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
883 
884         /* Buffer too small by one */
885         status = U_ZERO_ERROR;
886         memset(buf, -1, sizeof(buf));
887         resultSz = uregex_replaceFirst(re, replText, -1, buf, (int32_t)strlen("Replace <aa> x1x x...x.")-1, &status);
888         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
889         TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, false);
890         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
891         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
892 
893         uregex_close(re);
894     }
895 
896 
897     /*
898      *  replaceAll()
899      */
900     {
901         UChar    text1[80];          /*  "Replace xaax x1x x...x." */
902         UChar    text2[80];          /*  "No match Here"           */
903         UChar    replText[80];       /*  "<$1>"                    */
904         UChar    replText2[80];      /*  "<<$1>>"                  */
905         const char * pattern = "x(.*?)x";
906         const char * expectedResult = "Replace <aa> <1> <...>.";
907         const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
908         UChar    buf[80];
909         int32_t  resultSize;
910         int32_t  expectedResultSize;
911         int32_t  expectedResultSize2;
912         int32_t  i;
913 
914         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
915         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
916         u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
917         u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));
918         expectedResultSize = (int32_t)strlen(expectedResult);
919         expectedResultSize2 = (int32_t)strlen(expectedResult2);
920 
921         status = U_ZERO_ERROR;
922         re = uregex_openC(pattern, 0, NULL, &status);
923         TEST_ASSERT_SUCCESS(status);
924 
925         /*  Normal case, with match */
926         uregex_setText(re, text1, -1, &status);
927         resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
928         TEST_ASSERT_SUCCESS(status);
929         TEST_ASSERT_STRING(expectedResult, buf, true);
930         TEST_ASSERT(resultSize == expectedResultSize);
931 
932         /* No match.  Text should copy to output with no changes.  */
933         status = U_ZERO_ERROR;
934         uregex_setText(re, text2, -1, &status);
935         resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
936         TEST_ASSERT_SUCCESS(status);
937         TEST_ASSERT_STRING("No match here.", buf, true);
938         TEST_ASSERT(resultSize == u_strlen(text2));
939 
940         /*  Match, output just fills buffer, no termination warning. */
941         status = U_ZERO_ERROR;
942         uregex_setText(re, text1, -1, &status);
943         memset(buf, -1, sizeof(buf));
944         resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
945         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
946         TEST_ASSERT_STRING(expectedResult, buf, false);
947         TEST_ASSERT(resultSize == expectedResultSize);
948         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
949 
950         /* Do the replaceFirst again, without first resetting anything.
951          *  Should give the same results.
952          */
953         status = U_ZERO_ERROR;
954         memset(buf, -1, sizeof(buf));
955         resultSize = uregex_replaceAll(re, replText, -1, buf, (int32_t)strlen("Replace xaax x1x x...x."), &status);
956         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
957         TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, false);
958         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
959         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
960 
961         /* NULL buffer, zero buffer length */
962         status = U_ZERO_ERROR;
963         resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
964         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
965         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
966 
967         /* Buffer too small.  Try every size, which will tickle edge cases
968          * in uregex_appendReplacement (used by replaceAll)   */
969         for (i=0; i<expectedResultSize; i++) {
970             char  expected[80];
971             status = U_ZERO_ERROR;
972             memset(buf, -1, sizeof(buf));
973             resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
974             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
975             strcpy(expected, expectedResult);
976             expected[i] = 0;
977             TEST_ASSERT_STRING(expected, buf, false);
978             TEST_ASSERT(resultSize == expectedResultSize);
979             TEST_ASSERT(buf[i] == (UChar)0xffff);
980         }
981 
982         /* Buffer too small.  Same as previous test, except this time the replacement
983          * text is longer than the match capture group, making the length of the complete
984          * replacement longer than the original string.
985          */
986         for (i=0; i<expectedResultSize2; i++) {
987             char  expected[80];
988             status = U_ZERO_ERROR;
989             memset(buf, -1, sizeof(buf));
990             resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
991             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
992             strcpy(expected, expectedResult2);
993             expected[i] = 0;
994             TEST_ASSERT_STRING(expected, buf, false);
995             TEST_ASSERT(resultSize == expectedResultSize2);
996             TEST_ASSERT(buf[i] == (UChar)0xffff);
997         }
998 
999 
1000         uregex_close(re);
1001     }
1002 
1003 
1004     /*
1005      *  appendReplacement()
1006      */
1007     {
1008         UChar    text[100];
1009         UChar    repl[100];
1010         UChar    buf[100];
1011         UChar   *bufPtr;
1012         int32_t  bufCap;
1013 
1014 
1015         status = U_ZERO_ERROR;
1016         re = uregex_openC(".*", 0, 0, &status);
1017         TEST_ASSERT_SUCCESS(status);
1018 
1019         u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
1020         u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1021         uregex_setText(re, text, -1, &status);
1022 
1023         /* match covers whole target string */
1024         uregex_find(re, 0, &status);
1025         TEST_ASSERT_SUCCESS(status);
1026         bufPtr = buf;
1027         bufCap = UPRV_LENGTHOF(buf);
1028         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1029         TEST_ASSERT_SUCCESS(status);
1030         TEST_ASSERT_STRING("some other", buf, true);
1031 
1032         /* Match has \u \U escapes */
1033         uregex_find(re, 0, &status);
1034         TEST_ASSERT_SUCCESS(status);
1035         bufPtr = buf;
1036         bufCap = UPRV_LENGTHOF(buf);
1037         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1038         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1039         TEST_ASSERT_SUCCESS(status);
1040         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, true);
1041 
1042         /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1043         status = U_ZERO_ERROR;
1044         uregex_find(re, 0, &status);
1045         TEST_ASSERT_SUCCESS(status);
1046         bufPtr = buf;
1047         status = U_BUFFER_OVERFLOW_ERROR;
1048         uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
1049         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1050 
1051         uregex_close(re);
1052     }
1053 
1054 
1055     /*
1056      *  appendTail().   Checked in ReplaceFirst(), replaceAll().
1057      */
1058 
1059     /*
1060      *  split()
1061      */
1062     {
1063         UChar    textToSplit[80];
1064         UChar    text2[80];
1065         UChar    buf[200];
1066         UChar    *fields[10];
1067         int32_t  numFields;
1068         int32_t  requiredCapacity;
1069         int32_t  spaceNeeded;
1070         int32_t  sz;
1071 
1072         u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
1073         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1074 
1075         status = U_ZERO_ERROR;
1076         re = uregex_openC(":", 0, NULL, &status);
1077 
1078 
1079         /*  Simple split */
1080 
1081         uregex_setText(re, textToSplit, -1, &status);
1082         TEST_ASSERT_SUCCESS(status);
1083 
1084         /* The TEST_ASSERT_SUCCESS call above should change too... */
1085         if (U_SUCCESS(status)) {
1086             memset(fields, -1, sizeof(fields));
1087             numFields =
1088                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1089             TEST_ASSERT_SUCCESS(status);
1090 
1091             /* The TEST_ASSERT_SUCCESS call above should change too... */
1092             if(U_SUCCESS(status)) {
1093                 TEST_ASSERT(numFields == 3);
1094                 TEST_ASSERT_STRING("first ",  fields[0], true);
1095                 TEST_ASSERT_STRING(" second", fields[1], true);
1096                 TEST_ASSERT_STRING("  third", fields[2], true);
1097                 TEST_ASSERT(fields[3] == NULL);
1098 
1099                 spaceNeeded = u_strlen(textToSplit) -
1100                             (numFields - 1)  +  /* Field delimiters do not appear in output */
1101                             numFields;          /* Each field gets a NUL terminator */
1102 
1103                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1104             }
1105         }
1106 
1107         uregex_close(re);
1108 
1109 
1110         /*  Split with too few output strings available */
1111         status = U_ZERO_ERROR;
1112         re = uregex_openC(":", 0, NULL, &status);
1113         uregex_setText(re, textToSplit, -1, &status);
1114         TEST_ASSERT_SUCCESS(status);
1115 
1116         /* The TEST_ASSERT_SUCCESS call above should change too... */
1117         if(U_SUCCESS(status)) {
1118             memset(fields, -1, sizeof(fields));
1119             numFields =
1120                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1121             TEST_ASSERT_SUCCESS(status);
1122 
1123             /* The TEST_ASSERT_SUCCESS call above should change too... */
1124             if(U_SUCCESS(status)) {
1125                 TEST_ASSERT(numFields == 2);
1126                 TEST_ASSERT_STRING("first ",  fields[0], true);
1127                 TEST_ASSERT_STRING(" second:  third", fields[1], true);
1128                 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1129 
1130                 spaceNeeded = u_strlen(textToSplit) -
1131                             (numFields - 1)  +  /* Field delimiters do not appear in output */
1132                             numFields;          /* Each field gets a NUL terminator */
1133 
1134                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1135 
1136                 /* Split with a range of output buffer sizes.  */
1137                 spaceNeeded = u_strlen(textToSplit) -
1138                     (numFields - 1)  +  /* Field delimiters do not appear in output */
1139                     numFields;          /* Each field gets a NUL terminator */
1140 
1141                 for (sz=0; sz < spaceNeeded+1; sz++) {
1142                     memset(fields, -1, sizeof(fields));
1143                     status = U_ZERO_ERROR;
1144                     numFields =
1145                         uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1146                     if (sz >= spaceNeeded) {
1147                         TEST_ASSERT_SUCCESS(status);
1148                         TEST_ASSERT_STRING("first ",  fields[0], true);
1149                         TEST_ASSERT_STRING(" second", fields[1], true);
1150                         TEST_ASSERT_STRING("  third", fields[2], true);
1151                     } else {
1152                         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1153                     }
1154                     TEST_ASSERT(numFields == 3);
1155                     TEST_ASSERT(fields[3] == NULL);
1156                     TEST_ASSERT(spaceNeeded == requiredCapacity);
1157                 }
1158             }
1159         }
1160 
1161         uregex_close(re);
1162     }
1163 
1164 
1165 
1166 
1167     /* Split(), part 2.  Patterns with capture groups.  The capture group text
1168      *                   comes out as additional fields.  */
1169     {
1170         UChar    textToSplit[80];
1171         UChar    buf[200];
1172         UChar    *fields[10];
1173         int32_t  numFields;
1174         int32_t  requiredCapacity;
1175         int32_t  spaceNeeded;
1176         int32_t  sz;
1177 
1178         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
1179 
1180         status = U_ZERO_ERROR;
1181         re = uregex_openC("<(.*?)>", 0, NULL, &status);
1182 
1183         uregex_setText(re, textToSplit, -1, &status);
1184         TEST_ASSERT_SUCCESS(status);
1185 
1186         /* The TEST_ASSERT_SUCCESS call above should change too... */
1187         if(U_SUCCESS(status)) {
1188             memset(fields, -1, sizeof(fields));
1189             numFields =
1190                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1191             TEST_ASSERT_SUCCESS(status);
1192 
1193             /* The TEST_ASSERT_SUCCESS call above should change too... */
1194             if(U_SUCCESS(status)) {
1195                 TEST_ASSERT(numFields == 5);
1196                 TEST_ASSERT_STRING("first ",  fields[0], true);
1197                 TEST_ASSERT_STRING("tag-a",   fields[1], true);
1198                 TEST_ASSERT_STRING(" second", fields[2], true);
1199                 TEST_ASSERT_STRING("tag-b",   fields[3], true);
1200                 TEST_ASSERT_STRING("  third", fields[4], true);
1201                 TEST_ASSERT(fields[5] == NULL);
1202                 spaceNeeded = (int32_t)strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1203                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1204             }
1205         }
1206 
1207         /*  Split with too few output strings available (2) */
1208         status = U_ZERO_ERROR;
1209         memset(fields, -1, sizeof(fields));
1210         numFields =
1211             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1212         TEST_ASSERT_SUCCESS(status);
1213 
1214         /* The TEST_ASSERT_SUCCESS call above should change too... */
1215         if(U_SUCCESS(status)) {
1216             TEST_ASSERT(numFields == 2);
1217             TEST_ASSERT_STRING("first ",  fields[0], true);
1218             TEST_ASSERT_STRING(" second<tag-b>  third", fields[1], true);
1219             TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1220 
1221             spaceNeeded = (int32_t)strlen("first . second<tag-b>  third.");  /* "." at NUL positions */
1222             TEST_ASSERT(spaceNeeded == requiredCapacity);
1223         }
1224 
1225         /*  Split with too few output strings available (3) */
1226         status = U_ZERO_ERROR;
1227         memset(fields, -1, sizeof(fields));
1228         numFields =
1229             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
1230         TEST_ASSERT_SUCCESS(status);
1231 
1232         /* The TEST_ASSERT_SUCCESS call above should change too... */
1233         if(U_SUCCESS(status)) {
1234             TEST_ASSERT(numFields == 3);
1235             TEST_ASSERT_STRING("first ",  fields[0], true);
1236             TEST_ASSERT_STRING("tag-a",   fields[1], true);
1237             TEST_ASSERT_STRING(" second<tag-b>  third", fields[2], true);
1238             TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1239 
1240             spaceNeeded = (int32_t)strlen("first .tag-a. second<tag-b>  third.");  /* "." at NUL positions */
1241             TEST_ASSERT(spaceNeeded == requiredCapacity);
1242         }
1243 
1244         /*  Split with just enough output strings available (5) */
1245         status = U_ZERO_ERROR;
1246         memset(fields, -1, sizeof(fields));
1247         numFields =
1248             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
1249         TEST_ASSERT_SUCCESS(status);
1250 
1251         /* The TEST_ASSERT_SUCCESS call above should change too... */
1252         if(U_SUCCESS(status)) {
1253             TEST_ASSERT(numFields == 5);
1254             TEST_ASSERT_STRING("first ",  fields[0], true);
1255             TEST_ASSERT_STRING("tag-a",   fields[1], true);
1256             TEST_ASSERT_STRING(" second", fields[2], true);
1257             TEST_ASSERT_STRING("tag-b",   fields[3], true);
1258             TEST_ASSERT_STRING("  third", fields[4], true);
1259             TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1260 
1261             spaceNeeded = (int32_t)strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1262             TEST_ASSERT(spaceNeeded == requiredCapacity);
1263         }
1264 
1265         /* Split, end of text is a field delimiter.   */
1266         status = U_ZERO_ERROR;
1267         sz = (int32_t)strlen("first <tag-a> second<tag-b>");
1268         uregex_setText(re, textToSplit, sz, &status);
1269         TEST_ASSERT_SUCCESS(status);
1270 
1271         /* The TEST_ASSERT_SUCCESS call above should change too... */
1272         if(U_SUCCESS(status)) {
1273             memset(fields, -1, sizeof(fields));
1274             numFields =
1275                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
1276             TEST_ASSERT_SUCCESS(status);
1277 
1278             /* The TEST_ASSERT_SUCCESS call above should change too... */
1279             if(U_SUCCESS(status)) {
1280                 TEST_ASSERT(numFields == 5);
1281                 TEST_ASSERT_STRING("first ",  fields[0], true);
1282                 TEST_ASSERT_STRING("tag-a",   fields[1], true);
1283                 TEST_ASSERT_STRING(" second", fields[2], true);
1284                 TEST_ASSERT_STRING("tag-b",   fields[3], true);
1285                 TEST_ASSERT_STRING("",        fields[4], true);
1286                 TEST_ASSERT(fields[5] == NULL);
1287                 TEST_ASSERT(fields[8] == NULL);
1288                 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1289                 spaceNeeded = (int32_t)strlen("first .tag-a. second.tag-b..");  /* "." at NUL positions */
1290                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1291             }
1292         }
1293 
1294         uregex_close(re);
1295     }
1296 
1297     /*
1298      * set/getTimeLimit
1299      */
1300      TEST_SETUP("abc$", "abcdef", 0);
1301      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1302      uregex_setTimeLimit(re, 1000, &status);
1303      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1304      TEST_ASSERT_SUCCESS(status);
1305      uregex_setTimeLimit(re, -1, &status);
1306      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1307      status = U_ZERO_ERROR;
1308      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1309      TEST_TEARDOWN;
1310 
1311      /*
1312       * set/get Stack Limit
1313       */
1314      TEST_SETUP("abc$", "abcdef", 0);
1315      TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1316      uregex_setStackLimit(re, 40000, &status);
1317      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1318      TEST_ASSERT_SUCCESS(status);
1319      uregex_setStackLimit(re, -1, &status);
1320      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1321      status = U_ZERO_ERROR;
1322      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1323      TEST_TEARDOWN;
1324 
1325 
1326      /*
1327       * Get/Set callback functions
1328       *     This test is copied from intltest regex/Callbacks
1329       *     The pattern and test data will run long enough to cause the callback
1330       *       to be invoked.  The nested '+' operators give exponential time
1331       *       behavior with increasing string length.
1332       */
1333      TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0);
1334      callBackContext cbInfo = {4, 0, 0};
1335      const void     *pContext   = &cbInfo;
1336      URegexMatchCallback    *returnedFn = &TestCallbackFn;
1337 
1338      /*  Getting the callback fn when it hasn't been set must return NULL  */
1339      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1340      TEST_ASSERT_SUCCESS(status);
1341      TEST_ASSERT(returnedFn == NULL);
1342      TEST_ASSERT(pContext == NULL);
1343 
1344      /* Set thecallback and do a match.                                   */
1345      /* The callback function should record that it has been called.      */
1346      uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1347      TEST_ASSERT_SUCCESS(status);
1348      TEST_ASSERT(cbInfo.numCalls == 0);
1349      TEST_ASSERT(uregex_matches(re, -1, &status) == false);
1350      TEST_ASSERT_SUCCESS(status);
1351      TEST_ASSERT(cbInfo.numCalls > 0);
1352 
1353      /* Getting the callback should return the values that were set above.  */
1354      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1355      TEST_ASSERT(returnedFn == &TestCallbackFn);
1356      TEST_ASSERT(pContext == &cbInfo);
1357 
1358      TEST_TEARDOWN;
1359 }
1360 
1361 
1362 
TestBug4315(void)1363 static void TestBug4315(void) {
1364     UErrorCode      theICUError = U_ZERO_ERROR;
1365     URegularExpression *theRegEx;
1366     UChar           *textBuff;
1367     const char      *thePattern;
1368     UChar            theString[100];
1369     UChar           *destFields[24];
1370     int32_t         neededLength1;
1371     int32_t         neededLength2;
1372 
1373     int32_t         wordCount = 0;
1374     int32_t         destFieldsSize = 24;
1375 
1376     thePattern  = "ck ";
1377     u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1378 
1379     /* open a regex */
1380     theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1381     TEST_ASSERT_SUCCESS(theICUError);
1382 
1383     /* set the input string */
1384     uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1385     TEST_ASSERT_SUCCESS(theICUError);
1386 
1387     /* split */
1388     /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1389      *  error occurs! */
1390     wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1391         destFieldsSize, &theICUError);
1392 
1393     TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1394     TEST_ASSERT(wordCount==3);
1395 
1396     if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1397     {
1398         theICUError = U_ZERO_ERROR;
1399         textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1400         wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1401             destFields, destFieldsSize, &theICUError);
1402         TEST_ASSERT(wordCount==3);
1403         TEST_ASSERT_SUCCESS(theICUError);
1404         TEST_ASSERT(neededLength1 == neededLength2);
1405         TEST_ASSERT_STRING("The qui", destFields[0], true);
1406         TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], true);
1407         TEST_ASSERT_STRING("turtle.", destFields[2], true);
1408         TEST_ASSERT(destFields[3] == NULL);
1409         free(textBuff);
1410     }
1411     uregex_close(theRegEx);
1412 }
1413 
1414 /* Based on TestRegexCAPI() */
TestUTextAPI(void)1415 static void TestUTextAPI(void) {
1416     UErrorCode           status = U_ZERO_ERROR;
1417     URegularExpression  *re;
1418     UText                patternText = UTEXT_INITIALIZER;
1419     UChar                pat[200];
1420     const char           patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1421 
1422     /* Mimimalist open/close */
1423     utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1424     re = uregex_openUText(&patternText, 0, 0, &status);
1425     if (U_FAILURE(status)) {
1426          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1427          utext_close(&patternText);
1428          return;
1429     }
1430     uregex_close(re);
1431 
1432     /* Open with all flag values set */
1433     status = U_ZERO_ERROR;
1434     re = uregex_openUText(&patternText,
1435         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1436         0, &status);
1437     TEST_ASSERT_SUCCESS(status);
1438     uregex_close(re);
1439 
1440     /* Open with an invalid flag */
1441     status = U_ZERO_ERROR;
1442     re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1443     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1444     uregex_close(re);
1445 
1446     /* open with an invalid parameter */
1447     status = U_ZERO_ERROR;
1448     re = uregex_openUText(NULL,
1449         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1450     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1451 
1452     /*
1453      *  clone
1454      */
1455     {
1456         URegularExpression *clone1;
1457         URegularExpression *clone2;
1458         URegularExpression *clone3;
1459         UChar  testString1[30];
1460         UChar  testString2[30];
1461         UBool  result;
1462 
1463 
1464         status = U_ZERO_ERROR;
1465         re = uregex_openUText(&patternText, 0, 0, &status);
1466         TEST_ASSERT_SUCCESS(status);
1467         clone1 = uregex_clone(re, &status);
1468         TEST_ASSERT_SUCCESS(status);
1469         TEST_ASSERT(clone1 != NULL);
1470 
1471         status = U_ZERO_ERROR;
1472         clone2 = uregex_clone(re, &status);
1473         TEST_ASSERT_SUCCESS(status);
1474         TEST_ASSERT(clone2 != NULL);
1475         uregex_close(re);
1476 
1477         status = U_ZERO_ERROR;
1478         clone3 = uregex_clone(clone2, &status);
1479         TEST_ASSERT_SUCCESS(status);
1480         TEST_ASSERT(clone3 != NULL);
1481 
1482         u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
1483         u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
1484 
1485         status = U_ZERO_ERROR;
1486         uregex_setText(clone1, testString1, -1, &status);
1487         TEST_ASSERT_SUCCESS(status);
1488         result = uregex_lookingAt(clone1, 0, &status);
1489         TEST_ASSERT_SUCCESS(status);
1490         TEST_ASSERT(result==true);
1491 
1492         status = U_ZERO_ERROR;
1493         uregex_setText(clone2, testString2, -1, &status);
1494         TEST_ASSERT_SUCCESS(status);
1495         result = uregex_lookingAt(clone2, 0, &status);
1496         TEST_ASSERT_SUCCESS(status);
1497         TEST_ASSERT(result==false);
1498         result = uregex_find(clone2, 0, &status);
1499         TEST_ASSERT_SUCCESS(status);
1500         TEST_ASSERT(result==true);
1501 
1502         uregex_close(clone1);
1503         uregex_close(clone2);
1504         uregex_close(clone3);
1505 
1506     }
1507 
1508     /*
1509      *  pattern() and patternText()
1510      */
1511     {
1512         const UChar  *resultPat;
1513         int32_t       resultLen;
1514         UText        *resultText;
1515         const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1516         const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1517         u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
1518         status = U_ZERO_ERROR;
1519 
1520         utext_openUTF8(&patternText, str_hello, -1, &status);
1521         re = uregex_open(pat, -1, 0, NULL, &status);
1522         resultPat = uregex_pattern(re, &resultLen, &status);
1523         TEST_ASSERT_SUCCESS(status);
1524 
1525         /* The TEST_ASSERT_SUCCESS above should change too... */
1526         if (U_SUCCESS(status)) {
1527             TEST_ASSERT(resultLen == -1);
1528             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1529         }
1530 
1531         resultText = uregex_patternUText(re, &status);
1532         TEST_ASSERT_SUCCESS(status);
1533         TEST_ASSERT_UTEXT(str_hello, resultText);
1534 
1535         uregex_close(re);
1536 
1537         status = U_ZERO_ERROR;
1538         re = uregex_open(pat, 3, 0, NULL, &status);
1539         resultPat = uregex_pattern(re, &resultLen, &status);
1540         TEST_ASSERT_SUCCESS(status);
1541 
1542         /* The TEST_ASSERT_SUCCESS above should change too... */
1543         if (U_SUCCESS(status)) {
1544             TEST_ASSERT(resultLen == 3);
1545             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1546             TEST_ASSERT(u_strlen(resultPat) == 3);
1547         }
1548 
1549         resultText = uregex_patternUText(re, &status);
1550         TEST_ASSERT_SUCCESS(status);
1551         TEST_ASSERT_UTEXT(str_hel, resultText);
1552 
1553         uregex_close(re);
1554     }
1555 
1556     /*
1557      *  setUText() and lookingAt()
1558      */
1559     {
1560         UText  text1 = UTEXT_INITIALIZER;
1561         UText  text2 = UTEXT_INITIALIZER;
1562         UBool  result;
1563         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1564         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1565         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1566         status = U_ZERO_ERROR;
1567         utext_openUTF8(&text1, str_abcccd, -1, &status);
1568         utext_openUTF8(&text2, str_abcccxd, -1, &status);
1569 
1570         utext_openUTF8(&patternText, str_abcd, -1, &status);
1571         re = uregex_openUText(&patternText, 0, NULL, &status);
1572         TEST_ASSERT_SUCCESS(status);
1573 
1574         /* Operation before doing a setText should fail... */
1575         status = U_ZERO_ERROR;
1576         uregex_lookingAt(re, 0, &status);
1577         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1578 
1579         status = U_ZERO_ERROR;
1580         uregex_setUText(re, &text1, &status);
1581         result = uregex_lookingAt(re, 0, &status);
1582         TEST_ASSERT(result == true);
1583         TEST_ASSERT_SUCCESS(status);
1584 
1585         status = U_ZERO_ERROR;
1586         uregex_setUText(re, &text2, &status);
1587         result = uregex_lookingAt(re, 0, &status);
1588         TEST_ASSERT(result == false);
1589         TEST_ASSERT_SUCCESS(status);
1590 
1591         status = U_ZERO_ERROR;
1592         uregex_setUText(re, &text1, &status);
1593         result = uregex_lookingAt(re, 0, &status);
1594         TEST_ASSERT(result == true);
1595         TEST_ASSERT_SUCCESS(status);
1596 
1597         uregex_close(re);
1598         utext_close(&text1);
1599         utext_close(&text2);
1600     }
1601 
1602 
1603     /*
1604      *  getText() and getUText()
1605      */
1606     {
1607         UText  text1 = UTEXT_INITIALIZER;
1608         UText  text2 = UTEXT_INITIALIZER;
1609         UChar  text2Chars[20];
1610         UText  *resultText;
1611         const UChar   *result;
1612         int32_t  textLength;
1613         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1614         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1615         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1616 
1617 
1618         status = U_ZERO_ERROR;
1619         utext_openUTF8(&text1, str_abcccd, -1, &status);
1620         u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
1621         utext_openUChars(&text2, text2Chars, -1, &status);
1622 
1623         utext_openUTF8(&patternText, str_abcd, -1, &status);
1624         re = uregex_openUText(&patternText, 0, NULL, &status);
1625 
1626         /* First set a UText */
1627         uregex_setUText(re, &text1, &status);
1628         resultText = uregex_getUText(re, NULL, &status);
1629         TEST_ASSERT_SUCCESS(status);
1630         TEST_ASSERT(resultText != &text1);
1631         utext_setNativeIndex(resultText, 0);
1632         utext_setNativeIndex(&text1, 0);
1633         TEST_ASSERT(testUTextEqual(resultText, &text1));
1634         utext_close(resultText);
1635 
1636         result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1637         (void)result;    /* Suppress set but not used warning. */
1638         TEST_ASSERT(textLength == -1 || textLength == 6);
1639         resultText = uregex_getUText(re, NULL, &status);
1640         TEST_ASSERT_SUCCESS(status);
1641         TEST_ASSERT(resultText != &text1);
1642         utext_setNativeIndex(resultText, 0);
1643         utext_setNativeIndex(&text1, 0);
1644         TEST_ASSERT(testUTextEqual(resultText, &text1));
1645         utext_close(resultText);
1646 
1647         /* Then set a UChar * */
1648         uregex_setText(re, text2Chars, 7, &status);
1649         resultText = uregex_getUText(re, NULL, &status);
1650         TEST_ASSERT_SUCCESS(status);
1651         utext_setNativeIndex(resultText, 0);
1652         utext_setNativeIndex(&text2, 0);
1653         TEST_ASSERT(testUTextEqual(resultText, &text2));
1654         utext_close(resultText);
1655         result = uregex_getText(re, &textLength, &status);
1656         TEST_ASSERT(textLength == 7);
1657 
1658         uregex_close(re);
1659         utext_close(&text1);
1660         utext_close(&text2);
1661     }
1662 
1663     /*
1664      *  matches()
1665      */
1666     {
1667         UText   text1 = UTEXT_INITIALIZER;
1668         UBool   result;
1669         UText   nullText = UTEXT_INITIALIZER;
1670         const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1671         const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1672 
1673         status = U_ZERO_ERROR;
1674         utext_openUTF8(&text1, str_abcccde, -1, &status);
1675         utext_openUTF8(&patternText, str_abcd, -1, &status);
1676         re = uregex_openUText(&patternText, 0, NULL, &status);
1677 
1678         uregex_setUText(re, &text1, &status);
1679         result = uregex_matches(re, 0, &status);
1680         TEST_ASSERT(result == false);
1681         TEST_ASSERT_SUCCESS(status);
1682         uregex_close(re);
1683 
1684         status = U_ZERO_ERROR;
1685         re = uregex_openC(".?", 0, NULL, &status);
1686         uregex_setUText(re, &text1, &status);
1687         result = uregex_matches(re, 7, &status);
1688         TEST_ASSERT(result == true);
1689         TEST_ASSERT_SUCCESS(status);
1690 
1691         status = U_ZERO_ERROR;
1692         utext_openUTF8(&nullText, "", -1, &status);
1693         uregex_setUText(re, &nullText, &status);
1694         TEST_ASSERT_SUCCESS(status);
1695         result = uregex_matches(re, 0, &status);
1696         TEST_ASSERT(result == true);
1697         TEST_ASSERT_SUCCESS(status);
1698 
1699         uregex_close(re);
1700         utext_close(&text1);
1701         utext_close(&nullText);
1702     }
1703 
1704 
1705     /*
1706      *  lookingAt()    Used in setText test.
1707      */
1708 
1709 
1710     /*
1711      *  find(), findNext, start, end, reset
1712      */
1713     {
1714         UChar    text1[50];
1715         UBool    result;
1716         u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
1717         status = U_ZERO_ERROR;
1718         re = uregex_openC("rx", 0, NULL, &status);
1719 
1720         uregex_setText(re, text1, -1, &status);
1721         result = uregex_find(re, 0, &status);
1722         TEST_ASSERT(result == true);
1723         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1724         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1725         TEST_ASSERT_SUCCESS(status);
1726 
1727         result = uregex_find(re, 9, &status);
1728         TEST_ASSERT(result == true);
1729         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1730         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1731         TEST_ASSERT_SUCCESS(status);
1732 
1733         result = uregex_find(re, 14, &status);
1734         TEST_ASSERT(result == false);
1735         TEST_ASSERT_SUCCESS(status);
1736 
1737         status = U_ZERO_ERROR;
1738         uregex_reset(re, 0, &status);
1739 
1740         result = uregex_findNext(re, &status);
1741         TEST_ASSERT(result == true);
1742         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1743         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1744         TEST_ASSERT_SUCCESS(status);
1745 
1746         result = uregex_findNext(re, &status);
1747         TEST_ASSERT(result == true);
1748         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1749         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1750         TEST_ASSERT_SUCCESS(status);
1751 
1752         status = U_ZERO_ERROR;
1753         uregex_reset(re, 12, &status);
1754 
1755         result = uregex_findNext(re, &status);
1756         TEST_ASSERT(result == true);
1757         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1758         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1759         TEST_ASSERT_SUCCESS(status);
1760 
1761         result = uregex_findNext(re, &status);
1762         TEST_ASSERT(result == false);
1763         TEST_ASSERT_SUCCESS(status);
1764 
1765         uregex_close(re);
1766     }
1767 
1768     /*
1769      *  groupUText()
1770      */
1771     {
1772         UChar    text1[80];
1773         UText   *actual;
1774         UBool    result;
1775         int64_t  groupLen = 0;
1776         UChar    groupBuf[20];
1777 
1778         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
1779 
1780         status = U_ZERO_ERROR;
1781         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1782         TEST_ASSERT_SUCCESS(status);
1783 
1784         uregex_setText(re, text1, -1, &status);
1785         result = uregex_find(re, 0, &status);
1786         TEST_ASSERT(result==true);
1787 
1788         /*  Capture Group 0 with shallow clone API.  Should succeed.  */
1789         status = U_ZERO_ERROR;
1790         actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
1791         TEST_ASSERT_SUCCESS(status);
1792 
1793         TEST_ASSERT(utext_getNativeIndex(actual) == 6);  /* index of "abc " within "noise abc ..." */
1794         TEST_ASSERT(groupLen == 16);   /* length of "abc interior def"  */
1795         utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1796 
1797         TEST_ASSERT_STRING("abc interior def", groupBuf, true);
1798         utext_close(actual);
1799 
1800         /*  Capture group #1.  Should succeed. */
1801         status = U_ZERO_ERROR;
1802 
1803         actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
1804         TEST_ASSERT_SUCCESS(status);
1805         TEST_ASSERT(9 == utext_getNativeIndex(actual));    /* index of " interior " within "noise abc interior def ... " */
1806                                                            /*    (within the string text1)           */
1807         TEST_ASSERT(10 == groupLen);                       /* length of " interior " */
1808         utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1809         TEST_ASSERT_STRING(" interior ", groupBuf, true);
1810 
1811         utext_close(actual);
1812 
1813         /*  Capture group out of range.  Error. */
1814         status = U_ZERO_ERROR;
1815         actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
1816         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1817         utext_close(actual);
1818 
1819         uregex_close(re);
1820     }
1821 
1822     /*
1823      *  replaceFirst()
1824      */
1825     {
1826         UChar    text1[80];
1827         UChar    text2[80];
1828         UText    replText = UTEXT_INITIALIZER;
1829         UText   *result;
1830         const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1831         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1832         const char str_u00411U00000042a[] =  { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1833                0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
1834         const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1835         const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1836         status = U_ZERO_ERROR;
1837         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
1838         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1839         utext_openUTF8(&replText, str_1x, -1, &status);
1840 
1841         re = uregex_openC("x(.*?)x", 0, NULL, &status);
1842         TEST_ASSERT_SUCCESS(status);
1843 
1844         /*  Normal case, with match */
1845         uregex_setText(re, text1, -1, &status);
1846         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1847         TEST_ASSERT_SUCCESS(status);
1848         TEST_ASSERT_UTEXT(str_Replxxx, result);
1849         utext_close(result);
1850 
1851         /* No match.  Text should copy to output with no changes.  */
1852         uregex_setText(re, text2, -1, &status);
1853         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1854         TEST_ASSERT_SUCCESS(status);
1855         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1856         utext_close(result);
1857 
1858         /* Unicode escapes */
1859         uregex_setText(re, text1, -1, &status);
1860         utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1861         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1862         TEST_ASSERT_SUCCESS(status);
1863         TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1864         utext_close(result);
1865 
1866         uregex_close(re);
1867         utext_close(&replText);
1868     }
1869 
1870 
1871     /*
1872      *  replaceAll()
1873      */
1874     {
1875         UChar    text1[80];
1876         UChar    text2[80];
1877         UText    replText = UTEXT_INITIALIZER;
1878         UText   *result;
1879         const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1880         const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1881         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1882         status = U_ZERO_ERROR;
1883         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
1884         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1885         utext_openUTF8(&replText, str_1, -1, &status);
1886 
1887         re = uregex_openC("x(.*?)x", 0, NULL, &status);
1888         TEST_ASSERT_SUCCESS(status);
1889 
1890         /*  Normal case, with match */
1891         uregex_setText(re, text1, -1, &status);
1892         result = uregex_replaceAllUText(re, &replText, NULL, &status);
1893         TEST_ASSERT_SUCCESS(status);
1894         TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1895         utext_close(result);
1896 
1897         /* No match.  Text should copy to output with no changes.  */
1898         uregex_setText(re, text2, -1, &status);
1899         result = uregex_replaceAllUText(re, &replText, NULL, &status);
1900         TEST_ASSERT_SUCCESS(status);
1901         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1902         utext_close(result);
1903 
1904         uregex_close(re);
1905         utext_close(&replText);
1906     }
1907 
1908 
1909     /*
1910      *  appendReplacement()
1911      */
1912     {
1913         UChar    text[100];
1914         UChar    repl[100];
1915         UChar    buf[100];
1916         UChar   *bufPtr;
1917         int32_t  bufCap;
1918 
1919         status = U_ZERO_ERROR;
1920         re = uregex_openC(".*", 0, 0, &status);
1921         TEST_ASSERT_SUCCESS(status);
1922 
1923         u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
1924         u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1925         uregex_setText(re, text, -1, &status);
1926 
1927         /* match covers whole target string */
1928         uregex_find(re, 0, &status);
1929         TEST_ASSERT_SUCCESS(status);
1930         bufPtr = buf;
1931         bufCap = UPRV_LENGTHOF(buf);
1932         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1933         TEST_ASSERT_SUCCESS(status);
1934         TEST_ASSERT_STRING("some other", buf, true);
1935 
1936         /* Match has \u \U escapes */
1937         uregex_find(re, 0, &status);
1938         TEST_ASSERT_SUCCESS(status);
1939         bufPtr = buf;
1940         bufCap = UPRV_LENGTHOF(buf);
1941         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1942         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1943         TEST_ASSERT_SUCCESS(status);
1944         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, true);
1945 
1946         uregex_close(re);
1947     }
1948 
1949 
1950     /*
1951      *  appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1952      */
1953 
1954     /*
1955      *  splitUText()
1956      */
1957     {
1958         UChar    textToSplit[80];
1959         UChar    text2[80];
1960         UText    *fields[10];
1961         int32_t  numFields;
1962         int32_t i;
1963 
1964         u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
1965         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1966 
1967         status = U_ZERO_ERROR;
1968         re = uregex_openC(":", 0, NULL, &status);
1969 
1970 
1971         /*  Simple split */
1972 
1973         uregex_setText(re, textToSplit, -1, &status);
1974         TEST_ASSERT_SUCCESS(status);
1975 
1976         /* The TEST_ASSERT_SUCCESS call above should change too... */
1977         if (U_SUCCESS(status)) {
1978             memset(fields, 0, sizeof(fields));
1979             numFields = uregex_splitUText(re, fields, 10, &status);
1980             TEST_ASSERT_SUCCESS(status);
1981 
1982             /* The TEST_ASSERT_SUCCESS call above should change too... */
1983             if(U_SUCCESS(status)) {
1984               const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1985               const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* '  second' */
1986               const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
1987                 TEST_ASSERT(numFields == 3);
1988                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
1989                 TEST_ASSERT_UTEXT(str_second, fields[1]);
1990                 TEST_ASSERT_UTEXT(str_third, fields[2]);
1991                 TEST_ASSERT(fields[3] == NULL);
1992             }
1993             for(i = 0; i < numFields; i++) {
1994                 utext_close(fields[i]);
1995             }
1996         }
1997 
1998         uregex_close(re);
1999 
2000 
2001         /*  Split with too few output strings available */
2002         status = U_ZERO_ERROR;
2003         re = uregex_openC(":", 0, NULL, &status);
2004         uregex_setText(re, textToSplit, -1, &status);
2005         TEST_ASSERT_SUCCESS(status);
2006 
2007         /* The TEST_ASSERT_SUCCESS call above should change too... */
2008         if(U_SUCCESS(status)) {
2009             fields[0] = NULL;
2010             fields[1] = NULL;
2011             fields[2] = &patternText;
2012             numFields = uregex_splitUText(re, fields, 2, &status);
2013             TEST_ASSERT_SUCCESS(status);
2014 
2015             /* The TEST_ASSERT_SUCCESS call above should change too... */
2016             if(U_SUCCESS(status)) {
2017                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2018                 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second:  third */
2019                 TEST_ASSERT(numFields == 2);
2020                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2021                 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
2022                 TEST_ASSERT(fields[2] == &patternText);
2023             }
2024             for(i = 0; i < numFields; i++) {
2025                 utext_close(fields[i]);
2026             }
2027         }
2028 
2029         uregex_close(re);
2030     }
2031 
2032     /* splitUText(), part 2.  Patterns with capture groups.  The capture group text
2033      *                   comes out as additional fields.  */
2034     {
2035         UChar    textToSplit[80];
2036         UText    *fields[10];
2037         int32_t  numFields;
2038         int32_t i;
2039 
2040         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
2041 
2042         status = U_ZERO_ERROR;
2043         re = uregex_openC("<(.*?)>", 0, NULL, &status);
2044 
2045         uregex_setText(re, textToSplit, -1, &status);
2046         TEST_ASSERT_SUCCESS(status);
2047 
2048         /* The TEST_ASSERT_SUCCESS call above should change too... */
2049         if(U_SUCCESS(status)) {
2050             memset(fields, 0, sizeof(fields));
2051             numFields = uregex_splitUText(re, fields, 10, &status);
2052             TEST_ASSERT_SUCCESS(status);
2053 
2054             /* The TEST_ASSERT_SUCCESS call above should change too... */
2055             if(U_SUCCESS(status)) {
2056                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2057                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2058                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2059                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2060                 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2061 
2062                 TEST_ASSERT(numFields == 5);
2063                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2064                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2065                 TEST_ASSERT_UTEXT(str_second, fields[2]);
2066                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2067                 TEST_ASSERT_UTEXT(str_third, fields[4]);
2068                 TEST_ASSERT(fields[5] == NULL);
2069             }
2070             for(i = 0; i < numFields; i++) {
2071                 utext_close(fields[i]);
2072             }
2073         }
2074 
2075         /*  Split with too few output strings available (2) */
2076         status = U_ZERO_ERROR;
2077         fields[0] = NULL;
2078         fields[1] = NULL;
2079         fields[2] = &patternText;
2080         numFields = uregex_splitUText(re, fields, 2, &status);
2081         TEST_ASSERT_SUCCESS(status);
2082 
2083         /* The TEST_ASSERT_SUCCESS call above should change too... */
2084         if(U_SUCCESS(status)) {
2085             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2086             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2087             TEST_ASSERT(numFields == 2);
2088             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2089             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2090             TEST_ASSERT(fields[2] == &patternText);
2091         }
2092         for(i = 0; i < numFields; i++) {
2093             utext_close(fields[i]);
2094         }
2095 
2096 
2097         /*  Split with too few output strings available (3) */
2098         status = U_ZERO_ERROR;
2099         fields[0] = NULL;
2100         fields[1] = NULL;
2101         fields[2] = NULL;
2102         fields[3] = &patternText;
2103         numFields = uregex_splitUText(re, fields, 3, &status);
2104         TEST_ASSERT_SUCCESS(status);
2105 
2106         /* The TEST_ASSERT_SUCCESS call above should change too... */
2107         if(U_SUCCESS(status)) {
2108             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2109             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2110             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2111             TEST_ASSERT(numFields == 3);
2112             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2113             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2114             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2115             TEST_ASSERT(fields[3] == &patternText);
2116         }
2117         for(i = 0; i < numFields; i++) {
2118             utext_close(fields[i]);
2119         }
2120 
2121         /*  Split with just enough output strings available (5) */
2122         status = U_ZERO_ERROR;
2123         fields[0] = NULL;
2124         fields[1] = NULL;
2125         fields[2] = NULL;
2126         fields[3] = NULL;
2127         fields[4] = NULL;
2128         fields[5] = &patternText;
2129         numFields = uregex_splitUText(re, fields, 5, &status);
2130         TEST_ASSERT_SUCCESS(status);
2131 
2132         /* The TEST_ASSERT_SUCCESS call above should change too... */
2133         if(U_SUCCESS(status)) {
2134             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2135             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2136             const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2137             const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2138             const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2139 
2140             TEST_ASSERT(numFields == 5);
2141             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2142             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2143             TEST_ASSERT_UTEXT(str_second, fields[2]);
2144             TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2145             TEST_ASSERT_UTEXT(str_third, fields[4]);
2146             TEST_ASSERT(fields[5] == &patternText);
2147         }
2148         for(i = 0; i < numFields; i++) {
2149             utext_close(fields[i]);
2150         }
2151 
2152         /* Split, end of text is a field delimiter.   */
2153         status = U_ZERO_ERROR;
2154         uregex_setText(re, textToSplit, (int32_t)strlen("first <tag-a> second<tag-b>"), &status);
2155         TEST_ASSERT_SUCCESS(status);
2156 
2157         /* The TEST_ASSERT_SUCCESS call above should change too... */
2158         if(U_SUCCESS(status)) {
2159             memset(fields, 0, sizeof(fields));
2160             fields[9] = &patternText;
2161             numFields = uregex_splitUText(re, fields, 9, &status);
2162             TEST_ASSERT_SUCCESS(status);
2163 
2164             /* The TEST_ASSERT_SUCCESS call above should change too... */
2165             if(U_SUCCESS(status)) {
2166                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2167                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2168                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2169                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2170                 const char str_empty[] = { 0x00 };
2171 
2172                 TEST_ASSERT(numFields == 5);
2173                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2174                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2175                 TEST_ASSERT_UTEXT(str_second, fields[2]);
2176                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2177                 TEST_ASSERT_UTEXT(str_empty,  fields[4]);
2178                 TEST_ASSERT(fields[5] == NULL);
2179                 TEST_ASSERT(fields[8] == NULL);
2180                 TEST_ASSERT(fields[9] == &patternText);
2181             }
2182             for(i = 0; i < numFields; i++) {
2183                 utext_close(fields[i]);
2184             }
2185         }
2186 
2187         uregex_close(re);
2188     }
2189     utext_close(&patternText);
2190 }
2191 
2192 
TestRefreshInput(void)2193 static void TestRefreshInput(void) {
2194     /*
2195      *  RefreshInput changes out the input of a URegularExpression without
2196      *    changing anything else in the match state.  Used with Java JNI,
2197      *    when Java moves the underlying string storage.   This test
2198      *    runs a find() loop, moving the text after the first match.
2199      *    The right number of matches should still be found.
2200      */
2201     UChar testStr[]  = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0};  /* = "A B C"  */
2202     UChar movedStr[] = {   0,    0,    0,    0,    0,   0};
2203     UErrorCode status = U_ZERO_ERROR;
2204     URegularExpression *re;
2205     UText ut1 = UTEXT_INITIALIZER;
2206     UText ut2 = UTEXT_INITIALIZER;
2207 
2208     re = uregex_openC("[ABC]", 0, 0, &status);
2209     TEST_ASSERT_SUCCESS(status);
2210 
2211     utext_openUChars(&ut1, testStr, -1, &status);
2212     TEST_ASSERT_SUCCESS(status);
2213     uregex_setUText(re, &ut1, &status);
2214     TEST_ASSERT_SUCCESS(status);
2215 
2216     /* Find the first match "A" in the original string */
2217     TEST_ASSERT(uregex_findNext(re, &status));
2218     TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2219 
2220     /* Move the string, kill the original string.  */
2221     u_strcpy(movedStr, testStr);
2222     u_memset(testStr, 0, u_strlen(testStr));
2223     utext_openUChars(&ut2, movedStr, -1, &status);
2224     TEST_ASSERT_SUCCESS(status);
2225     uregex_refreshUText(re, &ut2, &status);
2226     TEST_ASSERT_SUCCESS(status);
2227 
2228     /* Find the following two matches, now working in the moved string. */
2229     TEST_ASSERT(uregex_findNext(re, &status));
2230     TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2231     TEST_ASSERT(uregex_findNext(re, &status));
2232     TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2233     TEST_ASSERT(false == uregex_findNext(re, &status));
2234 
2235     uregex_close(re);
2236 }
2237 
2238 
TestBug8421(void)2239 static void TestBug8421(void) {
2240     /* Bug 8421:  setTimeLimit on a regular expression before setting text to be matched
2241      *             was failing.
2242      */
2243     URegularExpression *re;
2244     UErrorCode status = U_ZERO_ERROR;
2245     int32_t  limit = -1;
2246 
2247     re = uregex_openC("abc", 0, 0, &status);
2248     TEST_ASSERT_SUCCESS(status);
2249 
2250     limit = uregex_getTimeLimit(re, &status);
2251     TEST_ASSERT_SUCCESS(status);
2252     TEST_ASSERT(limit == 0);
2253 
2254     uregex_setTimeLimit(re, 100, &status);
2255     TEST_ASSERT_SUCCESS(status);
2256     limit = uregex_getTimeLimit(re, &status);
2257     TEST_ASSERT_SUCCESS(status);
2258     TEST_ASSERT(limit == 100);
2259 
2260     uregex_close(re);
2261 }
2262 
FindCallback(const void * context,int64_t matchIndex)2263 static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
2264     // suppress compiler warnings about unused variables
2265     (void)context;
2266     (void)matchIndex;
2267     return false;
2268 }
2269 
MatchCallback(const void * context,int32_t steps)2270 static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
2271     // suppress compiler warnings about unused variables
2272     (void)context;
2273     (void)steps;
2274     return false;
2275 }
2276 
TestBug10815()2277 static void TestBug10815() {
2278   /* Bug 10815:   uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
2279    *              when the callback function specified by uregex_setMatchCallback() returns false
2280    */
2281     URegularExpression *re;
2282     UErrorCode status = U_ZERO_ERROR;
2283     UChar    text[100];
2284 
2285 
2286     // findNext() with a find progress callback function.
2287 
2288     re = uregex_openC(".z", 0, 0, &status);
2289     TEST_ASSERT_SUCCESS(status);
2290 
2291     u_uastrncpy(text, "Hello, World.",  UPRV_LENGTHOF(text));
2292     uregex_setText(re, text, -1, &status);
2293     TEST_ASSERT_SUCCESS(status);
2294 
2295     uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
2296     TEST_ASSERT_SUCCESS(status);
2297 
2298     uregex_findNext(re, &status);
2299     TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2300 
2301     uregex_close(re);
2302 
2303     // findNext() with a match progress callback function.
2304 
2305     status = U_ZERO_ERROR;
2306     re = uregex_openC("((xxx)*)*y", 0, 0, &status);
2307     TEST_ASSERT_SUCCESS(status);
2308 
2309     // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2310     // it will appear to be stuck in a (near) infinite loop.
2311     u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",  UPRV_LENGTHOF(text));
2312     uregex_setText(re, text, -1, &status);
2313     TEST_ASSERT_SUCCESS(status);
2314 
2315     uregex_setMatchCallback(re, MatchCallback, NULL, &status);
2316     TEST_ASSERT_SUCCESS(status);
2317 
2318     uregex_findNext(re, &status);
2319     TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2320 
2321     uregex_close(re);
2322 }
2323 
2324 
2325 #endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */
2326