• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 2004-2015, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /********************************************************************************
9 *
10 * File reapits.c
11 *
12 *********************************************************************************/
13 /*C API TEST FOR Regular Expressions */
14 /**
15 *   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
16 *   try to test the full functionality.  It just calls each function and verifies that it
17 *   works on a basic level.
18 *
19 *   More complete testing of regular expression functionality is done with the C++ tests.
20 **/
21 
22 #include "unicode/utypes.h"
23 
24 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
25 
26 #include <stdlib.h>
27 #include <string.h>
28 #include "unicode/uloc.h"
29 #include "unicode/uregex.h"
30 #include "unicode/ustring.h"
31 #include "unicode/utext.h"
32 #include "unicode/utf8.h"
33 #include "cintltst.h"
34 #include "cmemory.h"
35 
/*
 * TEST_ASSERT_SUCCESS(status)
 *     Reports a data error, tagged with the current file and line, when
 *     `status` holds an ICU failure code.  Execution continues afterwards;
 *     the macro only logs.
 */
#define TEST_ASSERT_SUCCESS(status) UPRV_BLOCK_MACRO_BEGIN { \
    if (U_FAILURE(status)) \
    { \
        log_data_err("Failure at file %s:%d - error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
} UPRV_BLOCK_MACRO_END
41 
/*
 * TEST_ASSERT(expr)
 *     Logs a test failure, quoting the source text of `expr` together with
 *     the current file and line, when `expr` evaluates to zero.  Execution
 *     continues afterwards; the macro only logs.
 */
#define TEST_ASSERT(expr) UPRV_BLOCK_MACRO_BEGIN { \
    if (!(expr)) \
    { \
        log_err("Test Failure at file %s:%d - ASSERT(%s) failed.\n", \
                __FILE__, __LINE__, #expr); \
    } \
} UPRV_BLOCK_MACRO_END
47 
/*
 *   TEST_SETUP and TEST_TEARDOWN
 *         macros to handle the boilerplate around setting up regex test cases.
 *         parameters to setup:
 *              pattern:     The regex pattern, a (char *) null terminated C string.
 *              testString:  The string data, also a (char *) C string.
 *              flags:       Regex flags to set when compiling the pattern
 *
 *         Put arbitrary test code between SETUP and TEARDOWN.
 *         "re" is the compiled, ready-to-go regular expression.
 *
 *         The macros expect variables named "status" (UErrorCode) and "re"
 *         (URegularExpression *) to be in scope at the point of use.
 *         TEST_SETUP opens a block and an if (U_SUCCESS(status)) body that
 *         TEST_TEARDOWN closes, so the two must always be used as a pair.
 *         The code between them runs only when setup succeeded.
 */
#define TEST_SETUP(pattern, testString, flags) UPRV_BLOCK_MACRO_BEGIN { \
    UChar   *srcString = NULL;  \
    status = U_ZERO_ERROR; \
    re = uregex_openC(pattern, flags, NULL, &status);  \
    TEST_ASSERT_SUCCESS(status);   \
    int32_t testStringLen = (int32_t)strlen(testString); \
    srcString = (UChar *)malloc( (testStringLen + 2) * sizeof(UChar) ); \
    if (srcString == NULL) { \
        /* Never hand a NULL destination to u_uastrncpy(); report the */ \
        /* failure through status so the guarded test body is skipped. */ \
        status = U_MEMORY_ALLOCATION_ERROR; \
    } else { \
        u_uastrncpy(srcString, testString, testStringLen + 1); \
        uregex_setText(re, srcString, -1, &status); \
    } \
    TEST_ASSERT_SUCCESS(status);  \
    if (U_SUCCESS(status)) { \
        UPRV_BLOCK_MACRO_BEGIN {} UPRV_BLOCK_MACRO_END
71 
/*
 * TEST_TEARDOWN
 *     Counterpart of TEST_SETUP.  Closes the if (U_SUCCESS(status)) body and
 *     the enclosing block that TEST_SETUP left open, reports any failure left
 *     in "status" by the test code, then releases the compiled expression
 *     "re" and the "srcString" text buffer that TEST_SETUP allocated.
 */
#define TEST_TEARDOWN \
    } /* closes the if (U_SUCCESS(status)) opened by TEST_SETUP */ \
    TEST_ASSERT_SUCCESS(status); \
    uregex_close(re); \
    free(srcString); \
} UPRV_BLOCK_MACRO_END
78 
79 
80 /**
81  * @param expected utf-8 array of bytes to be expected
82  */
test_assert_string(const char * expected,const UChar * actual,UBool nulTerm,const char * file,int line)83 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
84      char     buf_inside_macro[120];
85      int32_t  len = (int32_t)strlen(expected);
86      UBool    success;
87      if (nulTerm) {
88          u_austrncpy(buf_inside_macro, (actual), len+1);
89          buf_inside_macro[len+2] = 0;
90          success = (strcmp((expected), buf_inside_macro) == 0);
91      } else {
92          u_austrncpy(buf_inside_macro, (actual), len);
93          buf_inside_macro[len+1] = 0;
94          success = (strncmp((expected), buf_inside_macro, len) == 0);
95      }
96      if (success == FALSE) {
97          log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
98              file, line, (expected), buf_inside_macro);
99      }
100 }
101 
102 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
103 
104 
equals_utf8_utext(const char * utf8,UText * utext)105 static UBool equals_utf8_utext(const char *utf8, UText *utext) {
106     int32_t u8i = 0;
107     UChar32 u8c = 0;
108     UChar32 utc = 0;
109     UBool   stringsEqual = TRUE;
110     utext_setNativeIndex(utext, 0);
111     for (;;) {
112         U8_NEXT_UNSAFE(utf8, u8i, u8c);
113         utc = utext_next32(utext);
114         if (u8c == 0 && utc == U_SENTINEL) {
115             break;
116         }
117         if (u8c != utc || u8c == 0) {
118             stringsEqual = FALSE;
119             break;
120         }
121     }
122     return stringsEqual;
123 }
124 
125 
/**
 * Check the contents of a UText against an expected UTF-8 string.  On a
 * mismatch, log the expected string followed by the actual contents, one
 * code point at a time: code points strictly between 0x20 and 0x7e are
 * written as characters, everything else as a hex number.
 *
 * @param expected NUL-terminated UTF-8 string.
 * @param actual   the text to check; its iteration position is modified.
 * @param file     source file name of the call site, for the message.
 * @param line     source line of the call site, for the message.
 */
static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
    UChar32 cp;

    utext_setNativeIndex(actual, 0);
    if (equals_utf8_utext(expected, actual)) {
        return;
    }

    log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
    for (cp = utext_next32From(actual, 0); cp != U_SENTINEL; cp = UTEXT_NEXT32(actual)) {
        if (cp > 0x20 && cp < 0x7e) {
            log_err("%c", cp);
        } else {
            log_err("%#x", cp);
        }
    }
    log_err("\"\n");
}

/*
 * TEST_ASSERT_UTEXT(const char *expected, const UText *actual)
 *     Wrapper for test_assert_utext() that supplies the call site's
 *     file and line.
 *     Note:  Expected is a UTF-8 encoded string, _not_ the system code page.
 */
#define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
149 
testUTextEqual(UText * uta,UText * utb)150 static UBool testUTextEqual(UText *uta, UText *utb) {
151     UChar32 ca = 0;
152     UChar32 cb = 0;
153     utext_setNativeIndex(uta, 0);
154     utext_setNativeIndex(utb, 0);
155     do {
156         ca = utext_next32(uta);
157         cb = utext_next32(utb);
158         if (ca != cb) {
159             break;
160         }
161     } while (ca != U_SENTINEL);
162     return ca == cb;
163 }
164 
165 
166 
167 
168 static void TestRegexCAPI(void);
169 static void TestBug4315(void);
170 static void TestUTextAPI(void);
171 static void TestRefreshInput(void);
172 static void TestBug8421(void);
173 static void TestBug10815(void);
174 
175 void addURegexTest(TestNode** root);
176 
addURegexTest(TestNode ** root)177 void addURegexTest(TestNode** root)
178 {
179     addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
180     addTest(root, &TestBug4315,   "regex/TestBug4315");
181     addTest(root, &TestUTextAPI,  "regex/TestUTextAPI");
182     addTest(root, &TestRefreshInput, "regex/TestRefreshInput");
183     addTest(root, &TestBug8421,   "regex/TestBug8421");
184     addTest(root, &TestBug10815,   "regex/TestBug10815");
185 }
186 
187 /*
188  * Call back function and context struct used for testing
189  *    regular expression user callbacks.  This test is mostly the same as
190  *   the corresponding C++ test in intltest.
191  */
192 typedef struct callBackContext {
193     int32_t          maxCalls;
194     int32_t          numCalls;
195     int32_t          lastSteps;
196 } callBackContext;
197 
198 static UBool U_EXPORT2 U_CALLCONV
TestCallbackFn(const void * context,int32_t steps)199 TestCallbackFn(const void *context, int32_t steps) {
200   callBackContext  *info = (callBackContext *)context;
201   if (info->lastSteps+1 != steps) {
202       log_err("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
203   }
204   info->lastSteps = steps;
205   info->numCalls++;
206   return (info->numCalls < info->maxCalls);
207 }
208 
209 /*
210  *   Regular Expression C API Tests
211  */
TestRegexCAPI(void)212 static void TestRegexCAPI(void) {
213     UErrorCode           status = U_ZERO_ERROR;
214     URegularExpression  *re;
215     UChar                pat[200];
216     UChar               *minus1;
217 
218     memset(&minus1, -1, sizeof(minus1));
219 
220     /* Minimalist open/close */
221     u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
222     re = uregex_open(pat, -1, 0, 0, &status);
223     if (U_FAILURE(status)) {
224          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
225          return;
226     }
227     uregex_close(re);
228 
229     /* Open with all flag values set */
230     status = U_ZERO_ERROR;
231     re = uregex_open(pat, -1,
232         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | UREGEX_LITERAL,
233         0, &status);
234     TEST_ASSERT_SUCCESS(status);
235     uregex_close(re);
236 
237     /* Open with an invalid flag */
238     status = U_ZERO_ERROR;
239     re = uregex_open(pat, -1, 0x40000000, 0, &status);
240     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
241     uregex_close(re);
242 
243     /* Open with an unimplemented flag */
244     status = U_ZERO_ERROR;
245     re = uregex_open(pat, -1, UREGEX_CANON_EQ, 0, &status);
246     TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
247     uregex_close(re);
248 
249     /* openC with an invalid parameter */
250     status = U_ZERO_ERROR;
251     re = uregex_openC(NULL,
252         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
253     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
254 
255     /* openC with an invalid parameter */
256     status = U_USELESS_COLLATOR_ERROR;
257     re = uregex_openC(NULL,
258         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
259     TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
260 
261     /* openC   open from a C string */
262     {
263         const UChar   *p;
264         int32_t  len;
265         status = U_ZERO_ERROR;
266         re = uregex_openC("abc*", 0, 0, &status);
267         TEST_ASSERT_SUCCESS(status);
268         p = uregex_pattern(re, &len, &status);
269         TEST_ASSERT_SUCCESS(status);
270 
271         /* The TEST_ASSERT_SUCCESS above should change too... */
272         if(U_SUCCESS(status)) {
273             u_uastrncpy(pat, "abc*", UPRV_LENGTHOF(pat));
274             TEST_ASSERT(u_strcmp(pat, p) == 0);
275             TEST_ASSERT(len==(int32_t)strlen("abc*"));
276         }
277 
278         uregex_close(re);
279 
280         /*  TODO:  Open with ParseError parameter */
281     }
282 
283     /*
284      *  clone
285      */
286     {
287         URegularExpression *clone1;
288         URegularExpression *clone2;
289         URegularExpression *clone3;
290         UChar  testString1[30];
291         UChar  testString2[30];
292         UBool  result;
293 
294 
295         status = U_ZERO_ERROR;
296         re = uregex_openC("abc*", 0, 0, &status);
297         TEST_ASSERT_SUCCESS(status);
298         clone1 = uregex_clone(re, &status);
299         TEST_ASSERT_SUCCESS(status);
300         TEST_ASSERT(clone1 != NULL);
301 
302         status = U_ZERO_ERROR;
303         clone2 = uregex_clone(re, &status);
304         TEST_ASSERT_SUCCESS(status);
305         TEST_ASSERT(clone2 != NULL);
306         uregex_close(re);
307 
308         status = U_ZERO_ERROR;
309         clone3 = uregex_clone(clone2, &status);
310         TEST_ASSERT_SUCCESS(status);
311         TEST_ASSERT(clone3 != NULL);
312 
313         u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
314         u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
315 
316         status = U_ZERO_ERROR;
317         uregex_setText(clone1, testString1, -1, &status);
318         TEST_ASSERT_SUCCESS(status);
319         result = uregex_lookingAt(clone1, 0, &status);
320         TEST_ASSERT_SUCCESS(status);
321         TEST_ASSERT(result==TRUE);
322 
323         status = U_ZERO_ERROR;
324         uregex_setText(clone2, testString2, -1, &status);
325         TEST_ASSERT_SUCCESS(status);
326         result = uregex_lookingAt(clone2, 0, &status);
327         TEST_ASSERT_SUCCESS(status);
328         TEST_ASSERT(result==FALSE);
329         result = uregex_find(clone2, 0, &status);
330         TEST_ASSERT_SUCCESS(status);
331         TEST_ASSERT(result==TRUE);
332 
333         uregex_close(clone1);
334         uregex_close(clone2);
335         uregex_close(clone3);
336 
337     }
338 
339     /*
340      *  pattern()
341     */
342     {
343         const UChar  *resultPat;
344         int32_t       resultLen;
345         u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat));
346         status = U_ZERO_ERROR;
347         re = uregex_open(pat, -1, 0, NULL, &status);
348         resultPat = uregex_pattern(re, &resultLen, &status);
349         TEST_ASSERT_SUCCESS(status);
350 
351         /* The TEST_ASSERT_SUCCESS above should change too... */
352         if (U_SUCCESS(status)) {
353             TEST_ASSERT(resultLen == -1);
354             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
355         }
356 
357         uregex_close(re);
358 
359         status = U_ZERO_ERROR;
360         re = uregex_open(pat, 3, 0, NULL, &status);
361         resultPat = uregex_pattern(re, &resultLen, &status);
362         TEST_ASSERT_SUCCESS(status);
363         TEST_ASSERT_SUCCESS(status);
364 
365         /* The TEST_ASSERT_SUCCESS above should change too... */
366         if (U_SUCCESS(status)) {
367             TEST_ASSERT(resultLen == 3);
368             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
369             TEST_ASSERT(u_strlen(resultPat) == 3);
370         }
371 
372         uregex_close(re);
373     }
374 
375     /*
376      *  flags()
377      */
378     {
379         int32_t  t;
380 
381         status = U_ZERO_ERROR;
382         re = uregex_open(pat, -1, 0, NULL, &status);
383         t  = uregex_flags(re, &status);
384         TEST_ASSERT_SUCCESS(status);
385         TEST_ASSERT(t == 0);
386         uregex_close(re);
387 
388         status = U_ZERO_ERROR;
389         re = uregex_open(pat, -1, 0, NULL, &status);
390         t  = uregex_flags(re, &status);
391         TEST_ASSERT_SUCCESS(status);
392         TEST_ASSERT(t == 0);
393         uregex_close(re);
394 
395         status = U_ZERO_ERROR;
396         re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
397         t  = uregex_flags(re, &status);
398         TEST_ASSERT_SUCCESS(status);
399         TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
400         uregex_close(re);
401     }
402 
403     /*
404      *  setText() and lookingAt()
405      */
406     {
407         UChar  text1[50];
408         UChar  text2[50];
409         UBool  result;
410 
411         u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
412         u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
413         status = U_ZERO_ERROR;
414         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
415         re = uregex_open(pat, -1, 0, NULL, &status);
416         TEST_ASSERT_SUCCESS(status);
417 
418         /* Operation before doing a setText should fail... */
419         status = U_ZERO_ERROR;
420         uregex_lookingAt(re, 0, &status);
421         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
422 
423         status = U_ZERO_ERROR;
424         uregex_setText(re, text1, -1, &status);
425         result = uregex_lookingAt(re, 0, &status);
426         TEST_ASSERT(result == TRUE);
427         TEST_ASSERT_SUCCESS(status);
428 
429         status = U_ZERO_ERROR;
430         uregex_setText(re, text2, -1, &status);
431         result = uregex_lookingAt(re, 0, &status);
432         TEST_ASSERT(result == FALSE);
433         TEST_ASSERT_SUCCESS(status);
434 
435         status = U_ZERO_ERROR;
436         uregex_setText(re, text1, -1, &status);
437         result = uregex_lookingAt(re, 0, &status);
438         TEST_ASSERT(result == TRUE);
439         TEST_ASSERT_SUCCESS(status);
440 
441         status = U_ZERO_ERROR;
442         uregex_setText(re, text1, 5, &status);
443         result = uregex_lookingAt(re, 0, &status);
444         TEST_ASSERT(result == FALSE);
445         TEST_ASSERT_SUCCESS(status);
446 
447         status = U_ZERO_ERROR;
448         uregex_setText(re, text1, 6, &status);
449         result = uregex_lookingAt(re, 0, &status);
450         TEST_ASSERT(result == TRUE);
451         TEST_ASSERT_SUCCESS(status);
452 
453         uregex_close(re);
454     }
455 
456 
457     /*
458      *  getText()
459      */
460     {
461         UChar    text1[50];
462         UChar    text2[50];
463         const UChar   *result;
464         int32_t  textLength;
465 
466         u_uastrncpy(text1, "abcccd",  UPRV_LENGTHOF(text1));
467         u_uastrncpy(text2, "abcccxd", UPRV_LENGTHOF(text2));
468         status = U_ZERO_ERROR;
469         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
470         re = uregex_open(pat, -1, 0, NULL, &status);
471 
472         uregex_setText(re, text1, -1, &status);
473         result = uregex_getText(re, &textLength, &status);
474         TEST_ASSERT(result == text1);
475         TEST_ASSERT(textLength == -1);
476         TEST_ASSERT_SUCCESS(status);
477 
478         status = U_ZERO_ERROR;
479         uregex_setText(re, text2, 7, &status);
480         result = uregex_getText(re, &textLength, &status);
481         TEST_ASSERT(result == text2);
482         TEST_ASSERT(textLength == 7);
483         TEST_ASSERT_SUCCESS(status);
484 
485         status = U_ZERO_ERROR;
486         uregex_setText(re, text2, 4, &status);
487         result = uregex_getText(re, &textLength, &status);
488         TEST_ASSERT(result == text2);
489         TEST_ASSERT(textLength == 4);
490         TEST_ASSERT_SUCCESS(status);
491         uregex_close(re);
492     }
493 
494     /*
495      *  matches()
496      */
497     {
498         UChar   text1[50];
499         UBool   result;
500         int     len;
501         UChar   nullString[] = {0,0,0};
502 
503         u_uastrncpy(text1, "abcccde",  UPRV_LENGTHOF(text1));
504         status = U_ZERO_ERROR;
505         u_uastrncpy(pat, "abc*d", UPRV_LENGTHOF(pat));
506         re = uregex_open(pat, -1, 0, NULL, &status);
507 
508         uregex_setText(re, text1, -1, &status);
509         result = uregex_matches(re, 0, &status);
510         TEST_ASSERT(result == FALSE);
511         TEST_ASSERT_SUCCESS(status);
512 
513         status = U_ZERO_ERROR;
514         uregex_setText(re, text1, 6, &status);
515         result = uregex_matches(re, 0, &status);
516         TEST_ASSERT(result == TRUE);
517         TEST_ASSERT_SUCCESS(status);
518 
519         status = U_ZERO_ERROR;
520         uregex_setText(re, text1, 6, &status);
521         result = uregex_matches(re, 1, &status);
522         TEST_ASSERT(result == FALSE);
523         TEST_ASSERT_SUCCESS(status);
524         uregex_close(re);
525 
526         status = U_ZERO_ERROR;
527         re = uregex_openC(".?", 0, NULL, &status);
528         uregex_setText(re, text1, -1, &status);
529         len = u_strlen(text1);
530         result = uregex_matches(re, len, &status);
531         TEST_ASSERT(result == TRUE);
532         TEST_ASSERT_SUCCESS(status);
533 
534         status = U_ZERO_ERROR;
535         uregex_setText(re, nullString, -1, &status);
536         TEST_ASSERT_SUCCESS(status);
537         result = uregex_matches(re, 0, &status);
538         TEST_ASSERT(result == TRUE);
539         TEST_ASSERT_SUCCESS(status);
540         uregex_close(re);
541     }
542 
543 
544     /*
545      *  lookingAt()    Used in setText test.
546      */
547 
548 
549     /*
550      *  find(), findNext, start, end, reset
551      */
552     {
553         UChar    text1[50];
554         UBool    result;
555         u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
556         status = U_ZERO_ERROR;
557         re = uregex_openC("rx", 0, NULL, &status);
558 
559         uregex_setText(re, text1, -1, &status);
560         result = uregex_find(re, 0, &status);
561         TEST_ASSERT(result == TRUE);
562         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
563         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
564         TEST_ASSERT_SUCCESS(status);
565 
566         result = uregex_find(re, 9, &status);
567         TEST_ASSERT(result == TRUE);
568         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
569         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
570         TEST_ASSERT_SUCCESS(status);
571 
572         result = uregex_find(re, 14, &status);
573         TEST_ASSERT(result == FALSE);
574         TEST_ASSERT_SUCCESS(status);
575 
576         status = U_ZERO_ERROR;
577         uregex_reset(re, 0, &status);
578 
579         result = uregex_findNext(re, &status);
580         TEST_ASSERT(result == TRUE);
581         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
582         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
583         TEST_ASSERT_SUCCESS(status);
584 
585         result = uregex_findNext(re, &status);
586         TEST_ASSERT(result == TRUE);
587         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
588         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
589         TEST_ASSERT_SUCCESS(status);
590 
591         status = U_ZERO_ERROR;
592         uregex_reset(re, 12, &status);
593 
594         result = uregex_findNext(re, &status);
595         TEST_ASSERT(result == TRUE);
596         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
597         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
598         TEST_ASSERT_SUCCESS(status);
599 
600         result = uregex_findNext(re, &status);
601         TEST_ASSERT(result == FALSE);
602         TEST_ASSERT_SUCCESS(status);
603 
604         uregex_close(re);
605     }
606 
607     /*
608      *  groupCount
609      */
610     {
611         int32_t result;
612 
613         status = U_ZERO_ERROR;
614         re = uregex_openC("abc", 0, NULL, &status);
615         result = uregex_groupCount(re, &status);
616         TEST_ASSERT_SUCCESS(status);
617         TEST_ASSERT(result == 0);
618         uregex_close(re);
619 
620         status = U_ZERO_ERROR;
621         re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
622         result = uregex_groupCount(re, &status);
623         TEST_ASSERT_SUCCESS(status);
624         TEST_ASSERT(result == 3);
625         uregex_close(re);
626 
627     }
628 
629 
630     /*
631      *  group()
632      */
633     {
634         UChar    text1[80];
635         UChar    buf[80];
636         UBool    result;
637         int32_t  resultSz;
638         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
639 
640         status = U_ZERO_ERROR;
641         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
642         TEST_ASSERT_SUCCESS(status);
643 
644 
645         uregex_setText(re, text1, -1, &status);
646         result = uregex_find(re, 0, &status);
647         TEST_ASSERT(result==TRUE);
648 
649         /*  Capture Group 0, the full match.  Should succeed.  */
650         status = U_ZERO_ERROR;
651         resultSz = uregex_group(re, 0, buf, UPRV_LENGTHOF(buf), &status);
652         TEST_ASSERT_SUCCESS(status);
653         TEST_ASSERT_STRING("abc interior def", buf, TRUE);
654         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
655 
656         /*  Capture group #1.  Should succeed. */
657         status = U_ZERO_ERROR;
658         resultSz = uregex_group(re, 1, buf, UPRV_LENGTHOF(buf), &status);
659         TEST_ASSERT_SUCCESS(status);
660         TEST_ASSERT_STRING(" interior ", buf, TRUE);
661         TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
662 
663         /*  Capture group out of range.  Error. */
664         status = U_ZERO_ERROR;
665         uregex_group(re, 2, buf, UPRV_LENGTHOF(buf), &status);
666         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
667 
668         /* NULL buffer, pure pre-flight */
669         status = U_ZERO_ERROR;
670         resultSz = uregex_group(re, 0, NULL, 0, &status);
671         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
672         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
673 
674         /* Too small buffer, truncated string */
675         status = U_ZERO_ERROR;
676         memset(buf, -1, sizeof(buf));
677         resultSz = uregex_group(re, 0, buf, 5, &status);
678         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
679         TEST_ASSERT_STRING("abc i", buf, FALSE);
680         TEST_ASSERT(buf[5] == (UChar)0xffff);
681         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
682 
683         /* Output string just fits buffer, no NUL term. */
684         status = U_ZERO_ERROR;
685         resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
686         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
687         TEST_ASSERT_STRING("abc interior def", buf, FALSE);
688         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
689         TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
690 
691         uregex_close(re);
692 
693     }
694 
695     /*
696      *  Regions
697      */
698 
699 
700         /* SetRegion(), getRegion() do something  */
701         TEST_SETUP(".*", "0123456789ABCDEF", 0);
702         UChar resultString[40];
703         TEST_ASSERT(uregex_regionStart(re, &status) == 0);
704         TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
705         uregex_setRegion(re, 3, 6, &status);
706         TEST_ASSERT(uregex_regionStart(re, &status) == 3);
707         TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
708         TEST_ASSERT(uregex_findNext(re, &status));
709         TEST_ASSERT(uregex_group(re, 0, resultString, UPRV_LENGTHOF(resultString), &status) == 3);
710         TEST_ASSERT_STRING("345", resultString, TRUE);
711         TEST_TEARDOWN;
712 
713         /* find(start=-1) uses regions   */
714         TEST_SETUP(".*", "0123456789ABCDEF", 0);
715         uregex_setRegion(re, 4, 6, &status);
716         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
717         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
718         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
719         TEST_TEARDOWN;
720 
721         /* find (start >=0) does not use regions   */
722         TEST_SETUP(".*", "0123456789ABCDEF", 0);
723         uregex_setRegion(re, 4, 6, &status);
724         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
725         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
726         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
727         TEST_TEARDOWN;
728 
729         /* findNext() obeys regions    */
730         TEST_SETUP(".", "0123456789ABCDEF", 0);
731         uregex_setRegion(re, 4, 6, &status);
732         TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
733         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
734         TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
735         TEST_ASSERT(uregex_start(re, 0, &status) == 5);
736         TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
737         TEST_TEARDOWN;
738 
739         /* matches(start=-1) uses regions                                           */
740         /*    Also, verify that non-greedy *? succeeds in finding the full match.   */
741         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
742         uregex_setRegion(re, 4, 6, &status);
743         TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
744         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
745         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
746         TEST_TEARDOWN;
747 
748         /* matches (start >=0) does not use regions       */
749         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
750         uregex_setRegion(re, 4, 6, &status);
751         TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
752         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
753         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
754         TEST_TEARDOWN;
755 
756         /* lookingAt(start=-1) uses regions                                         */
757         /*    Also, verify that non-greedy *? finds the first (shortest) match.     */
758         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
759         uregex_setRegion(re, 4, 6, &status);
760         TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
761         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
762         TEST_ASSERT(uregex_end(re, 0, &status) == 4);
763         TEST_TEARDOWN;
764 
765         /* lookingAt (start >=0) does not use regions  */
766         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
767         uregex_setRegion(re, 4, 6, &status);
768         TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
769         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
770         TEST_ASSERT(uregex_end(re, 0, &status) == 0);
771         TEST_TEARDOWN;
772 
773         /* hitEnd()       */
774         TEST_SETUP("[a-f]*", "abcdefghij", 0);
775         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
776         TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
777         TEST_TEARDOWN;
778 
779         TEST_SETUP("[a-f]*", "abcdef", 0);
780         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
781         TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
782         TEST_TEARDOWN;
783 
784         /* requireEnd   */
785         TEST_SETUP("abcd", "abcd", 0);
786         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
787         TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
788         TEST_TEARDOWN;
789 
790         TEST_SETUP("abcd$", "abcd", 0);
791         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
792         TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
793         TEST_TEARDOWN;
794 
795         /* anchoringBounds        */
796         TEST_SETUP("abc$", "abcdef", 0);
797         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
798         uregex_useAnchoringBounds(re, FALSE, &status);
799         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
800 
801         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
802         uregex_useAnchoringBounds(re, TRUE, &status);
803         uregex_setRegion(re, 0, 3, &status);
804         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
805         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
806         TEST_TEARDOWN;
807 
808         /* Transparent Bounds      */
809         TEST_SETUP("abc(?=def)", "abcdef", 0);
810         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
811         uregex_useTransparentBounds(re, TRUE, &status);
812         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
813 
814         uregex_useTransparentBounds(re, FALSE, &status);
815         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* No Region */
816         uregex_setRegion(re, 0, 3, &status);
817         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);   /* with region, opaque bounds */
818         uregex_useTransparentBounds(re, TRUE, &status);
819         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* with region, transparent bounds */
820         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
821         TEST_TEARDOWN;
822 
823 
824     /*
825      *  replaceFirst()
826      */
827     {
828         UChar    text1[80];
829         UChar    text2[80];
830         UChar    replText[80];
831         UChar    buf[80];
832         int32_t  resultSz;
833         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
834         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
835         u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
836 
837         status = U_ZERO_ERROR;
838         re = uregex_openC("x(.*?)x", 0, NULL, &status);
839         TEST_ASSERT_SUCCESS(status);
840 
841         /*  Normal case, with match */
842         uregex_setText(re, text1, -1, &status);
843         resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
844         TEST_ASSERT_SUCCESS(status);
845         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
846         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
847 
848         /* No match.  Text should copy to output with no changes.  */
849         status = U_ZERO_ERROR;
850         uregex_setText(re, text2, -1, &status);
851         resultSz = uregex_replaceFirst(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
852         TEST_ASSERT_SUCCESS(status);
853         TEST_ASSERT_STRING("No match here.", buf, TRUE);
854         TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
855 
856         /*  Match, output just fills buffer, no termination warning. */
857         status = U_ZERO_ERROR;
858         uregex_setText(re, text1, -1, &status);
859         memset(buf, -1, sizeof(buf));
860         resultSz = uregex_replaceFirst(re, replText, -1, buf, (int32_t)strlen("Replace <aa> x1x x...x."), &status);
861         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
862         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
863         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
864         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
865 
866         /* Do the replaceFirst again, without first resetting anything.
867          *  Should give the same results.
868          */
869         status = U_ZERO_ERROR;
870         memset(buf, -1, sizeof(buf));
871         resultSz = uregex_replaceFirst(re, replText, -1, buf, (int32_t)strlen("Replace <aa> x1x x...x."), &status);
872         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
873         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
874         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
875         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
876 
877         /* NULL buffer, zero buffer length */
878         status = U_ZERO_ERROR;
879         resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
880         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
881         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
882 
883         /* Buffer too small by one */
884         status = U_ZERO_ERROR;
885         memset(buf, -1, sizeof(buf));
886         resultSz = uregex_replaceFirst(re, replText, -1, buf, (int32_t)strlen("Replace <aa> x1x x...x.")-1, &status);
887         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
888         TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
889         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
890         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
891 
892         uregex_close(re);
893     }
894 
895 
896     /*
897      *  replaceAll()
898      */
899     {
900         UChar    text1[80];          /*  "Replace xaax x1x x...x." */
901         UChar    text2[80];          /*  "No match Here"           */
902         UChar    replText[80];       /*  "<$1>"                    */
903         UChar    replText2[80];      /*  "<<$1>>"                  */
904         const char * pattern = "x(.*?)x";
905         const char * expectedResult = "Replace <aa> <1> <...>.";
906         const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
907         UChar    buf[80];
908         int32_t  resultSize;
909         int32_t  expectedResultSize;
910         int32_t  expectedResultSize2;
911         int32_t  i;
912 
913         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
914         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
915         u_uastrncpy(replText, "<$1>", UPRV_LENGTHOF(replText));
916         u_uastrncpy(replText2, "<<$1>>", UPRV_LENGTHOF(replText2));
917         expectedResultSize = (int32_t)strlen(expectedResult);
918         expectedResultSize2 = (int32_t)strlen(expectedResult2);
919 
920         status = U_ZERO_ERROR;
921         re = uregex_openC(pattern, 0, NULL, &status);
922         TEST_ASSERT_SUCCESS(status);
923 
924         /*  Normal case, with match */
925         uregex_setText(re, text1, -1, &status);
926         resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
927         TEST_ASSERT_SUCCESS(status);
928         TEST_ASSERT_STRING(expectedResult, buf, TRUE);
929         TEST_ASSERT(resultSize == expectedResultSize);
930 
931         /* No match.  Text should copy to output with no changes.  */
932         status = U_ZERO_ERROR;
933         uregex_setText(re, text2, -1, &status);
934         resultSize = uregex_replaceAll(re, replText, -1, buf, UPRV_LENGTHOF(buf), &status);
935         TEST_ASSERT_SUCCESS(status);
936         TEST_ASSERT_STRING("No match here.", buf, TRUE);
937         TEST_ASSERT(resultSize == u_strlen(text2));
938 
939         /*  Match, output just fills buffer, no termination warning. */
940         status = U_ZERO_ERROR;
941         uregex_setText(re, text1, -1, &status);
942         memset(buf, -1, sizeof(buf));
943         resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
944         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
945         TEST_ASSERT_STRING(expectedResult, buf, FALSE);
946         TEST_ASSERT(resultSize == expectedResultSize);
947         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
948 
949         /* Do the replaceAll again, without first resetting anything.
950          *  Should give the same results.
951          */
952         status = U_ZERO_ERROR;
953         memset(buf, -1, sizeof(buf));
954         resultSize = uregex_replaceAll(re, replText, -1, buf, (int32_t)strlen("Replace xaax x1x x...x."), &status);
955         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
956         TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
957         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
958         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
959 
960         /* NULL buffer, zero buffer length */
961         status = U_ZERO_ERROR;
962         resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
963         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
964         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
965 
966         /* Buffer too small.  Try every size, which will tickle edge cases
967          * in uregex_appendReplacement (used by replaceAll)   */
968         for (i=0; i<expectedResultSize; i++) {
969             char  expected[80];
970             status = U_ZERO_ERROR;
971             memset(buf, -1, sizeof(buf));
972             resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
973             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
974             strcpy(expected, expectedResult);
975             expected[i] = 0;
976             TEST_ASSERT_STRING(expected, buf, FALSE);
977             TEST_ASSERT(resultSize == expectedResultSize);
978             TEST_ASSERT(buf[i] == (UChar)0xffff);
979         }
980 
981         /* Buffer too small.  Same as previous test, except this time the replacement
982          * text is longer than the match capture group, making the length of the complete
983          * replacement longer than the original string.
984          */
985         for (i=0; i<expectedResultSize2; i++) {
986             char  expected[80];
987             status = U_ZERO_ERROR;
988             memset(buf, -1, sizeof(buf));
989             resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
990             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
991             strcpy(expected, expectedResult2);
992             expected[i] = 0;
993             TEST_ASSERT_STRING(expected, buf, FALSE);
994             TEST_ASSERT(resultSize == expectedResultSize2);
995             TEST_ASSERT(buf[i] == (UChar)0xffff);
996         }
997 
998 
999         uregex_close(re);
1000     }
1001 
1002 
1003     /*
1004      *  appendReplacement()
1005      */
1006     {
1007         UChar    text[100];
1008         UChar    repl[100];
1009         UChar    buf[100];
1010         UChar   *bufPtr;
1011         int32_t  bufCap;
1012 
1013 
1014         status = U_ZERO_ERROR;
1015         re = uregex_openC(".*", 0, 0, &status);
1016         TEST_ASSERT_SUCCESS(status);
1017 
1018         u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
1019         u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1020         uregex_setText(re, text, -1, &status);
1021 
1022         /* match covers whole target string */
1023         uregex_find(re, 0, &status);
1024         TEST_ASSERT_SUCCESS(status);
1025         bufPtr = buf;
1026         bufCap = UPRV_LENGTHOF(buf);
1027         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1028         TEST_ASSERT_SUCCESS(status);
1029         TEST_ASSERT_STRING("some other", buf, TRUE);
1030 
1031         /* Match has \u \U escapes */
1032         uregex_find(re, 0, &status);
1033         TEST_ASSERT_SUCCESS(status);
1034         bufPtr = buf;
1035         bufCap = UPRV_LENGTHOF(buf);
1036         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1037         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1038         TEST_ASSERT_SUCCESS(status);
1039         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1040 
1041         /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
1042         status = U_ZERO_ERROR;
1043         uregex_find(re, 0, &status);
1044         TEST_ASSERT_SUCCESS(status);
1045         bufPtr = buf;
1046         status = U_BUFFER_OVERFLOW_ERROR;
1047         uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
1048         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1049 
1050         uregex_close(re);
1051     }
1052 
1053 
1054     /*
1055      *  appendTail().   Checked in ReplaceFirst(), replaceAll().
1056      */
1057 
1058     /*
1059      *  split()
1060      */
1061     {
1062         UChar    textToSplit[80];
1063         UChar    text2[80];
1064         UChar    buf[200];
1065         UChar    *fields[10];
1066         int32_t  numFields;
1067         int32_t  requiredCapacity;
1068         int32_t  spaceNeeded;
1069         int32_t  sz;
1070 
1071         u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
1072         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1073 
1074         status = U_ZERO_ERROR;
1075         re = uregex_openC(":", 0, NULL, &status);
1076 
1077 
1078         /*  Simple split */
1079 
1080         uregex_setText(re, textToSplit, -1, &status);
1081         TEST_ASSERT_SUCCESS(status);
1082 
1083         /* The TEST_ASSERT_SUCCESS call above should change too... */
1084         if (U_SUCCESS(status)) {
1085             memset(fields, -1, sizeof(fields));
1086             numFields =
1087                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1088             TEST_ASSERT_SUCCESS(status);
1089 
1090             /* The TEST_ASSERT_SUCCESS call above should change too... */
1091             if(U_SUCCESS(status)) {
1092                 TEST_ASSERT(numFields == 3);
1093                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1094                 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1095                 TEST_ASSERT_STRING("  third", fields[2], TRUE);
1096                 TEST_ASSERT(fields[3] == NULL);
1097 
1098                 spaceNeeded = u_strlen(textToSplit) -
1099                             (numFields - 1)  +  /* Field delimiters do not appear in output */
1100                             numFields;          /* Each field gets a NUL terminator */
1101 
1102                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1103             }
1104         }
1105 
1106         uregex_close(re);
1107 
1108 
1109         /*  Split with too few output strings available */
1110         status = U_ZERO_ERROR;
1111         re = uregex_openC(":", 0, NULL, &status);
1112         uregex_setText(re, textToSplit, -1, &status);
1113         TEST_ASSERT_SUCCESS(status);
1114 
1115         /* The TEST_ASSERT_SUCCESS call above should change too... */
1116         if(U_SUCCESS(status)) {
1117             memset(fields, -1, sizeof(fields));
1118             numFields =
1119                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1120             TEST_ASSERT_SUCCESS(status);
1121 
1122             /* The TEST_ASSERT_SUCCESS call above should change too... */
1123             if(U_SUCCESS(status)) {
1124                 TEST_ASSERT(numFields == 2);
1125                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1126                 TEST_ASSERT_STRING(" second:  third", fields[1], TRUE);
1127                 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1128 
1129                 spaceNeeded = u_strlen(textToSplit) -
1130                             (numFields - 1)  +  /* Field delimiters do not appear in output */
1131                             numFields;          /* Each field gets a NUL terminator */
1132 
1133                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1134 
1135                 /* Split with a range of output buffer sizes.  */
1136                 spaceNeeded = u_strlen(textToSplit) -
1137                     (numFields - 1)  +  /* Field delimiters do not appear in output */
1138                     numFields;          /* Each field gets a NUL terminator */
1139 
1140                 for (sz=0; sz < spaceNeeded+1; sz++) {
1141                     memset(fields, -1, sizeof(fields));
1142                     status = U_ZERO_ERROR;
1143                     numFields =
1144                         uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1145                     if (sz >= spaceNeeded) {
1146                         TEST_ASSERT_SUCCESS(status);
1147                         TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1148                         TEST_ASSERT_STRING(" second", fields[1], TRUE);
1149                         TEST_ASSERT_STRING("  third", fields[2], TRUE);
1150                     } else {
1151                         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1152                     }
1153                     TEST_ASSERT(numFields == 3);
1154                     TEST_ASSERT(fields[3] == NULL);
1155                     TEST_ASSERT(spaceNeeded == requiredCapacity);
1156                 }
1157             }
1158         }
1159 
1160         uregex_close(re);
1161     }
1162 
1163 
1164 
1165 
1166     /* Split(), part 2.  Patterns with capture groups.  The capture group text
1167      *                   comes out as additional fields.  */
1168     {
1169         UChar    textToSplit[80];
1170         UChar    buf[200];
1171         UChar    *fields[10];
1172         int32_t  numFields;
1173         int32_t  requiredCapacity;
1174         int32_t  spaceNeeded;
1175         int32_t  sz;
1176 
1177         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
1178 
1179         status = U_ZERO_ERROR;
1180         re = uregex_openC("<(.*?)>", 0, NULL, &status);
1181 
1182         uregex_setText(re, textToSplit, -1, &status);
1183         TEST_ASSERT_SUCCESS(status);
1184 
1185         /* The TEST_ASSERT_SUCCESS call above should change too... */
1186         if(U_SUCCESS(status)) {
1187             memset(fields, -1, sizeof(fields));
1188             numFields =
1189                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 10, &status);
1190             TEST_ASSERT_SUCCESS(status);
1191 
1192             /* The TEST_ASSERT_SUCCESS call above should change too... */
1193             if(U_SUCCESS(status)) {
1194                 TEST_ASSERT(numFields == 5);
1195                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1196                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1197                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1198                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1199                 TEST_ASSERT_STRING("  third", fields[4], TRUE);
1200                 TEST_ASSERT(fields[5] == NULL);
1201                 spaceNeeded = (int32_t)strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1202                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1203             }
1204         }
1205 
1206         /*  Split with too few output strings available (2) */
1207         status = U_ZERO_ERROR;
1208         memset(fields, -1, sizeof(fields));
1209         numFields =
1210             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 2, &status);
1211         TEST_ASSERT_SUCCESS(status);
1212 
1213         /* The TEST_ASSERT_SUCCESS call above should change too... */
1214         if(U_SUCCESS(status)) {
1215             TEST_ASSERT(numFields == 2);
1216             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1217             TEST_ASSERT_STRING(" second<tag-b>  third", fields[1], TRUE);
1218             TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1219 
1220             spaceNeeded = (int32_t)strlen("first . second<tag-b>  third.");  /* "." at NUL positions */
1221             TEST_ASSERT(spaceNeeded == requiredCapacity);
1222         }
1223 
1224         /*  Split with too few output strings available (3) */
1225         status = U_ZERO_ERROR;
1226         memset(fields, -1, sizeof(fields));
1227         numFields =
1228             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 3, &status);
1229         TEST_ASSERT_SUCCESS(status);
1230 
1231         /* The TEST_ASSERT_SUCCESS call above should change too... */
1232         if(U_SUCCESS(status)) {
1233             TEST_ASSERT(numFields == 3);
1234             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1235             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1236             TEST_ASSERT_STRING(" second<tag-b>  third", fields[2], TRUE);
1237             TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1238 
1239             spaceNeeded = (int32_t)strlen("first .tag-a. second<tag-b>  third.");  /* "." at NUL positions */
1240             TEST_ASSERT(spaceNeeded == requiredCapacity);
1241         }
1242 
1243         /*  Split with just enough output strings available (5) */
1244         status = U_ZERO_ERROR;
1245         memset(fields, -1, sizeof(fields));
1246         numFields =
1247             uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 5, &status);
1248         TEST_ASSERT_SUCCESS(status);
1249 
1250         /* The TEST_ASSERT_SUCCESS call above should change too... */
1251         if(U_SUCCESS(status)) {
1252             TEST_ASSERT(numFields == 5);
1253             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1254             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1255             TEST_ASSERT_STRING(" second", fields[2], TRUE);
1256             TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1257             TEST_ASSERT_STRING("  third", fields[4], TRUE);
1258             TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1259 
1260             spaceNeeded = (int32_t)strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1261             TEST_ASSERT(spaceNeeded == requiredCapacity);
1262         }
1263 
1264         /* Split, end of text is a field delimiter.   */
1265         status = U_ZERO_ERROR;
1266         sz = (int32_t)strlen("first <tag-a> second<tag-b>");
1267         uregex_setText(re, textToSplit, sz, &status);
1268         TEST_ASSERT_SUCCESS(status);
1269 
1270         /* The TEST_ASSERT_SUCCESS call above should change too... */
1271         if(U_SUCCESS(status)) {
1272             memset(fields, -1, sizeof(fields));
1273             numFields =
1274                 uregex_split(re, buf, UPRV_LENGTHOF(buf), &requiredCapacity, fields, 9, &status);
1275             TEST_ASSERT_SUCCESS(status);
1276 
1277             /* The TEST_ASSERT_SUCCESS call above should change too... */
1278             if(U_SUCCESS(status)) {
1279                 TEST_ASSERT(numFields == 5);
1280                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1281                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1282                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1283                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1284                 TEST_ASSERT_STRING("",        fields[4], TRUE);
1285                 TEST_ASSERT(fields[5] == NULL);
1286                 TEST_ASSERT(fields[8] == NULL);
1287                 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1288                 spaceNeeded = (int32_t)strlen("first .tag-a. second.tag-b..");  /* "." at NUL positions */
1289                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1290             }
1291         }
1292 
1293         uregex_close(re);
1294     }
1295 
1296     /*
1297      * set/getTimeLimit
1298      */
1299      TEST_SETUP("abc$", "abcdef", 0);
1300      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1301      uregex_setTimeLimit(re, 1000, &status);
1302      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1303      TEST_ASSERT_SUCCESS(status);
1304      uregex_setTimeLimit(re, -1, &status);
1305      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1306      status = U_ZERO_ERROR;
1307      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1308      TEST_TEARDOWN;
1309 
1310      /*
1311       * set/get Stack Limit
1312       */
1313      TEST_SETUP("abc$", "abcdef", 0);
1314      TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1315      uregex_setStackLimit(re, 40000, &status);
1316      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1317      TEST_ASSERT_SUCCESS(status);
1318      uregex_setStackLimit(re, -1, &status);
1319      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1320      status = U_ZERO_ERROR;
1321      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1322      TEST_TEARDOWN;
1323 
1324 
1325      /*
1326       * Get/Set callback functions
1327       *     This test is copied from intltest regex/Callbacks
1328       *     The pattern and test data will run long enough to cause the callback
1329       *       to be invoked.  The nested '+' operators give exponential time
1330       *       behavior with increasing string length.
1331       */
1332      TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0);
1333      callBackContext cbInfo = {4, 0, 0};
1334      const void     *pContext   = &cbInfo;
1335      URegexMatchCallback    *returnedFn = &TestCallbackFn;
1336 
1337      /*  Getting the callback fn when it hasn't been set must return NULL  */
1338      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1339      TEST_ASSERT_SUCCESS(status);
1340      TEST_ASSERT(returnedFn == NULL);
1341      TEST_ASSERT(pContext == NULL);
1342 
1343      /* Set the callback and do a match.                                  */
1344      /* The callback function should record that it has been called.      */
1345      uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1346      TEST_ASSERT_SUCCESS(status);
1347      TEST_ASSERT(cbInfo.numCalls == 0);
1348      TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1349      TEST_ASSERT_SUCCESS(status);
1350      TEST_ASSERT(cbInfo.numCalls > 0);
1351 
1352      /* Getting the callback should return the values that were set above.  */
1353      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1354      TEST_ASSERT(returnedFn == &TestCallbackFn);
1355      TEST_ASSERT(pContext == &cbInfo);
1356 
1357      TEST_TEARDOWN;
1358 }
1359 
1360 
1361 
TestBug4315(void)1362 static void TestBug4315(void) {
1363     UErrorCode      theICUError = U_ZERO_ERROR;
1364     URegularExpression *theRegEx;
1365     UChar           *textBuff;
1366     const char      *thePattern;
1367     UChar            theString[100];
1368     UChar           *destFields[24];
1369     int32_t         neededLength1;
1370     int32_t         neededLength2;
1371 
1372     int32_t         wordCount = 0;
1373     int32_t         destFieldsSize = 24;
1374 
1375     thePattern  = "ck ";
1376     u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1377 
1378     /* open a regex */
1379     theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1380     TEST_ASSERT_SUCCESS(theICUError);
1381 
1382     /* set the input string */
1383     uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1384     TEST_ASSERT_SUCCESS(theICUError);
1385 
1386     /* split */
1387     /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1388      *  error occurs! */
1389     wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1390         destFieldsSize, &theICUError);
1391 
1392     TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1393     TEST_ASSERT(wordCount==3);
1394 
1395     if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1396     {
1397         theICUError = U_ZERO_ERROR;
1398         textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1399         wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1400             destFields, destFieldsSize, &theICUError);
1401         TEST_ASSERT(wordCount==3);
1402         TEST_ASSERT_SUCCESS(theICUError);
1403         TEST_ASSERT(neededLength1 == neededLength2);
1404         TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1405         TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1406         TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1407         TEST_ASSERT(destFields[3] == NULL);
1408         free(textBuff);
1409     }
1410     uregex_close(theRegEx);
1411 }
1412 
1413 /* Based on TestRegexCAPI() */
TestUTextAPI(void)1414 static void TestUTextAPI(void) {
1415     UErrorCode           status = U_ZERO_ERROR;
1416     URegularExpression  *re;
1417     UText                patternText = UTEXT_INITIALIZER;
1418     UChar                pat[200];
1419     const char           patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1420 
1421     /* Minimalist open/close */
1422     utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1423     re = uregex_openUText(&patternText, 0, 0, &status);
1424     if (U_FAILURE(status)) {
1425          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1426          utext_close(&patternText);
1427          return;
1428     }
1429     uregex_close(re);
1430 
1431     /* Open with all flag values set */
1432     status = U_ZERO_ERROR;
1433     re = uregex_openUText(&patternText,
1434         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1435         0, &status);
1436     TEST_ASSERT_SUCCESS(status);
1437     uregex_close(re);
1438 
1439     /* Open with an invalid flag */
1440     status = U_ZERO_ERROR;
1441     re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1442     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1443     uregex_close(re);
1444 
1445     /* open with an invalid parameter */
1446     status = U_ZERO_ERROR;
1447     re = uregex_openUText(NULL,
1448         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1449     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1450 
1451     /*
1452      *  clone
1453      */
1454     {
1455         URegularExpression *clone1;
1456         URegularExpression *clone2;
1457         URegularExpression *clone3;
1458         UChar  testString1[30];
1459         UChar  testString2[30];
1460         UBool  result;
1461 
1462 
1463         status = U_ZERO_ERROR;
1464         re = uregex_openUText(&patternText, 0, 0, &status);
1465         TEST_ASSERT_SUCCESS(status);
1466         clone1 = uregex_clone(re, &status);
1467         TEST_ASSERT_SUCCESS(status);
1468         TEST_ASSERT(clone1 != NULL);
1469 
1470         status = U_ZERO_ERROR;
1471         clone2 = uregex_clone(re, &status);
1472         TEST_ASSERT_SUCCESS(status);
1473         TEST_ASSERT(clone2 != NULL);
1474         uregex_close(re);
1475 
1476         status = U_ZERO_ERROR;
1477         clone3 = uregex_clone(clone2, &status);
1478         TEST_ASSERT_SUCCESS(status);
1479         TEST_ASSERT(clone3 != NULL);
1480 
1481         u_uastrncpy(testString1, "abcccd", UPRV_LENGTHOF(pat));
1482         u_uastrncpy(testString2, "xxxabcccd", UPRV_LENGTHOF(pat));
1483 
1484         status = U_ZERO_ERROR;
1485         uregex_setText(clone1, testString1, -1, &status);
1486         TEST_ASSERT_SUCCESS(status);
1487         result = uregex_lookingAt(clone1, 0, &status);
1488         TEST_ASSERT_SUCCESS(status);
1489         TEST_ASSERT(result==TRUE);
1490 
1491         status = U_ZERO_ERROR;
1492         uregex_setText(clone2, testString2, -1, &status);
1493         TEST_ASSERT_SUCCESS(status);
1494         result = uregex_lookingAt(clone2, 0, &status);
1495         TEST_ASSERT_SUCCESS(status);
1496         TEST_ASSERT(result==FALSE);
1497         result = uregex_find(clone2, 0, &status);
1498         TEST_ASSERT_SUCCESS(status);
1499         TEST_ASSERT(result==TRUE);
1500 
1501         uregex_close(clone1);
1502         uregex_close(clone2);
1503         uregex_close(clone3);
1504 
1505     }
1506 
1507     /*
1508      *  pattern() and patternText()
1509      */
1510     {
1511         const UChar  *resultPat;
1512         int32_t       resultLen;
1513         UText        *resultText;
1514         const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1515         const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1516         u_uastrncpy(pat, "hello", UPRV_LENGTHOF(pat)); /* for comparison */
1517         status = U_ZERO_ERROR;
1518 
1519         utext_openUTF8(&patternText, str_hello, -1, &status);
1520         re = uregex_open(pat, -1, 0, NULL, &status);
1521         resultPat = uregex_pattern(re, &resultLen, &status);
1522         TEST_ASSERT_SUCCESS(status);
1523 
1524         /* The TEST_ASSERT_SUCCESS above should change too... */
1525         if (U_SUCCESS(status)) {
1526             TEST_ASSERT(resultLen == -1);
1527             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1528         }
1529 
1530         resultText = uregex_patternUText(re, &status);
1531         TEST_ASSERT_SUCCESS(status);
1532         TEST_ASSERT_UTEXT(str_hello, resultText);
1533 
1534         uregex_close(re);
1535 
1536         status = U_ZERO_ERROR;
1537         re = uregex_open(pat, 3, 0, NULL, &status);
1538         resultPat = uregex_pattern(re, &resultLen, &status);
1539         TEST_ASSERT_SUCCESS(status);
1540 
1541         /* The TEST_ASSERT_SUCCESS above should change too... */
1542         if (U_SUCCESS(status)) {
1543             TEST_ASSERT(resultLen == 3);
1544             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1545             TEST_ASSERT(u_strlen(resultPat) == 3);
1546         }
1547 
1548         resultText = uregex_patternUText(re, &status);
1549         TEST_ASSERT_SUCCESS(status);
1550         TEST_ASSERT_UTEXT(str_hel, resultText);
1551 
1552         uregex_close(re);
1553     }
1554 
1555     /*
1556      *  setUText() and lookingAt()
1557      */
1558     {
1559         UText  text1 = UTEXT_INITIALIZER;
1560         UText  text2 = UTEXT_INITIALIZER;
1561         UBool  result;
1562         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1563         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1564         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1565         status = U_ZERO_ERROR;
1566         utext_openUTF8(&text1, str_abcccd, -1, &status);
1567         utext_openUTF8(&text2, str_abcccxd, -1, &status);
1568 
1569         utext_openUTF8(&patternText, str_abcd, -1, &status);
1570         re = uregex_openUText(&patternText, 0, NULL, &status);
1571         TEST_ASSERT_SUCCESS(status);
1572 
1573         /* Operation before doing a setText should fail... */
1574         status = U_ZERO_ERROR;
1575         uregex_lookingAt(re, 0, &status);
1576         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1577 
1578         status = U_ZERO_ERROR;
1579         uregex_setUText(re, &text1, &status);
1580         result = uregex_lookingAt(re, 0, &status);
1581         TEST_ASSERT(result == TRUE);
1582         TEST_ASSERT_SUCCESS(status);
1583 
1584         status = U_ZERO_ERROR;
1585         uregex_setUText(re, &text2, &status);
1586         result = uregex_lookingAt(re, 0, &status);
1587         TEST_ASSERT(result == FALSE);
1588         TEST_ASSERT_SUCCESS(status);
1589 
1590         status = U_ZERO_ERROR;
1591         uregex_setUText(re, &text1, &status);
1592         result = uregex_lookingAt(re, 0, &status);
1593         TEST_ASSERT(result == TRUE);
1594         TEST_ASSERT_SUCCESS(status);
1595 
1596         uregex_close(re);
1597         utext_close(&text1);
1598         utext_close(&text2);
1599     }
1600 
1601 
1602     /*
1603      *  getText() and getUText()
1604      */
1605     {
1606         UText  text1 = UTEXT_INITIALIZER;
1607         UText  text2 = UTEXT_INITIALIZER;
1608         UChar  text2Chars[20];
1609         UText  *resultText;
1610         const UChar   *result;
1611         int32_t  textLength;
1612         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1613         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1614         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1615 
1616 
1617         status = U_ZERO_ERROR;
1618         utext_openUTF8(&text1, str_abcccd, -1, &status);
1619         u_uastrncpy(text2Chars, str_abcccxd, UPRV_LENGTHOF(text2Chars));
1620         utext_openUChars(&text2, text2Chars, -1, &status);
1621 
1622         utext_openUTF8(&patternText, str_abcd, -1, &status);
1623         re = uregex_openUText(&patternText, 0, NULL, &status);
1624 
1625         /* First set a UText */
1626         uregex_setUText(re, &text1, &status);
1627         resultText = uregex_getUText(re, NULL, &status);
1628         TEST_ASSERT_SUCCESS(status);
1629         TEST_ASSERT(resultText != &text1);
1630         utext_setNativeIndex(resultText, 0);
1631         utext_setNativeIndex(&text1, 0);
1632         TEST_ASSERT(testUTextEqual(resultText, &text1));
1633         utext_close(resultText);
1634 
1635         result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1636         (void)result;    /* Suppress set but not used warning. */
1637         TEST_ASSERT(textLength == -1 || textLength == 6);
1638         resultText = uregex_getUText(re, NULL, &status);
1639         TEST_ASSERT_SUCCESS(status);
1640         TEST_ASSERT(resultText != &text1);
1641         utext_setNativeIndex(resultText, 0);
1642         utext_setNativeIndex(&text1, 0);
1643         TEST_ASSERT(testUTextEqual(resultText, &text1));
1644         utext_close(resultText);
1645 
1646         /* Then set a UChar * */
1647         uregex_setText(re, text2Chars, 7, &status);
1648         resultText = uregex_getUText(re, NULL, &status);
1649         TEST_ASSERT_SUCCESS(status);
1650         utext_setNativeIndex(resultText, 0);
1651         utext_setNativeIndex(&text2, 0);
1652         TEST_ASSERT(testUTextEqual(resultText, &text2));
1653         utext_close(resultText);
1654         result = uregex_getText(re, &textLength, &status);
1655         TEST_ASSERT(textLength == 7);
1656 
1657         uregex_close(re);
1658         utext_close(&text1);
1659         utext_close(&text2);
1660     }
1661 
1662     /*
1663      *  matches()
1664      */
1665     {
1666         UText   text1 = UTEXT_INITIALIZER;
1667         UBool   result;
1668         UText   nullText = UTEXT_INITIALIZER;
1669         const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1670         const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1671 
1672         status = U_ZERO_ERROR;
1673         utext_openUTF8(&text1, str_abcccde, -1, &status);
1674         utext_openUTF8(&patternText, str_abcd, -1, &status);
1675         re = uregex_openUText(&patternText, 0, NULL, &status);
1676 
1677         uregex_setUText(re, &text1, &status);
1678         result = uregex_matches(re, 0, &status);
1679         TEST_ASSERT(result == FALSE);
1680         TEST_ASSERT_SUCCESS(status);
1681         uregex_close(re);
1682 
1683         status = U_ZERO_ERROR;
1684         re = uregex_openC(".?", 0, NULL, &status);
1685         uregex_setUText(re, &text1, &status);
1686         result = uregex_matches(re, 7, &status);
1687         TEST_ASSERT(result == TRUE);
1688         TEST_ASSERT_SUCCESS(status);
1689 
1690         status = U_ZERO_ERROR;
1691         utext_openUTF8(&nullText, "", -1, &status);
1692         uregex_setUText(re, &nullText, &status);
1693         TEST_ASSERT_SUCCESS(status);
1694         result = uregex_matches(re, 0, &status);
1695         TEST_ASSERT(result == TRUE);
1696         TEST_ASSERT_SUCCESS(status);
1697 
1698         uregex_close(re);
1699         utext_close(&text1);
1700         utext_close(&nullText);
1701     }
1702 
1703 
1704     /*
1705      *  lookingAt()    Used in setText test.
1706      */
1707 
1708 
1709     /*
1710      *  find(), findNext, start, end, reset
1711      */
1712     {
1713         UChar    text1[50];
1714         UBool    result;
1715         u_uastrncpy(text1, "012rx5rx890rxrx...",  UPRV_LENGTHOF(text1));
1716         status = U_ZERO_ERROR;
1717         re = uregex_openC("rx", 0, NULL, &status);
1718 
1719         uregex_setText(re, text1, -1, &status);
1720         result = uregex_find(re, 0, &status);
1721         TEST_ASSERT(result == TRUE);
1722         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1723         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1724         TEST_ASSERT_SUCCESS(status);
1725 
1726         result = uregex_find(re, 9, &status);
1727         TEST_ASSERT(result == TRUE);
1728         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1729         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1730         TEST_ASSERT_SUCCESS(status);
1731 
1732         result = uregex_find(re, 14, &status);
1733         TEST_ASSERT(result == FALSE);
1734         TEST_ASSERT_SUCCESS(status);
1735 
1736         status = U_ZERO_ERROR;
1737         uregex_reset(re, 0, &status);
1738 
1739         result = uregex_findNext(re, &status);
1740         TEST_ASSERT(result == TRUE);
1741         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1742         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1743         TEST_ASSERT_SUCCESS(status);
1744 
1745         result = uregex_findNext(re, &status);
1746         TEST_ASSERT(result == TRUE);
1747         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1748         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1749         TEST_ASSERT_SUCCESS(status);
1750 
1751         status = U_ZERO_ERROR;
1752         uregex_reset(re, 12, &status);
1753 
1754         result = uregex_findNext(re, &status);
1755         TEST_ASSERT(result == TRUE);
1756         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1757         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1758         TEST_ASSERT_SUCCESS(status);
1759 
1760         result = uregex_findNext(re, &status);
1761         TEST_ASSERT(result == FALSE);
1762         TEST_ASSERT_SUCCESS(status);
1763 
1764         uregex_close(re);
1765     }
1766 
1767     /*
1768      *  groupUText()
1769      */
1770     {
1771         UChar    text1[80];
1772         UText   *actual;
1773         UBool    result;
1774         int64_t  groupLen = 0;
1775         UChar    groupBuf[20];
1776 
1777         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  UPRV_LENGTHOF(text1));
1778 
1779         status = U_ZERO_ERROR;
1780         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1781         TEST_ASSERT_SUCCESS(status);
1782 
1783         uregex_setText(re, text1, -1, &status);
1784         result = uregex_find(re, 0, &status);
1785         TEST_ASSERT(result==TRUE);
1786 
1787         /*  Capture Group 0 with shallow clone API.  Should succeed.  */
1788         status = U_ZERO_ERROR;
1789         actual = uregex_groupUText(re, 0, NULL, &groupLen, &status);
1790         TEST_ASSERT_SUCCESS(status);
1791 
1792         TEST_ASSERT(utext_getNativeIndex(actual) == 6);  /* index of "abc " within "noise abc ..." */
1793         TEST_ASSERT(groupLen == 16);   /* length of "abc interior def"  */
1794         utext_extract(actual, 6 /*start index */, 6+16 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1795 
1796         TEST_ASSERT_STRING("abc interior def", groupBuf, TRUE);
1797         utext_close(actual);
1798 
1799         /*  Capture group #1.  Should succeed. */
1800         status = U_ZERO_ERROR;
1801 
1802         actual = uregex_groupUText(re, 1, NULL, &groupLen, &status);
1803         TEST_ASSERT_SUCCESS(status);
1804         TEST_ASSERT(9 == utext_getNativeIndex(actual));    /* index of " interior " within "noise abc interior def ... " */
1805                                                            /*    (within the string text1)           */
1806         TEST_ASSERT(10 == groupLen);                       /* length of " interior " */
1807         utext_extract(actual, 9 /*start index*/, 9+10 /*limit index*/, groupBuf, sizeof(groupBuf), &status);
1808         TEST_ASSERT_STRING(" interior ", groupBuf, TRUE);
1809 
1810         utext_close(actual);
1811 
1812         /*  Capture group out of range.  Error. */
1813         status = U_ZERO_ERROR;
1814         actual = uregex_groupUText(re, 2, NULL, &groupLen, &status);
1815         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1816         utext_close(actual);
1817 
1818         uregex_close(re);
1819     }
1820 
1821     /*
1822      *  replaceFirst()
1823      */
1824     {
1825         UChar    text1[80];
1826         UChar    text2[80];
1827         UText    replText = UTEXT_INITIALIZER;
1828         UText   *result;
1829         const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1830         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1831         const char str_u00411U00000042a[] =  { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31,
1832                0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x5c, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042\$\a */
1833         const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1834         const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1835         status = U_ZERO_ERROR;
1836         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
1837         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1838         utext_openUTF8(&replText, str_1x, -1, &status);
1839 
1840         re = uregex_openC("x(.*?)x", 0, NULL, &status);
1841         TEST_ASSERT_SUCCESS(status);
1842 
1843         /*  Normal case, with match */
1844         uregex_setText(re, text1, -1, &status);
1845         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1846         TEST_ASSERT_SUCCESS(status);
1847         TEST_ASSERT_UTEXT(str_Replxxx, result);
1848         utext_close(result);
1849 
1850         /* No match.  Text should copy to output with no changes.  */
1851         uregex_setText(re, text2, -1, &status);
1852         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1853         TEST_ASSERT_SUCCESS(status);
1854         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1855         utext_close(result);
1856 
1857         /* Unicode escapes */
1858         uregex_setText(re, text1, -1, &status);
1859         utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1860         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1861         TEST_ASSERT_SUCCESS(status);
1862         TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1863         utext_close(result);
1864 
1865         uregex_close(re);
1866         utext_close(&replText);
1867     }
1868 
1869 
1870     /*
1871      *  replaceAll()
1872      */
1873     {
1874         UChar    text1[80];
1875         UChar    text2[80];
1876         UText    replText = UTEXT_INITIALIZER;
1877         UText   *result;
1878         const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1879         const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1880         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1881         status = U_ZERO_ERROR;
1882         u_uastrncpy(text1, "Replace xaax x1x x...x.",  UPRV_LENGTHOF(text1));
1883         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1884         utext_openUTF8(&replText, str_1, -1, &status);
1885 
1886         re = uregex_openC("x(.*?)x", 0, NULL, &status);
1887         TEST_ASSERT_SUCCESS(status);
1888 
1889         /*  Normal case, with match */
1890         uregex_setText(re, text1, -1, &status);
1891         result = uregex_replaceAllUText(re, &replText, NULL, &status);
1892         TEST_ASSERT_SUCCESS(status);
1893         TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1894         utext_close(result);
1895 
1896         /* No match.  Text should copy to output with no changes.  */
1897         uregex_setText(re, text2, -1, &status);
1898         result = uregex_replaceAllUText(re, &replText, NULL, &status);
1899         TEST_ASSERT_SUCCESS(status);
1900         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1901         utext_close(result);
1902 
1903         uregex_close(re);
1904         utext_close(&replText);
1905     }
1906 
1907 
1908     /*
1909      *  appendReplacement()
1910      */
1911     {
1912         UChar    text[100];
1913         UChar    repl[100];
1914         UChar    buf[100];
1915         UChar   *bufPtr;
1916         int32_t  bufCap;
1917 
1918         status = U_ZERO_ERROR;
1919         re = uregex_openC(".*", 0, 0, &status);
1920         TEST_ASSERT_SUCCESS(status);
1921 
1922         u_uastrncpy(text, "whatever",  UPRV_LENGTHOF(text));
1923         u_uastrncpy(repl, "some other", UPRV_LENGTHOF(repl));
1924         uregex_setText(re, text, -1, &status);
1925 
1926         /* match covers whole target string */
1927         uregex_find(re, 0, &status);
1928         TEST_ASSERT_SUCCESS(status);
1929         bufPtr = buf;
1930         bufCap = UPRV_LENGTHOF(buf);
1931         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1932         TEST_ASSERT_SUCCESS(status);
1933         TEST_ASSERT_STRING("some other", buf, TRUE);
1934 
1935         /* Match has \u \U escapes */
1936         uregex_find(re, 0, &status);
1937         TEST_ASSERT_SUCCESS(status);
1938         bufPtr = buf;
1939         bufCap = UPRV_LENGTHOF(buf);
1940         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ \\$ \\abc", UPRV_LENGTHOF(repl));
1941         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1942         TEST_ASSERT_SUCCESS(status);
1943         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1944 
1945         uregex_close(re);
1946     }
1947 
1948 
1949     /*
1950      *  appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1951      */
1952 
1953     /*
1954      *  splitUText()
1955      */
1956     {
1957         UChar    textToSplit[80];
1958         UChar    text2[80];
1959         UText    *fields[10];
1960         int32_t  numFields;
1961         int32_t i;
1962 
1963         u_uastrncpy(textToSplit, "first : second:  third",  UPRV_LENGTHOF(textToSplit));
1964         u_uastrncpy(text2, "No match here.",  UPRV_LENGTHOF(text2));
1965 
1966         status = U_ZERO_ERROR;
1967         re = uregex_openC(":", 0, NULL, &status);
1968 
1969 
1970         /*  Simple split */
1971 
1972         uregex_setText(re, textToSplit, -1, &status);
1973         TEST_ASSERT_SUCCESS(status);
1974 
1975         /* The TEST_ASSERT_SUCCESS call above should change too... */
1976         if (U_SUCCESS(status)) {
1977             memset(fields, 0, sizeof(fields));
1978             numFields = uregex_splitUText(re, fields, 10, &status);
1979             TEST_ASSERT_SUCCESS(status);
1980 
1981             /* The TEST_ASSERT_SUCCESS call above should change too... */
1982             if(U_SUCCESS(status)) {
1983               const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1984               const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* '  second' */
1985               const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
1986                 TEST_ASSERT(numFields == 3);
1987                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
1988                 TEST_ASSERT_UTEXT(str_second, fields[1]);
1989                 TEST_ASSERT_UTEXT(str_third, fields[2]);
1990                 TEST_ASSERT(fields[3] == NULL);
1991             }
1992             for(i = 0; i < numFields; i++) {
1993                 utext_close(fields[i]);
1994             }
1995         }
1996 
1997         uregex_close(re);
1998 
1999 
2000         /*  Split with too few output strings available */
2001         status = U_ZERO_ERROR;
2002         re = uregex_openC(":", 0, NULL, &status);
2003         uregex_setText(re, textToSplit, -1, &status);
2004         TEST_ASSERT_SUCCESS(status);
2005 
2006         /* The TEST_ASSERT_SUCCESS call above should change too... */
2007         if(U_SUCCESS(status)) {
2008             fields[0] = NULL;
2009             fields[1] = NULL;
2010             fields[2] = &patternText;
2011             numFields = uregex_splitUText(re, fields, 2, &status);
2012             TEST_ASSERT_SUCCESS(status);
2013 
2014             /* The TEST_ASSERT_SUCCESS call above should change too... */
2015             if(U_SUCCESS(status)) {
2016                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2017                 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second:  third */
2018                 TEST_ASSERT(numFields == 2);
2019                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2020                 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
2021                 TEST_ASSERT(fields[2] == &patternText);
2022             }
2023             for(i = 0; i < numFields; i++) {
2024                 utext_close(fields[i]);
2025             }
2026         }
2027 
2028         uregex_close(re);
2029     }
2030 
2031     /* splitUText(), part 2.  Patterns with capture groups.  The capture group text
2032      *                   comes out as additional fields.  */
2033     {
2034         UChar    textToSplit[80];
2035         UText    *fields[10];
2036         int32_t  numFields;
2037         int32_t i;
2038 
2039         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  UPRV_LENGTHOF(textToSplit));
2040 
2041         status = U_ZERO_ERROR;
2042         re = uregex_openC("<(.*?)>", 0, NULL, &status);
2043 
2044         uregex_setText(re, textToSplit, -1, &status);
2045         TEST_ASSERT_SUCCESS(status);
2046 
2047         /* The TEST_ASSERT_SUCCESS call above should change too... */
2048         if(U_SUCCESS(status)) {
2049             memset(fields, 0, sizeof(fields));
2050             numFields = uregex_splitUText(re, fields, 10, &status);
2051             TEST_ASSERT_SUCCESS(status);
2052 
2053             /* The TEST_ASSERT_SUCCESS call above should change too... */
2054             if(U_SUCCESS(status)) {
2055                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2056                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2057                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2058                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2059                 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2060 
2061                 TEST_ASSERT(numFields == 5);
2062                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2063                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2064                 TEST_ASSERT_UTEXT(str_second, fields[2]);
2065                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2066                 TEST_ASSERT_UTEXT(str_third, fields[4]);
2067                 TEST_ASSERT(fields[5] == NULL);
2068             }
2069             for(i = 0; i < numFields; i++) {
2070                 utext_close(fields[i]);
2071             }
2072         }
2073 
2074         /*  Split with too few output strings available (2) */
2075         status = U_ZERO_ERROR;
2076         fields[0] = NULL;
2077         fields[1] = NULL;
2078         fields[2] = &patternText;
2079         numFields = uregex_splitUText(re, fields, 2, &status);
2080         TEST_ASSERT_SUCCESS(status);
2081 
2082         /* The TEST_ASSERT_SUCCESS call above should change too... */
2083         if(U_SUCCESS(status)) {
2084             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2085             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2086             TEST_ASSERT(numFields == 2);
2087             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2088             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2089             TEST_ASSERT(fields[2] == &patternText);
2090         }
2091         for(i = 0; i < numFields; i++) {
2092             utext_close(fields[i]);
2093         }
2094 
2095 
2096         /*  Split with too few output strings available (3) */
2097         status = U_ZERO_ERROR;
2098         fields[0] = NULL;
2099         fields[1] = NULL;
2100         fields[2] = NULL;
2101         fields[3] = &patternText;
2102         numFields = uregex_splitUText(re, fields, 3, &status);
2103         TEST_ASSERT_SUCCESS(status);
2104 
2105         /* The TEST_ASSERT_SUCCESS call above should change too... */
2106         if(U_SUCCESS(status)) {
2107             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2108             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2109             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2110             TEST_ASSERT(numFields == 3);
2111             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2112             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2113             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2114             TEST_ASSERT(fields[3] == &patternText);
2115         }
2116         for(i = 0; i < numFields; i++) {
2117             utext_close(fields[i]);
2118         }
2119 
2120         /*  Split with just enough output strings available (5) */
2121         status = U_ZERO_ERROR;
2122         fields[0] = NULL;
2123         fields[1] = NULL;
2124         fields[2] = NULL;
2125         fields[3] = NULL;
2126         fields[4] = NULL;
2127         fields[5] = &patternText;
2128         numFields = uregex_splitUText(re, fields, 5, &status);
2129         TEST_ASSERT_SUCCESS(status);
2130 
2131         /* The TEST_ASSERT_SUCCESS call above should change too... */
2132         if(U_SUCCESS(status)) {
2133             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2134             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2135             const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2136             const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2137             const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2138 
2139             TEST_ASSERT(numFields == 5);
2140             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2141             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2142             TEST_ASSERT_UTEXT(str_second, fields[2]);
2143             TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2144             TEST_ASSERT_UTEXT(str_third, fields[4]);
2145             TEST_ASSERT(fields[5] == &patternText);
2146         }
2147         for(i = 0; i < numFields; i++) {
2148             utext_close(fields[i]);
2149         }
2150 
2151         /* Split, end of text is a field delimiter.   */
2152         status = U_ZERO_ERROR;
2153         uregex_setText(re, textToSplit, (int32_t)strlen("first <tag-a> second<tag-b>"), &status);
2154         TEST_ASSERT_SUCCESS(status);
2155 
2156         /* The TEST_ASSERT_SUCCESS call above should change too... */
2157         if(U_SUCCESS(status)) {
2158             memset(fields, 0, sizeof(fields));
2159             fields[9] = &patternText;
2160             numFields = uregex_splitUText(re, fields, 9, &status);
2161             TEST_ASSERT_SUCCESS(status);
2162 
2163             /* The TEST_ASSERT_SUCCESS call above should change too... */
2164             if(U_SUCCESS(status)) {
2165                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2166                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2167                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2168                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2169                 const char str_empty[] = { 0x00 };
2170 
2171                 TEST_ASSERT(numFields == 5);
2172                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2173                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2174                 TEST_ASSERT_UTEXT(str_second, fields[2]);
2175                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2176                 TEST_ASSERT_UTEXT(str_empty,  fields[4]);
2177                 TEST_ASSERT(fields[5] == NULL);
2178                 TEST_ASSERT(fields[8] == NULL);
2179                 TEST_ASSERT(fields[9] == &patternText);
2180             }
2181             for(i = 0; i < numFields; i++) {
2182                 utext_close(fields[i]);
2183             }
2184         }
2185 
2186         uregex_close(re);
2187     }
2188     utext_close(&patternText);
2189 }
2190 
2191 
TestRefreshInput(void)2192 static void TestRefreshInput(void) {
2193     /*
2194      *  RefreshInput changes out the input of a URegularExpression without
2195      *    changing anything else in the match state.  Used with Java JNI,
2196      *    when Java moves the underlying string storage.   This test
2197      *    runs a find() loop, moving the text after the first match.
2198      *    The right number of matches should still be found.
2199      */
2200     UChar testStr[]  = {0x41, 0x20, 0x42, 0x20, 0x43, 0x0};  /* = "A B C"  */
2201     UChar movedStr[] = {   0,    0,    0,    0,    0,   0};
2202     UErrorCode status = U_ZERO_ERROR;
2203     URegularExpression *re;
2204     UText ut1 = UTEXT_INITIALIZER;
2205     UText ut2 = UTEXT_INITIALIZER;
2206 
2207     re = uregex_openC("[ABC]", 0, 0, &status);
2208     TEST_ASSERT_SUCCESS(status);
2209 
2210     utext_openUChars(&ut1, testStr, -1, &status);
2211     TEST_ASSERT_SUCCESS(status);
2212     uregex_setUText(re, &ut1, &status);
2213     TEST_ASSERT_SUCCESS(status);
2214 
2215     /* Find the first match "A" in the original string */
2216     TEST_ASSERT(uregex_findNext(re, &status));
2217     TEST_ASSERT(uregex_start(re, 0, &status) == 0);
2218 
2219     /* Move the string, kill the original string.  */
2220     u_strcpy(movedStr, testStr);
2221     u_memset(testStr, 0, u_strlen(testStr));
2222     utext_openUChars(&ut2, movedStr, -1, &status);
2223     TEST_ASSERT_SUCCESS(status);
2224     uregex_refreshUText(re, &ut2, &status);
2225     TEST_ASSERT_SUCCESS(status);
2226 
2227     /* Find the following two matches, now working in the moved string. */
2228     TEST_ASSERT(uregex_findNext(re, &status));
2229     TEST_ASSERT(uregex_start(re, 0, &status) == 2);
2230     TEST_ASSERT(uregex_findNext(re, &status));
2231     TEST_ASSERT(uregex_start(re, 0, &status) == 4);
2232     TEST_ASSERT(FALSE == uregex_findNext(re, &status));
2233 
2234     uregex_close(re);
2235 }
2236 
2237 
TestBug8421(void)2238 static void TestBug8421(void) {
2239     /* Bug 8421:  setTimeLimit on a regular expresssion before setting text to be matched
2240      *             was failing.
2241      */
2242     URegularExpression *re;
2243     UErrorCode status = U_ZERO_ERROR;
2244     int32_t  limit = -1;
2245 
2246     re = uregex_openC("abc", 0, 0, &status);
2247     TEST_ASSERT_SUCCESS(status);
2248 
2249     limit = uregex_getTimeLimit(re, &status);
2250     TEST_ASSERT_SUCCESS(status);
2251     TEST_ASSERT(limit == 0);
2252 
2253     uregex_setTimeLimit(re, 100, &status);
2254     TEST_ASSERT_SUCCESS(status);
2255     limit = uregex_getTimeLimit(re, &status);
2256     TEST_ASSERT_SUCCESS(status);
2257     TEST_ASSERT(limit == 100);
2258 
2259     uregex_close(re);
2260 }
2261 
FindCallback(const void * context,int64_t matchIndex)2262 static UBool U_CALLCONV FindCallback(const void* context , int64_t matchIndex) {
2263     // suppress compiler warnings about unused variables
2264     (void)context;
2265     (void)matchIndex;
2266     return FALSE;
2267 }
2268 
MatchCallback(const void * context,int32_t steps)2269 static UBool U_CALLCONV MatchCallback(const void *context, int32_t steps) {
2270     // suppress compiler warnings about unused variables
2271     (void)context;
2272     (void)steps;
2273     return FALSE;
2274 }
2275 
TestBug10815()2276 static void TestBug10815() {
2277   /* Bug 10815:   uregex_findNext() does not set U_REGEX_STOPPED_BY_CALLER
2278    *              when the callback function specified by uregex_setMatchCallback() returns FALSE
2279    */
2280     URegularExpression *re;
2281     UErrorCode status = U_ZERO_ERROR;
2282     UChar    text[100];
2283 
2284 
2285     // findNext() with a find progress callback function.
2286 
2287     re = uregex_openC(".z", 0, 0, &status);
2288     TEST_ASSERT_SUCCESS(status);
2289 
2290     u_uastrncpy(text, "Hello, World.",  UPRV_LENGTHOF(text));
2291     uregex_setText(re, text, -1, &status);
2292     TEST_ASSERT_SUCCESS(status);
2293 
2294     uregex_setFindProgressCallback(re, FindCallback, NULL, &status);
2295     TEST_ASSERT_SUCCESS(status);
2296 
2297     uregex_findNext(re, &status);
2298     TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2299 
2300     uregex_close(re);
2301 
2302     // findNext() with a match progress callback function.
2303 
2304     status = U_ZERO_ERROR;
2305     re = uregex_openC("((xxx)*)*y", 0, 0, &status);
2306     TEST_ASSERT_SUCCESS(status);
2307 
2308     // Pattern + this text gives an exponential time match. Without the callback to stop the match,
2309     // it will appear to be stuck in a (near) infinite loop.
2310     u_uastrncpy(text, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",  UPRV_LENGTHOF(text));
2311     uregex_setText(re, text, -1, &status);
2312     TEST_ASSERT_SUCCESS(status);
2313 
2314     uregex_setMatchCallback(re, MatchCallback, NULL, &status);
2315     TEST_ASSERT_SUCCESS(status);
2316 
2317     uregex_findNext(re, &status);
2318     TEST_ASSERT(status == U_REGEX_STOPPED_BY_CALLER);
2319 
2320     uregex_close(re);
2321 }
2322 
2323 
2324 #endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */
2325