• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /********************************************************************
2  * COPYRIGHT:
3  * Copyright (c) 2004-2010, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  ********************************************************************/
6 /********************************************************************************
7 *
8 * File reapits.c
9 *
10 *********************************************************************************/
11 /*C API TEST FOR Regular Expressions */
12 /**
13 *   This is an API test for ICU regular expressions in C.  It doesn't test very many cases, and doesn't
14 *   try to test the full functionality.  It just calls each function and verifies that it
15 *   works on a basic level.
16 *
17 *   More complete testing of regular expression functionality is done with the C++ tests.
18 **/
19 
20 #include "unicode/utypes.h"
21 
22 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
23 
24 #include <stdlib.h>
25 #include <string.h>
26 #include "unicode/uloc.h"
27 #include "unicode/uregex.h"
28 #include "unicode/ustring.h"
29 #include "unicode/utext.h"
30 #include "cintltst.h"
31 
/*
 *   TEST_ASSERT_SUCCESS
 *         Report a data error, tagged with the file and line of the macro
 *         invocation, when the given UErrorCode indicates failure.
 *         Expands to a brace-enclosed block.
 */
#define TEST_ASSERT_SUCCESS(status) { \
    if (U_FAILURE(status)) { \
        log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", \
                     __FILE__, __LINE__, u_errorName(status)); \
    } \
}
34 
/*
 *   TEST_ASSERT
 *         Report a data error, tagged with the file and line of the macro
 *         invocation, when the expression evaluates to zero / FALSE.
 *         Expands to a brace-enclosed block.
 */
#define TEST_ASSERT(expr) { \
    if (!(expr)) { \
        log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", \
                     __FILE__, __LINE__); \
    } \
}
37 
/*
 *   TEST_SETUP and TEST_TEARDOWN
 *         Macros to handle the boilerplate around setting up regex test cases.
 *         Parameters to TEST_SETUP:
 *              pattern:     The regex pattern, a (char *) null terminated C string.
 *              testString:  The string data, also a (char *) C string.
 *              flags:       Regex flags to set when compiling the pattern
 *
 *         Put arbitrary test code between SETUP and TEARDOWN.
 *         "re" is the compiled, ready-to-go regular expression.
 *
 *         Both macros use the variables "status" (UErrorCode) and "re"
 *         (URegularExpression *), which must already be declared in the
 *         enclosing scope.  TEST_SETUP opens a block that TEST_TEARDOWN
 *         closes, so the two must always be used as a pair.  The test code
 *         in between only runs if the pattern compiled and the text was set.
 *
 *         A failed allocation of the UChar copy of testString is reported
 *         through "status" (U_MEMORY_ALLOCATION_ERROR) instead of being
 *         passed on as a NULL destination to u_uastrncpy().
 */
#define TEST_SETUP(pattern, testString, flags) {  \
    UChar   *srcString = NULL;  \
    status = U_ZERO_ERROR; \
    re = uregex_openC(pattern, flags, NULL, &status);  \
    TEST_ASSERT_SUCCESS(status);   \
    srcString = (UChar *)malloc((strlen(testString)+2)*sizeof(UChar)); \
    if (srcString == NULL) { \
        if (U_SUCCESS(status)) { \
            status = U_MEMORY_ALLOCATION_ERROR; \
        } \
    } else { \
        u_uastrncpy(srcString, testString,  strlen(testString)+1); \
    } \
    uregex_setText(re, srcString, -1, &status); \
    TEST_ASSERT_SUCCESS(status);  \
    if (U_SUCCESS(status)) {

#define TEST_TEARDOWN  \
    }  \
    TEST_ASSERT_SUCCESS(status);  \
    uregex_close(re);  \
    free(srcString);   \
    }
66 
67 
68 /**
69  * @param expected utf-8 array of bytes to be expected
70  */
test_assert_string(const char * expected,const UChar * actual,UBool nulTerm,const char * file,int line)71 static void test_assert_string(const char *expected, const UChar *actual, UBool nulTerm, const char *file, int line) {
72      char     buf_inside_macro[120];
73      int32_t  len = (int32_t)strlen(expected);
74      UBool    success;
75      if (nulTerm) {
76          u_austrncpy(buf_inside_macro, (actual), len+1);
77          buf_inside_macro[len+2] = 0;
78          success = (strcmp((expected), buf_inside_macro) == 0);
79      } else {
80          u_austrncpy(buf_inside_macro, (actual), len);
81          buf_inside_macro[len+1] = 0;
82          success = (strncmp((expected), buf_inside_macro, len) == 0);
83      }
84      if (success == FALSE) {
85          log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n",
86              file, line, (expected), buf_inside_macro);
87      }
88 }
89 
90 #define TEST_ASSERT_STRING(expected, actual, nulTerm) test_assert_string(expected, actual, nulTerm, __FILE__, __LINE__)
91 
92 
/**
 * Compare the contents of a UText against an expected UTF-8 string and log an
 * error (with the caller's file and line) on mismatch.  On mismatch the actual
 * text is dumped one code point at a time: code points strictly between 0x20
 * and 0x7e are printed as characters, everything else as a hex value.
 *
 * The iteration position of actual is changed by this function.
 *
 * @param expected the expected text, a NUL terminated UTF-8 string
 * @param actual   the UText to check
 * @param file     source file of the assertion, used in the failure message
 * @param line     source line of the assertion, used in the failure message
 */
static void test_assert_utext(const char *expected, UText *actual, const char *file, int line) {
    UErrorCode status = U_ZERO_ERROR;
    UText expectedText = UTEXT_INITIALIZER;

    utext_openUTF8(&expectedText, expected, -1, &status);
    if (U_FAILURE(status)) {
        /* Without an opened expected text there is nothing valid to compare against. */
        log_err("Failure at file %s, line %d, could not open expected text, error = %s\n",
            file, line, u_errorName(status));
        return;
    }

    utext_setNativeIndex(actual, 0);
    if (utext_compare(&expectedText, -1, actual, -1) != 0) {
        UChar32 c;
        log_err("Failure at file %s, line %d, expected \"%s\", got \"", file, line, expected);
        c = utext_next32From(actual, 0);
        while (c != U_SENTINEL) {
            if (0x20<c && c <0x7e) {
                log_err("%c", c);
            } else {
                log_err("%#x", c);
            }
            c = UTEXT_NEXT32(actual);
        }
        log_err("\"\n");
    }
    utext_close(&expectedText);
}

/* Convenience wrapper that supplies the file and line of the assertion. */
#define TEST_ASSERT_UTEXT(expected, actual) test_assert_utext(expected, actual, __FILE__, __LINE__)
116 
117 
118 
119 static void TestRegexCAPI(void);
120 static void TestBug4315(void);
121 static void TestUTextAPI(void);
122 
123 void addURegexTest(TestNode** root);
124 
addURegexTest(TestNode ** root)125 void addURegexTest(TestNode** root)
126 {
127     addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI");
128     addTest(root, &TestBug4315,   "regex/TestBug4315");
129     addTest(root, &TestUTextAPI,  "regex/TestUTextAPI");
130 }
131 
/*
 * Context struct and call back function used for testing regular
 *   expression user callbacks.  This test is mostly the same as
 *   the corresponding C++ test in intltest.
 */
typedef struct callBackContext {
    int32_t maxCalls;    /* the callback returns FALSE once numCalls reaches this value */
    int32_t numCalls;    /* incremented on every callback invocation                   */
    int32_t lastSteps;   /* "steps" value received by the most recent invocation       */
} callBackContext;
142 
143 static UBool U_EXPORT2 U_CALLCONV
TestCallbackFn(const void * context,int32_t steps)144 TestCallbackFn(const void *context, int32_t steps) {
145   callBackContext  *info = (callBackContext *)context;
146   if (info->lastSteps+1 != steps) {
147       log_err("incorrect steps in callback.  Expected %d, got %d\n", info->lastSteps+1, steps);
148   }
149   info->lastSteps = steps;
150   info->numCalls++;
151   return (info->numCalls < info->maxCalls);
152 }
153 
154 /*
155  *   Regular Expression C API Tests
156  */
TestRegexCAPI(void)157 static void TestRegexCAPI(void) {
158     UErrorCode           status = U_ZERO_ERROR;
159     URegularExpression  *re;
160     UChar                pat[200];
161     UChar               *minus1;
162 
163     memset(&minus1, -1, sizeof(minus1));
164 
165     /* Mimimalist open/close */
166     u_uastrncpy(pat, "abc*", sizeof(pat)/2);
167     re = uregex_open(pat, -1, 0, 0, &status);
168     if (U_FAILURE(status)) {
169          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
170          return;
171     }
172     uregex_close(re);
173 
174     /* Open with all flag values set */
175     status = U_ZERO_ERROR;
176     re = uregex_open(pat, -1,
177         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
178         0, &status);
179     TEST_ASSERT_SUCCESS(status);
180     uregex_close(re);
181 
182     /* Open with an invalid flag */
183     status = U_ZERO_ERROR;
184     re = uregex_open(pat, -1, 0x40000000, 0, &status);
185     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
186     uregex_close(re);
187 
188     /* Open with an unimplemented flag */
189     status = U_ZERO_ERROR;
190     re = uregex_open(pat, -1, UREGEX_LITERAL, 0, &status);
191     TEST_ASSERT(status == U_REGEX_UNIMPLEMENTED);
192     uregex_close(re);
193 
194     /* openC with an invalid parameter */
195     status = U_ZERO_ERROR;
196     re = uregex_openC(NULL,
197         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
198     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
199 
200     /* openC with an invalid parameter */
201     status = U_USELESS_COLLATOR_ERROR;
202     re = uregex_openC(NULL,
203         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
204     TEST_ASSERT(status == U_USELESS_COLLATOR_ERROR && re == NULL);
205 
206     /* openC   open from a C string */
207     {
208         const UChar   *p;
209         int32_t  len;
210         status = U_ZERO_ERROR;
211         re = uregex_openC("abc*", 0, 0, &status);
212         TEST_ASSERT_SUCCESS(status);
213         p = uregex_pattern(re, &len, &status);
214         TEST_ASSERT_SUCCESS(status);
215 
216         /* The TEST_ASSERT_SUCCESS above should change too... */
217         if(U_SUCCESS(status)) {
218             u_uastrncpy(pat, "abc*", sizeof(pat)/2);
219             TEST_ASSERT(u_strcmp(pat, p) == 0);
220             TEST_ASSERT(len==(int32_t)strlen("abc*"));
221         }
222 
223         uregex_close(re);
224 
225         /*  TODO:  Open with ParseError parameter */
226     }
227 
228     /*
229      *  clone
230      */
231     {
232         URegularExpression *clone1;
233         URegularExpression *clone2;
234         URegularExpression *clone3;
235         UChar  testString1[30];
236         UChar  testString2[30];
237         UBool  result;
238 
239 
240         status = U_ZERO_ERROR;
241         re = uregex_openC("abc*", 0, 0, &status);
242         TEST_ASSERT_SUCCESS(status);
243         clone1 = uregex_clone(re, &status);
244         TEST_ASSERT_SUCCESS(status);
245         TEST_ASSERT(clone1 != NULL);
246 
247         status = U_ZERO_ERROR;
248         clone2 = uregex_clone(re, &status);
249         TEST_ASSERT_SUCCESS(status);
250         TEST_ASSERT(clone2 != NULL);
251         uregex_close(re);
252 
253         status = U_ZERO_ERROR;
254         clone3 = uregex_clone(clone2, &status);
255         TEST_ASSERT_SUCCESS(status);
256         TEST_ASSERT(clone3 != NULL);
257 
258         u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
259         u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
260 
261         status = U_ZERO_ERROR;
262         uregex_setText(clone1, testString1, -1, &status);
263         TEST_ASSERT_SUCCESS(status);
264         result = uregex_lookingAt(clone1, 0, &status);
265         TEST_ASSERT_SUCCESS(status);
266         TEST_ASSERT(result==TRUE);
267 
268         status = U_ZERO_ERROR;
269         uregex_setText(clone2, testString2, -1, &status);
270         TEST_ASSERT_SUCCESS(status);
271         result = uregex_lookingAt(clone2, 0, &status);
272         TEST_ASSERT_SUCCESS(status);
273         TEST_ASSERT(result==FALSE);
274         result = uregex_find(clone2, 0, &status);
275         TEST_ASSERT_SUCCESS(status);
276         TEST_ASSERT(result==TRUE);
277 
278         uregex_close(clone1);
279         uregex_close(clone2);
280         uregex_close(clone3);
281 
282     }
283 
284     /*
285      *  pattern()
286     */
287     {
288         const UChar  *resultPat;
289         int32_t       resultLen;
290         u_uastrncpy(pat, "hello", sizeof(pat)/2);
291         status = U_ZERO_ERROR;
292         re = uregex_open(pat, -1, 0, NULL, &status);
293         resultPat = uregex_pattern(re, &resultLen, &status);
294         TEST_ASSERT_SUCCESS(status);
295 
296         /* The TEST_ASSERT_SUCCESS above should change too... */
297         if (U_SUCCESS(status)) {
298             TEST_ASSERT(resultLen == -1);
299             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
300         }
301 
302         uregex_close(re);
303 
304         status = U_ZERO_ERROR;
305         re = uregex_open(pat, 3, 0, NULL, &status);
306         resultPat = uregex_pattern(re, &resultLen, &status);
307         TEST_ASSERT_SUCCESS(status);
308         TEST_ASSERT_SUCCESS(status);
309 
310         /* The TEST_ASSERT_SUCCESS above should change too... */
311         if (U_SUCCESS(status)) {
312             TEST_ASSERT(resultLen == 3);
313             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
314             TEST_ASSERT(u_strlen(resultPat) == 3);
315         }
316 
317         uregex_close(re);
318     }
319 
320     /*
321      *  flags()
322      */
323     {
324         int32_t  t;
325 
326         status = U_ZERO_ERROR;
327         re = uregex_open(pat, -1, 0, NULL, &status);
328         t  = uregex_flags(re, &status);
329         TEST_ASSERT_SUCCESS(status);
330         TEST_ASSERT(t == 0);
331         uregex_close(re);
332 
333         status = U_ZERO_ERROR;
334         re = uregex_open(pat, -1, 0, NULL, &status);
335         t  = uregex_flags(re, &status);
336         TEST_ASSERT_SUCCESS(status);
337         TEST_ASSERT(t == 0);
338         uregex_close(re);
339 
340         status = U_ZERO_ERROR;
341         re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status);
342         t  = uregex_flags(re, &status);
343         TEST_ASSERT_SUCCESS(status);
344         TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL));
345         uregex_close(re);
346     }
347 
348     /*
349      *  setText() and lookingAt()
350      */
351     {
352         UChar  text1[50];
353         UChar  text2[50];
354         UBool  result;
355 
356         u_uastrncpy(text1, "abcccd",  sizeof(text1)/2);
357         u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
358         status = U_ZERO_ERROR;
359         u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
360         re = uregex_open(pat, -1, 0, NULL, &status);
361         TEST_ASSERT_SUCCESS(status);
362 
363         /* Operation before doing a setText should fail... */
364         status = U_ZERO_ERROR;
365         uregex_lookingAt(re, 0, &status);
366         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
367 
368         status = U_ZERO_ERROR;
369         uregex_setText(re, text1, -1, &status);
370         result = uregex_lookingAt(re, 0, &status);
371         TEST_ASSERT(result == TRUE);
372         TEST_ASSERT_SUCCESS(status);
373 
374         status = U_ZERO_ERROR;
375         uregex_setText(re, text2, -1, &status);
376         result = uregex_lookingAt(re, 0, &status);
377         TEST_ASSERT(result == FALSE);
378         TEST_ASSERT_SUCCESS(status);
379 
380         status = U_ZERO_ERROR;
381         uregex_setText(re, text1, -1, &status);
382         result = uregex_lookingAt(re, 0, &status);
383         TEST_ASSERT(result == TRUE);
384         TEST_ASSERT_SUCCESS(status);
385 
386         status = U_ZERO_ERROR;
387         uregex_setText(re, text1, 5, &status);
388         result = uregex_lookingAt(re, 0, &status);
389         TEST_ASSERT(result == FALSE);
390         TEST_ASSERT_SUCCESS(status);
391 
392         status = U_ZERO_ERROR;
393         uregex_setText(re, text1, 6, &status);
394         result = uregex_lookingAt(re, 0, &status);
395         TEST_ASSERT(result == TRUE);
396         TEST_ASSERT_SUCCESS(status);
397 
398         uregex_close(re);
399     }
400 
401 
402     /*
403      *  getText()
404      */
405     {
406         UChar    text1[50];
407         UChar    text2[50];
408         const UChar   *result;
409         int32_t  textLength;
410 
411         u_uastrncpy(text1, "abcccd",  sizeof(text1)/2);
412         u_uastrncpy(text2, "abcccxd", sizeof(text2)/2);
413         status = U_ZERO_ERROR;
414         u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
415         re = uregex_open(pat, -1, 0, NULL, &status);
416 
417         uregex_setText(re, text1, -1, &status);
418         result = uregex_getText(re, &textLength, &status);
419         TEST_ASSERT(result == text1);
420         TEST_ASSERT(textLength == -1);
421         TEST_ASSERT_SUCCESS(status);
422 
423         status = U_ZERO_ERROR;
424         uregex_setText(re, text2, 7, &status);
425         result = uregex_getText(re, &textLength, &status);
426         TEST_ASSERT(result == text2);
427         TEST_ASSERT(textLength == 7);
428         TEST_ASSERT_SUCCESS(status);
429 
430         status = U_ZERO_ERROR;
431         uregex_setText(re, text2, 4, &status);
432         result = uregex_getText(re, &textLength, &status);
433         TEST_ASSERT(result == text2);
434         TEST_ASSERT(textLength == 4);
435         TEST_ASSERT_SUCCESS(status);
436         uregex_close(re);
437     }
438 
439     /*
440      *  matches()
441      */
442     {
443         UChar   text1[50];
444         UBool   result;
445         int     len;
446         UChar   nullString[] = {0,0,0};
447 
448         u_uastrncpy(text1, "abcccde",  sizeof(text1)/2);
449         status = U_ZERO_ERROR;
450         u_uastrncpy(pat, "abc*d", sizeof(pat)/2);
451         re = uregex_open(pat, -1, 0, NULL, &status);
452 
453         uregex_setText(re, text1, -1, &status);
454         result = uregex_matches(re, 0, &status);
455         TEST_ASSERT(result == FALSE);
456         TEST_ASSERT_SUCCESS(status);
457 
458         status = U_ZERO_ERROR;
459         uregex_setText(re, text1, 6, &status);
460         result = uregex_matches(re, 0, &status);
461         TEST_ASSERT(result == TRUE);
462         TEST_ASSERT_SUCCESS(status);
463 
464         status = U_ZERO_ERROR;
465         uregex_setText(re, text1, 6, &status);
466         result = uregex_matches(re, 1, &status);
467         TEST_ASSERT(result == FALSE);
468         TEST_ASSERT_SUCCESS(status);
469         uregex_close(re);
470 
471         status = U_ZERO_ERROR;
472         re = uregex_openC(".?", 0, NULL, &status);
473         uregex_setText(re, text1, -1, &status);
474         len = u_strlen(text1);
475         result = uregex_matches(re, len, &status);
476         TEST_ASSERT(result == TRUE);
477         TEST_ASSERT_SUCCESS(status);
478 
479         status = U_ZERO_ERROR;
480         uregex_setText(re, nullString, -1, &status);
481         TEST_ASSERT_SUCCESS(status);
482         result = uregex_matches(re, 0, &status);
483         TEST_ASSERT(result == TRUE);
484         TEST_ASSERT_SUCCESS(status);
485         uregex_close(re);
486     }
487 
488 
489     /*
490      *  lookingAt()    Used in setText test.
491      */
492 
493 
494     /*
495      *  find(), findNext, start, end, reset
496      */
497     {
498         UChar    text1[50];
499         UBool    result;
500         u_uastrncpy(text1, "012rx5rx890rxrx...",  sizeof(text1)/2);
501         status = U_ZERO_ERROR;
502         re = uregex_openC("rx", 0, NULL, &status);
503 
504         uregex_setText(re, text1, -1, &status);
505         result = uregex_find(re, 0, &status);
506         TEST_ASSERT(result == TRUE);
507         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
508         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
509         TEST_ASSERT_SUCCESS(status);
510 
511         result = uregex_find(re, 9, &status);
512         TEST_ASSERT(result == TRUE);
513         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
514         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
515         TEST_ASSERT_SUCCESS(status);
516 
517         result = uregex_find(re, 14, &status);
518         TEST_ASSERT(result == FALSE);
519         TEST_ASSERT_SUCCESS(status);
520 
521         status = U_ZERO_ERROR;
522         uregex_reset(re, 0, &status);
523 
524         result = uregex_findNext(re, &status);
525         TEST_ASSERT(result == TRUE);
526         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
527         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
528         TEST_ASSERT_SUCCESS(status);
529 
530         result = uregex_findNext(re, &status);
531         TEST_ASSERT(result == TRUE);
532         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
533         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
534         TEST_ASSERT_SUCCESS(status);
535 
536         status = U_ZERO_ERROR;
537         uregex_reset(re, 12, &status);
538 
539         result = uregex_findNext(re, &status);
540         TEST_ASSERT(result == TRUE);
541         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
542         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
543         TEST_ASSERT_SUCCESS(status);
544 
545         result = uregex_findNext(re, &status);
546         TEST_ASSERT(result == FALSE);
547         TEST_ASSERT_SUCCESS(status);
548 
549         uregex_close(re);
550     }
551 
552     /*
553      *  groupCount
554      */
555     {
556         int32_t result;
557 
558         status = U_ZERO_ERROR;
559         re = uregex_openC("abc", 0, NULL, &status);
560         result = uregex_groupCount(re, &status);
561         TEST_ASSERT_SUCCESS(status);
562         TEST_ASSERT(result == 0);
563         uregex_close(re);
564 
565         status = U_ZERO_ERROR;
566         re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status);
567         result = uregex_groupCount(re, &status);
568         TEST_ASSERT_SUCCESS(status);
569         TEST_ASSERT(result == 3);
570         uregex_close(re);
571 
572     }
573 
574 
575     /*
576      *  group()
577      */
578     {
579         UChar    text1[80];
580         UChar    buf[80];
581         UBool    result;
582         int32_t  resultSz;
583         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
584 
585         status = U_ZERO_ERROR;
586         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
587         TEST_ASSERT_SUCCESS(status);
588 
589 
590         uregex_setText(re, text1, -1, &status);
591         result = uregex_find(re, 0, &status);
592         TEST_ASSERT(result==TRUE);
593 
594         /*  Capture Group 0, the full match.  Should succeed.  */
595         status = U_ZERO_ERROR;
596         resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status);
597         TEST_ASSERT_SUCCESS(status);
598         TEST_ASSERT_STRING("abc interior def", buf, TRUE);
599         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
600 
601         /*  Capture group #1.  Should succeed. */
602         status = U_ZERO_ERROR;
603         resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status);
604         TEST_ASSERT_SUCCESS(status);
605         TEST_ASSERT_STRING(" interior ", buf, TRUE);
606         TEST_ASSERT(resultSz == (int32_t)strlen(" interior "));
607 
608         /*  Capture group out of range.  Error. */
609         status = U_ZERO_ERROR;
610         uregex_group(re, 2, buf, sizeof(buf)/2, &status);
611         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
612 
613         /* NULL buffer, pure pre-flight */
614         status = U_ZERO_ERROR;
615         resultSz = uregex_group(re, 0, NULL, 0, &status);
616         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
617         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
618 
619         /* Too small buffer, truncated string */
620         status = U_ZERO_ERROR;
621         memset(buf, -1, sizeof(buf));
622         resultSz = uregex_group(re, 0, buf, 5, &status);
623         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
624         TEST_ASSERT_STRING("abc i", buf, FALSE);
625         TEST_ASSERT(buf[5] == (UChar)0xffff);
626         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
627 
628         /* Output string just fits buffer, no NUL term. */
629         status = U_ZERO_ERROR;
630         resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status);
631         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
632         TEST_ASSERT_STRING("abc interior def", buf, FALSE);
633         TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def"));
634         TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff);
635 
636         uregex_close(re);
637 
638     }
639 
640     /*
641      *  Regions
642      */
643 
644 
645         /* SetRegion(), getRegion() do something  */
646         TEST_SETUP(".*", "0123456789ABCDEF", 0)
647         UChar resultString[40];
648         TEST_ASSERT(uregex_regionStart(re, &status) == 0);
649         TEST_ASSERT(uregex_regionEnd(re, &status) == 16);
650         uregex_setRegion(re, 3, 6, &status);
651         TEST_ASSERT(uregex_regionStart(re, &status) == 3);
652         TEST_ASSERT(uregex_regionEnd(re, &status) == 6);
653         TEST_ASSERT(uregex_findNext(re, &status));
654         TEST_ASSERT(uregex_group(re, 0, resultString, sizeof(resultString)/2, &status) == 3)
655         TEST_ASSERT_STRING("345", resultString, TRUE);
656         TEST_TEARDOWN;
657 
658         /* find(start=-1) uses regions   */
659         TEST_SETUP(".*", "0123456789ABCDEF", 0);
660         uregex_setRegion(re, 4, 6, &status);
661         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
662         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
663         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
664         TEST_TEARDOWN;
665 
666         /* find (start >=0) does not use regions   */
667         TEST_SETUP(".*", "0123456789ABCDEF", 0);
668         uregex_setRegion(re, 4, 6, &status);
669         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
670         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
671         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
672         TEST_TEARDOWN;
673 
674         /* findNext() obeys regions    */
675         TEST_SETUP(".", "0123456789ABCDEF", 0);
676         uregex_setRegion(re, 4, 6, &status);
677         TEST_ASSERT(uregex_findNext(re,&status) == TRUE);
678         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
679         TEST_ASSERT(uregex_findNext(re, &status) == TRUE);
680         TEST_ASSERT(uregex_start(re, 0, &status) == 5);
681         TEST_ASSERT(uregex_findNext(re, &status) == FALSE);
682         TEST_TEARDOWN;
683 
684         /* matches(start=-1) uses regions                                           */
685         /*    Also, verify that non-greedy *? succeeds in finding the full match.   */
686         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
687         uregex_setRegion(re, 4, 6, &status);
688         TEST_ASSERT(uregex_matches(re, -1, &status) == TRUE);
689         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
690         TEST_ASSERT(uregex_end(re, 0, &status) == 6);
691         TEST_TEARDOWN;
692 
693         /* matches (start >=0) does not use regions       */
694         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
695         uregex_setRegion(re, 4, 6, &status);
696         TEST_ASSERT(uregex_matches(re, 0, &status) == TRUE);
697         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
698         TEST_ASSERT(uregex_end(re, 0, &status) == 16);
699         TEST_TEARDOWN;
700 
701         /* lookingAt(start=-1) uses regions                                         */
702         /*    Also, verify that non-greedy *? finds the first (shortest) match.     */
703         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
704         uregex_setRegion(re, 4, 6, &status);
705         TEST_ASSERT(uregex_lookingAt(re, -1, &status) == TRUE);
706         TEST_ASSERT(uregex_start(re, 0, &status) == 4);
707         TEST_ASSERT(uregex_end(re, 0, &status) == 4);
708         TEST_TEARDOWN;
709 
710         /* lookingAt (start >=0) does not use regions  */
711         TEST_SETUP(".*?", "0123456789ABCDEF", 0);
712         uregex_setRegion(re, 4, 6, &status);
713         TEST_ASSERT(uregex_lookingAt(re, 0, &status) == TRUE);
714         TEST_ASSERT(uregex_start(re, 0, &status) == 0);
715         TEST_ASSERT(uregex_end(re, 0, &status) == 0);
716         TEST_TEARDOWN;
717 
718         /* hitEnd()       */
719         TEST_SETUP("[a-f]*", "abcdefghij", 0);
720         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
721         TEST_ASSERT(uregex_hitEnd(re, &status) == FALSE);
722         TEST_TEARDOWN;
723 
724         TEST_SETUP("[a-f]*", "abcdef", 0);
725         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
726         TEST_ASSERT(uregex_hitEnd(re, &status) == TRUE);
727         TEST_TEARDOWN;
728 
729         /* requireEnd   */
730         TEST_SETUP("abcd", "abcd", 0);
731         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
732         TEST_ASSERT(uregex_requireEnd(re, &status) == FALSE);
733         TEST_TEARDOWN;
734 
735         TEST_SETUP("abcd$", "abcd", 0);
736         TEST_ASSERT(uregex_find(re, 0, &status) == TRUE);
737         TEST_ASSERT(uregex_requireEnd(re, &status) == TRUE);
738         TEST_TEARDOWN;
739 
740         /* anchoringBounds        */
741         TEST_SETUP("abc$", "abcdef", 0);
742         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == TRUE);
743         uregex_useAnchoringBounds(re, FALSE, &status);
744         TEST_ASSERT(uregex_hasAnchoringBounds(re, &status) == FALSE);
745 
746         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);
747         uregex_useAnchoringBounds(re, TRUE, &status);
748         uregex_setRegion(re, 0, 3, &status);
749         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);
750         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
751         TEST_TEARDOWN;
752 
753         /* Transparent Bounds      */
754         TEST_SETUP("abc(?=def)", "abcdef", 0);
755         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == FALSE);
756         uregex_useTransparentBounds(re, TRUE, &status);
757         TEST_ASSERT(uregex_hasTransparentBounds(re, &status) == TRUE);
758 
759         uregex_useTransparentBounds(re, FALSE, &status);
760         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* No Region */
761         uregex_setRegion(re, 0, 3, &status);
762         TEST_ASSERT(uregex_find(re, -1, &status) == FALSE);   /* with region, opaque bounds */
763         uregex_useTransparentBounds(re, TRUE, &status);
764         TEST_ASSERT(uregex_find(re, -1, &status) == TRUE);    /* with region, transparent bounds */
765         TEST_ASSERT(uregex_end(re, 0, &status) == 3);
766         TEST_TEARDOWN;
767 
768 
769     /*
770      *  replaceFirst()
771      */
772     {
773         UChar    text1[80];
774         UChar    text2[80];
775         UChar    replText[80];
776         UChar    buf[80];
777         int32_t  resultSz;
778         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
779         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
780         u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
781 
782         status = U_ZERO_ERROR;
783         re = uregex_openC("x(.*?)x", 0, NULL, &status);
784         TEST_ASSERT_SUCCESS(status);
785 
786         /*  Normal case, with match */
787         uregex_setText(re, text1, -1, &status);
788         resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
789         TEST_ASSERT_SUCCESS(status);
790         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE);
791         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
792 
793         /* No match.  Text should copy to output with no changes.  */
794         status = U_ZERO_ERROR;
795         uregex_setText(re, text2, -1, &status);
796         resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status);
797         TEST_ASSERT_SUCCESS(status);
798         TEST_ASSERT_STRING("No match here.", buf, TRUE);
799         TEST_ASSERT(resultSz == (int32_t)strlen("No match here."));
800 
801         /*  Match, output just fills buffer, no termination warning. */
802         status = U_ZERO_ERROR;
803         uregex_setText(re, text1, -1, &status);
804         memset(buf, -1, sizeof(buf));
805         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
806         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
807         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
808         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
809         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
810 
811         /* Do the replaceFirst again, without first resetting anything.
812          *  Should give the same results.
813          */
814         status = U_ZERO_ERROR;
815         memset(buf, -1, sizeof(buf));
816         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status);
817         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
818         TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE);
819         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
820         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
821 
822         /* NULL buffer, zero buffer length */
823         status = U_ZERO_ERROR;
824         resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status);
825         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
826         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
827 
828         /* Buffer too small by one */
829         status = U_ZERO_ERROR;
830         memset(buf, -1, sizeof(buf));
831         resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status);
832         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
833         TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE);
834         TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x."));
835         TEST_ASSERT(buf[resultSz] == (UChar)0xffff);
836 
837         uregex_close(re);
838     }
839 
840 
841     /*
842      *  replaceAll()
843      */
844     {
845         UChar    text1[80];          /*  "Replace xaax x1x x...x." */
846         UChar    text2[80];          /*  "No match Here"           */
847         UChar    replText[80];       /*  "<$1>"                    */
848         UChar    replText2[80];      /*  "<<$1>>"                  */
849         const char * pattern = "x(.*?)x";
850         const char * expectedResult = "Replace <aa> <1> <...>.";
851         const char * expectedResult2 = "Replace <<aa>> <<1>> <<...>>.";
852         UChar    buf[80];
853         int32_t  resultSize;
854         int32_t  expectedResultSize;
855         int32_t  expectedResultSize2;
856         int32_t  i;
857 
858         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
859         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
860         u_uastrncpy(replText, "<$1>", sizeof(replText)/2);
861         u_uastrncpy(replText2, "<<$1>>", sizeof(replText2)/2);
862         expectedResultSize = strlen(expectedResult);
863         expectedResultSize2 = strlen(expectedResult2);
864 
865         status = U_ZERO_ERROR;
866         re = uregex_openC(pattern, 0, NULL, &status);
867         TEST_ASSERT_SUCCESS(status);
868 
869         /*  Normal case, with match */
870         uregex_setText(re, text1, -1, &status);
871         resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
872         TEST_ASSERT_SUCCESS(status);
873         TEST_ASSERT_STRING(expectedResult, buf, TRUE);
874         TEST_ASSERT(resultSize == expectedResultSize);
875 
876         /* No match.  Text should copy to output with no changes.  */
877         status = U_ZERO_ERROR;
878         uregex_setText(re, text2, -1, &status);
879         resultSize = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status);
880         TEST_ASSERT_SUCCESS(status);
881         TEST_ASSERT_STRING("No match here.", buf, TRUE);
882         TEST_ASSERT(resultSize == u_strlen(text2));
883 
884         /*  Match, output just fills buffer, no termination warning. */
885         status = U_ZERO_ERROR;
886         uregex_setText(re, text1, -1, &status);
887         memset(buf, -1, sizeof(buf));
888         resultSize = uregex_replaceAll(re, replText, -1, buf, expectedResultSize, &status);
889         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
890         TEST_ASSERT_STRING(expectedResult, buf, FALSE);
891         TEST_ASSERT(resultSize == expectedResultSize);
892         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
893 
894         /* Do the replaceFirst again, without first resetting anything.
895          *  Should give the same results.
896          */
897         status = U_ZERO_ERROR;
898         memset(buf, -1, sizeof(buf));
899         resultSize = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status);
900         TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
901         TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE);
902         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
903         TEST_ASSERT(buf[resultSize] == (UChar)0xffff);
904 
905         /* NULL buffer, zero buffer length */
906         status = U_ZERO_ERROR;
907         resultSize = uregex_replaceAll(re, replText, -1, NULL, 0, &status);
908         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
909         TEST_ASSERT(resultSize == (int32_t)strlen("Replace <aa> <1> <...>."));
910 
911         /* Buffer too small.  Try every size, which will tickle edge cases
912          * in uregex_appendReplacement (used by replaceAll)   */
913         for (i=0; i<expectedResultSize; i++) {
914             char  expected[80];
915             status = U_ZERO_ERROR;
916             memset(buf, -1, sizeof(buf));
917             resultSize = uregex_replaceAll(re, replText, -1, buf, i, &status);
918             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
919             strcpy(expected, expectedResult);
920             expected[i] = 0;
921             TEST_ASSERT_STRING(expected, buf, FALSE);
922             TEST_ASSERT(resultSize == expectedResultSize);
923             TEST_ASSERT(buf[i] == (UChar)0xffff);
924         }
925 
926         /* Buffer too small.  Same as previous test, except this time the replacement
927          * text is longer than the match capture group, making the length of the complete
928          * replacement longer than the original string.
929          */
930         for (i=0; i<expectedResultSize2; i++) {
931             char  expected[80];
932             status = U_ZERO_ERROR;
933             memset(buf, -1, sizeof(buf));
934             resultSize = uregex_replaceAll(re, replText2, -1, buf, i, &status);
935             TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
936             strcpy(expected, expectedResult2);
937             expected[i] = 0;
938             TEST_ASSERT_STRING(expected, buf, FALSE);
939             TEST_ASSERT(resultSize == expectedResultSize2);
940             TEST_ASSERT(buf[i] == (UChar)0xffff);
941         }
942 
943 
944         uregex_close(re);
945     }
946 
947 
948     /*
949      *  appendReplacement()
950      */
951     {
952         UChar    text[100];
953         UChar    repl[100];
954         UChar    buf[100];
955         UChar   *bufPtr;
956         int32_t  bufCap;
957 
958 
959         status = U_ZERO_ERROR;
960         re = uregex_openC(".*", 0, 0, &status);
961         TEST_ASSERT_SUCCESS(status);
962 
963         u_uastrncpy(text, "whatever",  sizeof(text)/2);
964         u_uastrncpy(repl, "some other", sizeof(repl)/2);
965         uregex_setText(re, text, -1, &status);
966 
967         /* match covers whole target string */
968         uregex_find(re, 0, &status);
969         TEST_ASSERT_SUCCESS(status);
970         bufPtr = buf;
971         bufCap = sizeof(buf) / 2;
972         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
973         TEST_ASSERT_SUCCESS(status);
974         TEST_ASSERT_STRING("some other", buf, TRUE);
975 
976         /* Match has \u \U escapes */
977         uregex_find(re, 0, &status);
978         TEST_ASSERT_SUCCESS(status);
979         bufPtr = buf;
980         bufCap = sizeof(buf) / 2;
981         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
982         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
983         TEST_ASSERT_SUCCESS(status);
984         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
985 
986         /* Bug 6813, parameter check of NULL destCapacity; crashed before fix. */
987         status = U_ZERO_ERROR;
988         uregex_find(re, 0, &status);
989         TEST_ASSERT_SUCCESS(status);
990         bufPtr = buf;
991         status = U_BUFFER_OVERFLOW_ERROR;
992         uregex_appendReplacement(re, repl, -1, &bufPtr, NULL, &status);
993         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
994 
995         uregex_close(re);
996     }
997 
998 
999     /*
1000      *  appendTail().   Checked in ReplaceFirst(), replaceAll().
1001      */
1002 
1003     /*
1004      *  split()
1005      */
1006     {
1007         UChar    textToSplit[80];
1008         UChar    text2[80];
1009         UChar    buf[200];
1010         UChar    *fields[10];
1011         int32_t  numFields;
1012         int32_t  requiredCapacity;
1013         int32_t  spaceNeeded;
1014         int32_t  sz;
1015 
1016         u_uastrncpy(textToSplit, "first : second:  third",  sizeof(textToSplit)/2);
1017         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
1018 
1019         status = U_ZERO_ERROR;
1020         re = uregex_openC(":", 0, NULL, &status);
1021 
1022 
1023         /*  Simple split */
1024 
1025         uregex_setText(re, textToSplit, -1, &status);
1026         TEST_ASSERT_SUCCESS(status);
1027 
1028         /* The TEST_ASSERT_SUCCESS call above should change too... */
1029         if (U_SUCCESS(status)) {
1030             memset(fields, -1, sizeof(fields));
1031             numFields =
1032                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
1033             TEST_ASSERT_SUCCESS(status);
1034 
1035             /* The TEST_ASSERT_SUCCESS call above should change too... */
1036             if(U_SUCCESS(status)) {
1037                 TEST_ASSERT(numFields == 3);
1038                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1039                 TEST_ASSERT_STRING(" second", fields[1], TRUE);
1040                 TEST_ASSERT_STRING("  third", fields[2], TRUE);
1041                 TEST_ASSERT(fields[3] == NULL);
1042 
1043                 spaceNeeded = u_strlen(textToSplit) -
1044                             (numFields - 1)  +  /* Field delimiters do not appear in output */
1045                             numFields;          /* Each field gets a NUL terminator */
1046 
1047                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1048             }
1049         }
1050 
1051         uregex_close(re);
1052 
1053 
1054         /*  Split with too few output strings available */
1055         status = U_ZERO_ERROR;
1056         re = uregex_openC(":", 0, NULL, &status);
1057         uregex_setText(re, textToSplit, -1, &status);
1058         TEST_ASSERT_SUCCESS(status);
1059 
1060         /* The TEST_ASSERT_SUCCESS call above should change too... */
1061         if(U_SUCCESS(status)) {
1062             memset(fields, -1, sizeof(fields));
1063             numFields =
1064                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
1065             TEST_ASSERT_SUCCESS(status);
1066 
1067             /* The TEST_ASSERT_SUCCESS call above should change too... */
1068             if(U_SUCCESS(status)) {
1069                 TEST_ASSERT(numFields == 2);
1070                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1071                 TEST_ASSERT_STRING(" second:  third", fields[1], TRUE);
1072                 TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1073 
1074                 spaceNeeded = u_strlen(textToSplit) -
1075                             (numFields - 1)  +  /* Field delimiters do not appear in output */
1076                             numFields;          /* Each field gets a NUL terminator */
1077 
1078                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1079 
1080                 /* Split with a range of output buffer sizes.  */
1081                 spaceNeeded = u_strlen(textToSplit) -
1082                     (numFields - 1)  +  /* Field delimiters do not appear in output */
1083                     numFields;          /* Each field gets a NUL terminator */
1084 
1085                 for (sz=0; sz < spaceNeeded+1; sz++) {
1086                     memset(fields, -1, sizeof(fields));
1087                     status = U_ZERO_ERROR;
1088                     numFields =
1089                         uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status);
1090                     if (sz >= spaceNeeded) {
1091                         TEST_ASSERT_SUCCESS(status);
1092                         TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1093                         TEST_ASSERT_STRING(" second", fields[1], TRUE);
1094                         TEST_ASSERT_STRING("  third", fields[2], TRUE);
1095                     } else {
1096                         TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
1097                     }
1098                     TEST_ASSERT(numFields == 3);
1099                     TEST_ASSERT(fields[3] == NULL);
1100                     TEST_ASSERT(spaceNeeded == requiredCapacity);
1101                 }
1102             }
1103         }
1104 
1105         uregex_close(re);
1106     }
1107 
1108 
1109 
1110 
1111     /* Split(), part 2.  Patterns with capture groups.  The capture group text
1112      *                   comes out as additional fields.  */
1113     {
1114         UChar    textToSplit[80];
1115         UChar    buf[200];
1116         UChar    *fields[10];
1117         int32_t  numFields;
1118         int32_t  requiredCapacity;
1119         int32_t  spaceNeeded;
1120         int32_t  sz;
1121 
1122         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  sizeof(textToSplit)/2);
1123 
1124         status = U_ZERO_ERROR;
1125         re = uregex_openC("<(.*?)>", 0, NULL, &status);
1126 
1127         uregex_setText(re, textToSplit, -1, &status);
1128         TEST_ASSERT_SUCCESS(status);
1129 
1130         /* The TEST_ASSERT_SUCCESS call above should change too... */
1131         if(U_SUCCESS(status)) {
1132             memset(fields, -1, sizeof(fields));
1133             numFields =
1134                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status);
1135             TEST_ASSERT_SUCCESS(status);
1136 
1137             /* The TEST_ASSERT_SUCCESS call above should change too... */
1138             if(U_SUCCESS(status)) {
1139                 TEST_ASSERT(numFields == 5);
1140                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1141                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1142                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1143                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1144                 TEST_ASSERT_STRING("  third", fields[4], TRUE);
1145                 TEST_ASSERT(fields[5] == NULL);
1146                 spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1147                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1148             }
1149         }
1150 
1151         /*  Split with too few output strings available (2) */
1152         status = U_ZERO_ERROR;
1153         memset(fields, -1, sizeof(fields));
1154         numFields =
1155             uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status);
1156         TEST_ASSERT_SUCCESS(status);
1157 
1158         /* The TEST_ASSERT_SUCCESS call above should change too... */
1159         if(U_SUCCESS(status)) {
1160             TEST_ASSERT(numFields == 2);
1161             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1162             TEST_ASSERT_STRING(" second<tag-b>  third", fields[1], TRUE);
1163             TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*)));
1164 
1165             spaceNeeded = strlen("first . second<tag-b>  third.");  /* "." at NUL positions */
1166             TEST_ASSERT(spaceNeeded == requiredCapacity);
1167         }
1168 
1169         /*  Split with too few output strings available (3) */
1170         status = U_ZERO_ERROR;
1171         memset(fields, -1, sizeof(fields));
1172         numFields =
1173             uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &status);
1174         TEST_ASSERT_SUCCESS(status);
1175 
1176         /* The TEST_ASSERT_SUCCESS call above should change too... */
1177         if(U_SUCCESS(status)) {
1178             TEST_ASSERT(numFields == 3);
1179             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1180             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1181             TEST_ASSERT_STRING(" second<tag-b>  third", fields[2], TRUE);
1182             TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*)));
1183 
1184             spaceNeeded = strlen("first .tag-a. second<tag-b>  third.");  /* "." at NUL positions */
1185             TEST_ASSERT(spaceNeeded == requiredCapacity);
1186         }
1187 
1188         /*  Split with just enough output strings available (5) */
1189         status = U_ZERO_ERROR;
1190         memset(fields, -1, sizeof(fields));
1191         numFields =
1192             uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &status);
1193         TEST_ASSERT_SUCCESS(status);
1194 
1195         /* The TEST_ASSERT_SUCCESS call above should change too... */
1196         if(U_SUCCESS(status)) {
1197             TEST_ASSERT(numFields == 5);
1198             TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1199             TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1200             TEST_ASSERT_STRING(" second", fields[2], TRUE);
1201             TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1202             TEST_ASSERT_STRING("  third", fields[4], TRUE);
1203             TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*)));
1204 
1205             spaceNeeded = strlen("first .tag-a. second.tag-b.  third.");  /* "." at NUL positions */
1206             TEST_ASSERT(spaceNeeded == requiredCapacity);
1207         }
1208 
1209         /* Split, end of text is a field delimiter.   */
1210         status = U_ZERO_ERROR;
1211         sz = strlen("first <tag-a> second<tag-b>");
1212         uregex_setText(re, textToSplit, sz, &status);
1213         TEST_ASSERT_SUCCESS(status);
1214 
1215         /* The TEST_ASSERT_SUCCESS call above should change too... */
1216         if(U_SUCCESS(status)) {
1217             memset(fields, -1, sizeof(fields));
1218             numFields =
1219                 uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 9, &status);
1220             TEST_ASSERT_SUCCESS(status);
1221 
1222             /* The TEST_ASSERT_SUCCESS call above should change too... */
1223             if(U_SUCCESS(status)) {
1224                 TEST_ASSERT(numFields == 4);
1225                 TEST_ASSERT_STRING("first ",  fields[0], TRUE);
1226                 TEST_ASSERT_STRING("tag-a",   fields[1], TRUE);
1227                 TEST_ASSERT_STRING(" second", fields[2], TRUE);
1228                 TEST_ASSERT_STRING("tag-b",   fields[3], TRUE);
1229                 TEST_ASSERT(fields[4] == NULL);
1230                 TEST_ASSERT(fields[8] == NULL);
1231                 TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*)));
1232                 spaceNeeded = strlen("first .tag-a. second.tag-b.");  /* "." at NUL positions */
1233                 TEST_ASSERT(spaceNeeded == requiredCapacity);
1234             }
1235         }
1236 
1237         uregex_close(re);
1238     }
1239 
1240     /*
1241      * set/getTimeLimit
1242      */
1243      TEST_SETUP("abc$", "abcdef", 0);
1244      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 0);
1245      uregex_setTimeLimit(re, 1000, &status);
1246      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1247      TEST_ASSERT_SUCCESS(status);
1248      uregex_setTimeLimit(re, -1, &status);
1249      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1250      status = U_ZERO_ERROR;
1251      TEST_ASSERT(uregex_getTimeLimit(re, &status) == 1000);
1252      TEST_TEARDOWN;
1253 
1254      /*
1255       * set/get Stack Limit
1256       */
1257      TEST_SETUP("abc$", "abcdef", 0);
1258      TEST_ASSERT(uregex_getStackLimit(re, &status) == 8000000);
1259      uregex_setStackLimit(re, 40000, &status);
1260      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1261      TEST_ASSERT_SUCCESS(status);
1262      uregex_setStackLimit(re, -1, &status);
1263      TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR);
1264      status = U_ZERO_ERROR;
1265      TEST_ASSERT(uregex_getStackLimit(re, &status) == 40000);
1266      TEST_TEARDOWN;
1267 
1268 
1269      /*
1270       * Get/Set callback functions
1271       *     This test is copied from intltest regex/Callbacks
1272       *     The pattern and test data will run long enough to cause the callback
1273       *       to be invoked.  The nested '+' operators give exponential time
1274       *       behavior with increasing string length.
1275       */
1276      TEST_SETUP("((.)+\\2)+x", "aaaaaaaaaaaaaaaaaaab", 0)
1277      callBackContext cbInfo = {4, 0, 0};
1278      const void     *pContext   = &cbInfo;
1279      URegexMatchCallback    *returnedFn = &TestCallbackFn;
1280 
1281      /*  Getting the callback fn when it hasn't been set must return NULL  */
1282      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1283      TEST_ASSERT_SUCCESS(status);
1284      TEST_ASSERT(returnedFn == NULL);
1285      TEST_ASSERT(pContext == NULL);
1286 
1287      /* Set thecallback and do a match.                                   */
1288      /* The callback function should record that it has been called.      */
1289      uregex_setMatchCallback(re, &TestCallbackFn, &cbInfo, &status);
1290      TEST_ASSERT_SUCCESS(status);
1291      TEST_ASSERT(cbInfo.numCalls == 0);
1292      TEST_ASSERT(uregex_matches(re, -1, &status) == FALSE);
1293      TEST_ASSERT_SUCCESS(status);
1294      TEST_ASSERT(cbInfo.numCalls > 0);
1295 
1296      /* Getting the callback should return the values that were set above.  */
1297      uregex_getMatchCallback(re, &returnedFn, &pContext, &status);
1298      TEST_ASSERT(returnedFn == &TestCallbackFn);
1299      TEST_ASSERT(pContext == &cbInfo);
1300 
1301      TEST_TEARDOWN;
1302 }
1303 
1304 
1305 
TestBug4315(void)1306 static void TestBug4315(void) {
1307     UErrorCode      theICUError = U_ZERO_ERROR;
1308     URegularExpression *theRegEx;
1309     UChar           *textBuff;
1310     const char      *thePattern;
1311     UChar            theString[100];
1312     UChar           *destFields[24];
1313     int32_t         neededLength1;
1314     int32_t         neededLength2;
1315 
1316     int32_t         wordCount = 0;
1317     int32_t         destFieldsSize = 24;
1318 
1319     thePattern  = "ck ";
1320     u_uastrcpy(theString, "The quick brown fox jumped over the slow black turtle.");
1321 
1322     /* open a regex */
1323     theRegEx = uregex_openC(thePattern, 0, NULL, &theICUError);
1324     TEST_ASSERT_SUCCESS(theICUError);
1325 
1326     /* set the input string */
1327     uregex_setText(theRegEx, theString, u_strlen(theString), &theICUError);
1328     TEST_ASSERT_SUCCESS(theICUError);
1329 
1330     /* split */
1331     /*explicitly pass NULL and 0 to force the overflow error -> this is where the
1332      *  error occurs! */
1333     wordCount = uregex_split(theRegEx, NULL, 0, &neededLength1, destFields,
1334         destFieldsSize, &theICUError);
1335 
1336     TEST_ASSERT(theICUError == U_BUFFER_OVERFLOW_ERROR);
1337     TEST_ASSERT(wordCount==3);
1338 
1339     if(theICUError == U_BUFFER_OVERFLOW_ERROR)
1340     {
1341         theICUError = U_ZERO_ERROR;
1342         textBuff = (UChar *) malloc(sizeof(UChar) * (neededLength1 + 1));
1343         wordCount = uregex_split(theRegEx, textBuff, neededLength1+1, &neededLength2,
1344             destFields, destFieldsSize, &theICUError);
1345         TEST_ASSERT(wordCount==3);
1346         TEST_ASSERT_SUCCESS(theICUError);
1347         TEST_ASSERT(neededLength1 == neededLength2);
1348         TEST_ASSERT_STRING("The qui", destFields[0], TRUE);
1349         TEST_ASSERT_STRING("brown fox jumped over the slow bla", destFields[1], TRUE);
1350         TEST_ASSERT_STRING("turtle.", destFields[2], TRUE);
1351         TEST_ASSERT(destFields[3] == NULL);
1352         free(textBuff);
1353     }
1354     uregex_close(theRegEx);
1355 }
1356 
1357 /* Based on TestRegexCAPI() */
TestUTextAPI(void)1358 static void TestUTextAPI(void) {
1359     UErrorCode           status = U_ZERO_ERROR;
1360     URegularExpression  *re;
1361     UText                patternText = UTEXT_INITIALIZER;
1362     UChar                pat[200];
1363     const char           patternTextUTF8[5] = { 0x61, 0x62, 0x63, 0x2a, 0x00 };
1364 
1365     /* Mimimalist open/close */
1366     utext_openUTF8(&patternText, patternTextUTF8, -1, &status);
1367     re = uregex_openUText(&patternText, 0, 0, &status);
1368     if (U_FAILURE(status)) {
1369          log_data_err("Failed to open regular expression, %s:%d, error is \"%s\" (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));
1370          utext_close(&patternText);
1371          return;
1372     }
1373     uregex_close(re);
1374 
1375     /* Open with all flag values set */
1376     status = U_ZERO_ERROR;
1377     re = uregex_openUText(&patternText,
1378         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD,
1379         0, &status);
1380     TEST_ASSERT_SUCCESS(status);
1381     uregex_close(re);
1382 
1383     /* Open with an invalid flag */
1384     status = U_ZERO_ERROR;
1385     re = uregex_openUText(&patternText, 0x40000000, 0, &status);
1386     TEST_ASSERT(status == U_REGEX_INVALID_FLAG);
1387     uregex_close(re);
1388 
1389     /* open with an invalid parameter */
1390     status = U_ZERO_ERROR;
1391     re = uregex_openUText(NULL,
1392         UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, 0, &status);
1393     TEST_ASSERT(status == U_ILLEGAL_ARGUMENT_ERROR && re == NULL);
1394 
1395     /*
1396      *  clone
1397      */
1398     {
1399         URegularExpression *clone1;
1400         URegularExpression *clone2;
1401         URegularExpression *clone3;
1402         UChar  testString1[30];
1403         UChar  testString2[30];
1404         UBool  result;
1405 
1406 
1407         status = U_ZERO_ERROR;
1408         re = uregex_openUText(&patternText, 0, 0, &status);
1409         TEST_ASSERT_SUCCESS(status);
1410         clone1 = uregex_clone(re, &status);
1411         TEST_ASSERT_SUCCESS(status);
1412         TEST_ASSERT(clone1 != NULL);
1413 
1414         status = U_ZERO_ERROR;
1415         clone2 = uregex_clone(re, &status);
1416         TEST_ASSERT_SUCCESS(status);
1417         TEST_ASSERT(clone2 != NULL);
1418         uregex_close(re);
1419 
1420         status = U_ZERO_ERROR;
1421         clone3 = uregex_clone(clone2, &status);
1422         TEST_ASSERT_SUCCESS(status);
1423         TEST_ASSERT(clone3 != NULL);
1424 
1425         u_uastrncpy(testString1, "abcccd", sizeof(pat)/2);
1426         u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2);
1427 
1428         status = U_ZERO_ERROR;
1429         uregex_setText(clone1, testString1, -1, &status);
1430         TEST_ASSERT_SUCCESS(status);
1431         result = uregex_lookingAt(clone1, 0, &status);
1432         TEST_ASSERT_SUCCESS(status);
1433         TEST_ASSERT(result==TRUE);
1434 
1435         status = U_ZERO_ERROR;
1436         uregex_setText(clone2, testString2, -1, &status);
1437         TEST_ASSERT_SUCCESS(status);
1438         result = uregex_lookingAt(clone2, 0, &status);
1439         TEST_ASSERT_SUCCESS(status);
1440         TEST_ASSERT(result==FALSE);
1441         result = uregex_find(clone2, 0, &status);
1442         TEST_ASSERT_SUCCESS(status);
1443         TEST_ASSERT(result==TRUE);
1444 
1445         uregex_close(clone1);
1446         uregex_close(clone2);
1447         uregex_close(clone3);
1448 
1449     }
1450 
1451     /*
1452      *  pattern() and patternText()
1453      */
1454     {
1455         const UChar  *resultPat;
1456         int32_t       resultLen;
1457         UText        *resultText;
1458         const char str_hello[] = { 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x00 }; /* hello */
1459         const char str_hel[] = { 0x68, 0x65, 0x6c, 0x00 }; /* hel */
1460         u_uastrncpy(pat, "hello", sizeof(pat)/2); /* for comparison */
1461         status = U_ZERO_ERROR;
1462 
1463         utext_openUTF8(&patternText, str_hello, -1, &status);
1464         re = uregex_open(pat, -1, 0, NULL, &status);
1465         resultPat = uregex_pattern(re, &resultLen, &status);
1466         TEST_ASSERT_SUCCESS(status);
1467 
1468         /* The TEST_ASSERT_SUCCESS above should change too... */
1469         if (U_SUCCESS(status)) {
1470             TEST_ASSERT(resultLen == -1);
1471             TEST_ASSERT(u_strcmp(resultPat, pat) == 0);
1472         }
1473 
1474         resultText = uregex_patternUText(re, &status);
1475         TEST_ASSERT_SUCCESS(status);
1476         TEST_ASSERT_UTEXT(str_hello, resultText);
1477 
1478         uregex_close(re);
1479 
1480         status = U_ZERO_ERROR;
1481         re = uregex_open(pat, 3, 0, NULL, &status);
1482         resultPat = uregex_pattern(re, &resultLen, &status);
1483         TEST_ASSERT_SUCCESS(status);
1484 
1485         /* The TEST_ASSERT_SUCCESS above should change too... */
1486         if (U_SUCCESS(status)) {
1487             TEST_ASSERT(resultLen == 3);
1488             TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0);
1489             TEST_ASSERT(u_strlen(resultPat) == 3);
1490         }
1491 
1492         resultText = uregex_patternUText(re, &status);
1493         TEST_ASSERT_SUCCESS(status);
1494         TEST_ASSERT_UTEXT(str_hel, resultText);
1495 
1496         uregex_close(re);
1497     }
1498 
1499     /*
1500      *  setUText() and lookingAt()
1501      */
1502     {
1503         UText  text1 = UTEXT_INITIALIZER;
1504         UText  text2 = UTEXT_INITIALIZER;
1505         UBool  result;
1506         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* abcccd */
1507         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* abcccxd */
1508         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* abc*d */
1509         status = U_ZERO_ERROR;
1510         utext_openUTF8(&text1, str_abcccd, -1, &status);
1511         utext_openUTF8(&text2, str_abcccxd, -1, &status);
1512 
1513         utext_openUTF8(&patternText, str_abcd, -1, &status);
1514         re = uregex_openUText(&patternText, 0, NULL, &status);
1515         TEST_ASSERT_SUCCESS(status);
1516 
1517         /* Operation before doing a setText should fail... */
1518         status = U_ZERO_ERROR;
1519         uregex_lookingAt(re, 0, &status);
1520         TEST_ASSERT( status== U_REGEX_INVALID_STATE);
1521 
1522         status = U_ZERO_ERROR;
1523         uregex_setUText(re, &text1, &status);
1524         result = uregex_lookingAt(re, 0, &status);
1525         TEST_ASSERT(result == TRUE);
1526         TEST_ASSERT_SUCCESS(status);
1527 
1528         status = U_ZERO_ERROR;
1529         uregex_setUText(re, &text2, &status);
1530         result = uregex_lookingAt(re, 0, &status);
1531         TEST_ASSERT(result == FALSE);
1532         TEST_ASSERT_SUCCESS(status);
1533 
1534         status = U_ZERO_ERROR;
1535         uregex_setUText(re, &text1, &status);
1536         result = uregex_lookingAt(re, 0, &status);
1537         TEST_ASSERT(result == TRUE);
1538         TEST_ASSERT_SUCCESS(status);
1539 
1540         uregex_close(re);
1541         utext_close(&text1);
1542         utext_close(&text2);
1543     }
1544 
1545 
1546     /*
1547      *  getText() and getUText(): set the input first as a UText, then as a UChar buffer, and read it back both ways.
1548      */
1549     {
1550         UText  text1 = UTEXT_INITIALIZER;
1551         UText  text2 = UTEXT_INITIALIZER;
1552         UChar  text2Chars[20];
1553         UText  *resultText;
1554         const UChar   *result;
1555         int32_t  textLength;
1556         const char str_abcccd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x65, 0x00 }; /* bytes spell "bcddde" in ASCII (6 chars) */
1557         const char str_abcccxd[] = { 0x62, 0x63, 0x64, 0x64, 0x64, 0x79, 0x65, 0x00 }; /* bytes spell "bcdddye" in ASCII (7 chars) */
1558         const char str_abcd[] = { 0x62, 0x63, 0x64, 0x2b, 0x65, 0x00 }; /* bytes spell "bcd+e" in ASCII */
1559         /* Only text lengths and round-trip equality are asserted in this block; no match is attempted. */
1560 
1561         status = U_ZERO_ERROR;
1562         utext_openUTF8(&text1, str_abcccd, -1, &status);
1563         u_uastrncpy(text2Chars, str_abcccxd, sizeof(text2Chars)/sizeof(text2Chars[0])); /* bound = capacity of text2Chars in UChars, not the size of the UText struct */
1564         utext_openUChars(&text2, text2Chars, -1, &status);
1565 
1566         utext_openUTF8(&patternText, str_abcd, -1, &status);
1567         re = uregex_openUText(&patternText, 0, NULL, &status);
1568 
1569         /* First set a UText */
1570         uregex_setUText(re, &text1, &status);
1571         resultText = uregex_getUText(re, NULL, &status);
1572         TEST_ASSERT_SUCCESS(status);
1573         TEST_ASSERT(resultText != &text1); /* expect a distinct UText object, not the caller's */
1574         utext_setNativeIndex(resultText, 0);
1575         utext_setNativeIndex(&text1, 0);
1576         TEST_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0);
1577         utext_close(resultText);
1578         /* Repeat the UText round trip after getText() has been called on the same regex. */
1579         result = uregex_getText(re, &textLength, &status); /* flattens UText into buffer */
1580         TEST_ASSERT(textLength == -1 || textLength == 6);
1581         resultText = uregex_getUText(re, NULL, &status);
1582         TEST_ASSERT_SUCCESS(status);
1583         TEST_ASSERT(resultText != &text1);
1584         utext_setNativeIndex(resultText, 0);
1585         utext_setNativeIndex(&text1, 0);
1586         TEST_ASSERT(utext_compare(resultText, -1, &text1, -1) == 0);
1587         utext_close(resultText);
1588 
1589         /* Then set a UChar * with an explicit length of 7 and compare against text2, which wraps the same buffer */
1590         uregex_setText(re, text2Chars, 7, &status);
1591         resultText = uregex_getUText(re, NULL, &status);
1592         TEST_ASSERT_SUCCESS(status);
1593         utext_setNativeIndex(resultText, 0);
1594         utext_setNativeIndex(&text2, 0);
1595         TEST_ASSERT(utext_compare(resultText, -1, &text2, -1) == 0);
1596         utext_close(resultText);
1597         result = uregex_getText(re, &textLength, &status);
1598         TEST_ASSERT(textLength == 7);
1599 
1600         uregex_close(re);
1601         utext_close(&text1);
1602         utext_close(&text2);
1603     }
1604 
1605     /*
1606      *  matches(): exercised with a non-matching text, a start index of 7 in a 7-character text, and an empty text.
1607      */
1608     {
1609         UText   text1 = UTEXT_INITIALIZER;
1610         UBool   result;
1611         UText   nullText = UTEXT_INITIALIZER;
1612         const char str_abcccde[] = { 0x61, 0x62, 0x63, 0x63, 0x63, 0x64, 0x65, 0x00 }; /* abcccde */
1613         const char str_abcd[] = { 0x61, 0x62, 0x63, 0x2a, 0x64, 0x00 }; /* abc*d */
1614 
1615         status = U_ZERO_ERROR;
1616         utext_openUTF8(&text1, str_abcccde, -1, &status);
1617         utext_openUTF8(&patternText, str_abcd, -1, &status);
1618         re = uregex_openUText(&patternText, 0, NULL, &status);
1619         /* Pattern "abc*d" against "abcccde": the test expects FALSE (presumably because of the trailing 'e'). */
1620         uregex_setUText(re, &text1, &status);
1621         result = uregex_matches(re, 0, &status);
1622         TEST_ASSERT(result == FALSE);
1623         TEST_ASSERT_SUCCESS(status);
1624         uregex_close(re);
1625         /* New pattern ".?"; start index 7 equals the length of "abcccde", and the test expects TRUE there. */
1626         status = U_ZERO_ERROR;
1627         re = uregex_openC(".?", 0, NULL, &status);
1628         uregex_setUText(re, &text1, &status);
1629         result = uregex_matches(re, 7, &status);
1630         TEST_ASSERT(result == TRUE);
1631         TEST_ASSERT_SUCCESS(status);
1632         /* Same ".?" pattern against an empty input (nullText wraps ""); TRUE is expected again. */
1633         status = U_ZERO_ERROR;
1634         utext_openUTF8(&nullText, "", -1, &status);
1635         uregex_setUText(re, &nullText, &status);
1636         TEST_ASSERT_SUCCESS(status);
1637         result = uregex_matches(re, 0, &status);
1638         TEST_ASSERT(result == TRUE);
1639         TEST_ASSERT_SUCCESS(status);
1640 
1641         uregex_close(re);
1642         utext_close(&text1);
1643         utext_close(&nullText);
1644     }
1645 
1646 
1647     /*
1648      *  lookingAt()    Used in setText test.
1649      */
1650 
1651 
1652     /*
1653      *  find(), findNext, start, end, reset: locate each "rx" in "012rx5rx890rxrx..." (offsets 3, 6, 11 and 13).
1654      */
1655     {
1656         UChar    text1[50];
1657         UBool    result;
1658         u_uastrncpy(text1, "012rx5rx890rxrx...",  sizeof(text1)/2);
1659         status = U_ZERO_ERROR;
1660         re = uregex_openC("rx", 0, NULL, &status);
1661         /* find() from index 0: the first "rx" spans [3, 5). */
1662         uregex_setText(re, text1, -1, &status);
1663         result = uregex_find(re, 0, &status);
1664         TEST_ASSERT(result == TRUE);
1665         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1666         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1667         TEST_ASSERT_SUCCESS(status);
1668         /* find() from index 9: the match expected is the "rx" at [11, 13). */
1669         result = uregex_find(re, 9, &status);
1670         TEST_ASSERT(result == TRUE);
1671         TEST_ASSERT(uregex_start(re, 0, &status) == 11);
1672         TEST_ASSERT(uregex_end(re, 0, &status) == 13);
1673         TEST_ASSERT_SUCCESS(status);
1674         /* find() from index 14: no "rx" begins at or after that offset, so FALSE is expected without an error. */
1675         result = uregex_find(re, 14, &status);
1676         TEST_ASSERT(result == FALSE);
1677         TEST_ASSERT_SUCCESS(status);
1678 
1679         status = U_ZERO_ERROR;
1680         uregex_reset(re, 0, &status);
1681         /* After reset to 0, successive findNext() calls are expected to yield [3, 5) and then [6, 8). */
1682         result = uregex_findNext(re, &status);
1683         TEST_ASSERT(result == TRUE);
1684         TEST_ASSERT(uregex_start(re, 0, &status) == 3);
1685         TEST_ASSERT(uregex_end(re, 0, &status) == 5);
1686         TEST_ASSERT_SUCCESS(status);
1687 
1688         result = uregex_findNext(re, &status);
1689         TEST_ASSERT(result == TRUE);
1690         TEST_ASSERT(uregex_start(re, 0, &status) == 6);
1691         TEST_ASSERT(uregex_end(re, 0, &status) == 8);
1692         TEST_ASSERT_SUCCESS(status);
1693 
1694         status = U_ZERO_ERROR;
1695         uregex_reset(re, 12, &status);
1696         /* After reset to 12, findNext() is expected to yield [13, 15) and then report no further match. */
1697         result = uregex_findNext(re, &status);
1698         TEST_ASSERT(result == TRUE);
1699         TEST_ASSERT(uregex_start(re, 0, &status) == 13);
1700         TEST_ASSERT(uregex_end(re, 0, &status) == 15);
1701         TEST_ASSERT_SUCCESS(status);
1702 
1703         result = uregex_findNext(re, &status);
1704         TEST_ASSERT(result == FALSE);
1705         TEST_ASSERT_SUCCESS(status);
1706 
1707         uregex_close(re);
1708     }
1709 
1710     /*
1711      *  group(): fetch capture groups 0 and 1 as UText (deep and shallow variants) and check an out-of-range group number.
1712      */
1713     {
1714         UChar    text1[80];
1715         UText   *actual;
1716         UBool    result;
1717 
1718         const char str_abcinteriordef[] = { 0x61, 0x62, 0x63, 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x64, 0x65, 0x66, 0x00 }; /* abc interior def */
1719         const char str_interior[] = { 0x20, 0x69, 0x6e, 0x74, 0x65, 0x72, 0x69, 0x6f, 0x72, 0x20, 0x00 }; /* ' interior ' */
1720 
1721 
1722         u_uastrncpy(text1, "noise abc interior def, and this is off the end",  sizeof(text1)/2);
1723 
1724         status = U_ZERO_ERROR;
1725         re = uregex_openC("abc(.*?)def", 0, NULL, &status);
1726         TEST_ASSERT_SUCCESS(status);
1727 
1728         uregex_setText(re, text1, -1, &status);
1729         result = uregex_find(re, 0, &status);
1730         TEST_ASSERT(result==TRUE);
1731 
1732         /*  Capture Group 0, the full match.  Should succeed.  */
1733         status = U_ZERO_ERROR;
1734         actual = uregex_groupUTextDeep(re, 0, NULL, &status);
1735         TEST_ASSERT_SUCCESS(status);
1736         TEST_ASSERT_UTEXT(str_abcinteriordef, actual);
1737         utext_close(actual);
1738 
1739         /*  Capture Group 0 with shallow clone API.  Should succeed.  */
1740         status = U_ZERO_ERROR;
1741         {
1742             int64_t      group_len;
1743             int32_t      len16;
1744             UErrorCode   shallowStatus = U_ZERO_ERROR;
1745             int64_t      nativeIndex;
1746             UChar *groupChars;
1747             UText groupText = UTEXT_INITIALIZER;
1748 
1749             actual = uregex_groupUText(re, 0, NULL, &group_len, &status);
1750             TEST_ASSERT_SUCCESS(status);
1751 
1752             nativeIndex = utext_getNativeIndex(actual);
1753             /*  Following returns U_INDEX_OUTOFBOUNDS_ERROR... looks like a bug in ucstrFuncs UTextFuncs [utext.cpp]  */
1754             /*  len16 = utext_extract(actual, nativeIndex, nativeIndex + group_len, NULL, 0, &shallowStatus);  */
1755             len16 = (int32_t)group_len; /* explicit narrowing; the group length is used directly as the UChar count */
1756 
1757             groupChars = (UChar *)malloc(sizeof(UChar)*(len16+1)); /* +1 presumably leaves room for a terminating NUL */
1758             utext_extract(actual, nativeIndex, nativeIndex + group_len, groupChars, len16+1, &shallowStatus);
1759             /* NOTE(review): shallowStatus and the malloc result are never checked here; a failure would surface, if at all, via the comparison below. */
1760             utext_openUChars(&groupText, groupChars, len16, &shallowStatus);
1761 
1762             TEST_ASSERT_UTEXT(str_abcinteriordef, &groupText);
1763             utext_close(&groupText);
1764             free(groupChars);
1765         }
1766         utext_close(actual);
1767 
1768         /*  Capture group #1.  Should succeed. */
1769         status = U_ZERO_ERROR;
1770         actual = uregex_groupUTextDeep(re, 1, NULL, &status);
1771         TEST_ASSERT_SUCCESS(status);
1772         TEST_ASSERT_UTEXT(str_interior, actual);
1773         utext_close(actual);
1774 
1775         /*  Capture group out of range.  Error status expected, with an empty UText still returned. */
1776         status = U_ZERO_ERROR;
1777         actual = uregex_groupUTextDeep(re, 2, NULL, &status);
1778         TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR);
1779         TEST_ASSERT(utext_nativeLength(actual) == 0);
1780         utext_close(actual);
1781 
1782         uregex_close(re);
1783 
1784     }
1785 
1786     /*
1787      *  replaceFirst(): pattern "x(.*?)x" with replacement "<$1>" on matching and non-matching text, then with an escape-laden replacement.
1788      */
1789     {
1790         UChar    text1[80];
1791         UChar    text2[80];
1792         UText    replText = UTEXT_INITIALIZER;
1793         UText   *result;
1794         const char str_Replxxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace <aa> x1x x...x. */
1795         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1796         const char str_u00411U00000042a[] =  { 0x5c, 0x5c, 0x5c, 0x75, 0x30, 0x30, 0x34, 0x31, 0x24, 0x31, 0x5c, 0x55, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x34, 0x32, 0x24, 0x5c, 0x61, 0x00 }; /* \\\u0041$1\U00000042$\a */
1797         const char str_1x[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1798         const char str_ReplaceAaaBax1xxx[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x5c, 0x41, 0x61, 0x61, 0x42, 0x24, 0x61, 0x20, 0x78, 0x31, 0x78, 0x20, 0x78, 0x2e, 0x2e, 0x2e, 0x78, 0x2e, 0x00 }; /* Replace \AaaB$a x1x x...x. */
1799         status = U_ZERO_ERROR;
1800         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
1801         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
1802         utext_openUTF8(&replText, str_1x, -1, &status);
1803 
1804         re = uregex_openC("x(.*?)x", 0, NULL, &status);
1805         TEST_ASSERT_SUCCESS(status);
1806 
1807         /*  Normal case, with match: only the first "xaax" is expected to become "<aa>". */
1808         uregex_setText(re, text1, -1, &status);
1809         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1810         TEST_ASSERT_SUCCESS(status);
1811         TEST_ASSERT_UTEXT(str_Replxxx, result);
1812         utext_close(result);
1813 
1814         /* No match.  Text should copy to output with no changes.  */
1815         uregex_setText(re, text2, -1, &status);
1816         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1817         TEST_ASSERT_SUCCESS(status);
1818         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1819         utext_close(result);
1820 
1821         /* Unicode escapes: replText is reopened on the escape-laden replacement string and applied to text1 again. */
1822         uregex_setText(re, text1, -1, &status);
1823         utext_openUTF8(&replText, str_u00411U00000042a, -1, &status);
1824         result = uregex_replaceFirstUText(re, &replText, NULL, &status);
1825         TEST_ASSERT_SUCCESS(status);
1826         TEST_ASSERT_UTEXT(str_ReplaceAaaBax1xxx, result);
1827         utext_close(result);
1828 
1829         uregex_close(re);
1830         utext_close(&replText);
1831     }
1832 
1833 
1834     /*
1835      *  replaceAll(): pattern "x(.*?)x" with replacement "<$1>", on text with several matches and on text with none.
1836      */
1837     {
1838         UChar    text1[80];
1839         UChar    text2[80];
1840         UText    replText = UTEXT_INITIALIZER;
1841         UText   *result;
1842         const char str_1[] = { 0x3c, 0x24, 0x31, 0x3e, 0x00 }; /* <$1> */
1843         const char str_Replaceaa1[] = { 0x52, 0x65, 0x70, 0x6c, 0x61, 0x63, 0x65, 0x20, 0x3c, 0x61, 0x61, 0x3e, 0x20, 0x3c, 0x31, 0x3e, 0x20, 0x3c, 0x2e, 0x2e, 0x2e, 0x3e, 0x2e, 0x00 }; /* Replace <aa> <1> <...>. */
1844         const char str_Nomatchhere[] = { 0x4e, 0x6f, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2e, 0x00 }; /* No match here. */
1845         status = U_ZERO_ERROR;
1846         u_uastrncpy(text1, "Replace xaax x1x x...x.",  sizeof(text1)/2);
1847         u_uastrncpy(text2, "No match here.",  sizeof(text2)/2);
1848         utext_openUTF8(&replText, str_1, -1, &status);
1849 
1850         re = uregex_openC("x(.*?)x", 0, NULL, &status);
1851         TEST_ASSERT_SUCCESS(status);
1852 
1853         /*  Normal case, with match: all three x...x runs are expected to be rewritten as <...>. */
1854         uregex_setText(re, text1, -1, &status);
1855         result = uregex_replaceAllUText(re, &replText, NULL, &status);
1856         TEST_ASSERT_SUCCESS(status);
1857         TEST_ASSERT_UTEXT(str_Replaceaa1, result);
1858         utext_close(result);
1859 
1860         /* No match.  Text should copy to output with no changes.  */
1861         uregex_setText(re, text2, -1, &status);
1862         result = uregex_replaceAllUText(re, &replText, NULL, &status);
1863         TEST_ASSERT_SUCCESS(status);
1864         TEST_ASSERT_UTEXT(str_Nomatchhere, result);
1865         utext_close(result);
1866 
1867         uregex_close(re);
1868         utext_close(&replText);
1869     }
1870 
1871 
1872     /*
1873      *  appendReplacement(): write the replacement for a whole-string match into a UChar buffer, with and without escapes.
1874      */
1875     {
1876         UChar    text[100];
1877         UChar    repl[100];
1878         UChar    buf[100];
1879         UChar   *bufPtr;
1880         int32_t  bufCap;
1881 
1882         status = U_ZERO_ERROR;
1883         re = uregex_openC(".*", 0, NULL, &status); /* no parse-error struct requested; NULL as in the other open calls */
1884         TEST_ASSERT_SUCCESS(status);
1885 
1886         u_uastrncpy(text, "whatever",  sizeof(text)/2);
1887         u_uastrncpy(repl, "some other", sizeof(repl)/2);
1888         uregex_setText(re, text, -1, &status);
1889 
1890         /* match covers whole target string */
1891         uregex_find(re, 0, &status);
1892         TEST_ASSERT_SUCCESS(status);
1893         bufPtr = buf;
1894         bufCap = sizeof(buf) / 2;
1895         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1896         TEST_ASSERT_SUCCESS(status);
1897         TEST_ASSERT_STRING("some other", buf, TRUE);
1898 
1899         /* Replacement text has \u \U escapes; bufPtr/bufCap are rewound so the output starts again at buf[0] */
1900         uregex_find(re, 0, &status);
1901         TEST_ASSERT_SUCCESS(status);
1902         bufPtr = buf;
1903         bufCap = sizeof(buf) / 2;
1904         u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2);
1905         uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status);
1906         TEST_ASSERT_SUCCESS(status);
1907         TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE);
1908 
1909         uregex_close(re);
1910     }
1911 
1912 
1913     /*
1914      *  appendReplacement(), appendTail() checked in replaceFirst(), replaceAll().
1915      */
1916 
1917     /*
1918      *  splitUText(): split "first : second:  third" on ":" with ample and with too few output slots.
1919      */
1920     {
1921         UChar    textToSplit[80];
1923         UText    *fields[10];
1924         int32_t  numFields;
1925         int32_t i;
1926 
1927         u_uastrncpy(textToSplit, "first : second:  third",  sizeof(textToSplit)/2);
1929 
1930         status = U_ZERO_ERROR;
1931         re = uregex_openC(":", 0, NULL, &status);
1932 
1933 
1934         /*  Simple split */
1935 
1936         uregex_setText(re, textToSplit, -1, &status);
1937         TEST_ASSERT_SUCCESS(status);
1938 
1939         /* TEST_ASSERT_SUCCESS only logs, so guard explicitly before using the regex. */
1940         if (U_SUCCESS(status)) {
1941             memset(fields, 0, sizeof(fields));
1942             numFields = uregex_splitUText(re, fields, 10, &status);
1943             TEST_ASSERT_SUCCESS(status);
1944 
1945             /* TEST_ASSERT_SUCCESS only logs, so guard explicitly before inspecting the fields. */
1946             if(U_SUCCESS(status)) {
1947                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* 'first ' */
1948                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /* ' second' */
1949                 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /* '  third' */
1950                 TEST_ASSERT(numFields == 3);
1951                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
1952                 TEST_ASSERT_UTEXT(str_second, fields[1]);
1953                 TEST_ASSERT_UTEXT(str_third, fields[2]);
1954                 TEST_ASSERT(fields[3] == NULL); /* slots past the last field stay as zeroed by the memset */
1955             }
1956             for(i = 0; i < numFields; i++) {
1957                 utext_close(fields[i]);
1958             }
1959         }
1960 
1961         uregex_close(re);
1962 
1963 
1964         /*  Split with too few output strings available */
1965         status = U_ZERO_ERROR;
1966         re = uregex_openC(":", 0, NULL, &status);
1967         uregex_setText(re, textToSplit, -1, &status);
1968         TEST_ASSERT_SUCCESS(status);
1969 
1970         /* TEST_ASSERT_SUCCESS only logs, so guard explicitly before using the regex. */
1971         if(U_SUCCESS(status)) {
1972             fields[0] = NULL;
1973             fields[1] = NULL;
1974             fields[2] = &patternText; /* sentinel: must be left untouched when capacity is 2 */
1975             numFields = uregex_splitUText(re, fields, 2, &status);
1976             TEST_ASSERT_SUCCESS(status);
1977 
1978             /* TEST_ASSERT_SUCCESS only logs, so guard explicitly before inspecting the fields. */
1979             if(U_SUCCESS(status)) {
1980                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
1981                 const char str_secondthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3a, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second:  third */
1982                 TEST_ASSERT(numFields == 2);
1983                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
1984                 TEST_ASSERT_UTEXT(str_secondthird, fields[1]);
1985                 TEST_ASSERT(fields[2] == &patternText);
1986             }
1987             for(i = 0; i < numFields; i++) {
1988                 utext_close(fields[i]);
1989             }
1990         }
1991 
1992         uregex_close(re);
1993     }
1994 
1995     /* splitUText(), part 2.  Patterns with capture groups.  The capture group text
1996      *                   comes out as additional fields.  */
1997     {
1998         UChar    textToSplit[80];
1999         UText    *fields[10];
2000         int32_t  numFields;
2001         int32_t i;
2002 
2003         u_uastrncpy(textToSplit, "first <tag-a> second<tag-b>  third",  sizeof(textToSplit)/2);
2004 
2005         status = U_ZERO_ERROR;
2006         re = uregex_openC("<(.*?)>", 0, NULL, &status);
2007 
2008         uregex_setText(re, textToSplit, -1, &status);
2009         TEST_ASSERT_SUCCESS(status);
2010 
2011         /* TEST_ASSERT_SUCCESS only logs, so guard explicitly before using the regex. */
2012         if(U_SUCCESS(status)) {
2013             memset(fields, 0, sizeof(fields));
2014             numFields = uregex_splitUText(re, fields, 10, &status);
2015             TEST_ASSERT_SUCCESS(status);
2016 
2017             /* TEST_ASSERT_SUCCESS only logs, so guard explicitly before inspecting the fields. */
2018             if(U_SUCCESS(status)) {
2019                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2020                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2021                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2022                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2023                 const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2024 
2025                 TEST_ASSERT(numFields == 5);
2026                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2027                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2028                 TEST_ASSERT_UTEXT(str_second, fields[2]);
2029                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2030                 TEST_ASSERT_UTEXT(str_third, fields[4]);
2031                 TEST_ASSERT(fields[5] == NULL);
2032             }
2033             for(i = 0; i < numFields; i++) {
2034                 utext_close(fields[i]);
2035             }
2036         }
2037 
2038         /*  Split with too few output strings available (2) */
2039         status = U_ZERO_ERROR;
2040         fields[0] = NULL;
2041         fields[1] = NULL;
2042         fields[2] = &patternText; /* sentinel: must be left untouched when capacity is 2 */
2043         numFields = uregex_splitUText(re, fields, 2, &status);
2044         TEST_ASSERT_SUCCESS(status);
2045 
2046         /* TEST_ASSERT_SUCCESS only logs, so guard explicitly before inspecting the fields. */
2047         if(U_SUCCESS(status)) {
2048             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2049             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2050             TEST_ASSERT(numFields == 2);
2051             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2052             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[1]);
2053             TEST_ASSERT(fields[2] == &patternText);
2054         }
2055         for(i = 0; i < numFields; i++) {
2056             utext_close(fields[i]);
2057         }
2058 
2059 
2060         /*  Split with too few output strings available (3) */
2061         status = U_ZERO_ERROR;
2062         fields[0] = NULL;
2063         fields[1] = NULL;
2064         fields[2] = NULL;
2065         fields[3] = &patternText; /* sentinel: must be left untouched when capacity is 3 */
2066         numFields = uregex_splitUText(re, fields, 3, &status);
2067         TEST_ASSERT_SUCCESS(status);
2068 
2069         /* TEST_ASSERT_SUCCESS only logs, so guard explicitly before inspecting the fields. */
2070         if(U_SUCCESS(status)) {
2071             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2072             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2073             const char str_secondtagbthird[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x3c, 0x74, 0x61, 0x67, 0x2d, 0x62, 0x3e, 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*  second<tag-b>  third */
2074             TEST_ASSERT(numFields == 3);
2075             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2076             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2077             TEST_ASSERT_UTEXT(str_secondtagbthird, fields[2]);
2078             TEST_ASSERT(fields[3] == &patternText);
2079         }
2080         for(i = 0; i < numFields; i++) {
2081             utext_close(fields[i]);
2082         }
2083 
2084         /*  Split with just enough output strings available (5) */
2085         status = U_ZERO_ERROR;
2086         fields[0] = NULL;
2087         fields[1] = NULL;
2088         fields[2] = NULL;
2089         fields[3] = NULL;
2090         fields[4] = NULL;
2091         fields[5] = &patternText; /* sentinel: must be left untouched when capacity is 5 */
2092         numFields = uregex_splitUText(re, fields, 5, &status);
2093         TEST_ASSERT_SUCCESS(status);
2094 
2095         /* TEST_ASSERT_SUCCESS only logs, so guard explicitly before inspecting the fields. */
2096         if(U_SUCCESS(status)) {
2097             const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2098             const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2099             const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2100             const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2101             const char str_third[] = { 0x20, 0x20, 0x74, 0x68, 0x69, 0x72, 0x64, 0x00 }; /*   third */
2102 
2103             TEST_ASSERT(numFields == 5);
2104             TEST_ASSERT_UTEXT(str_first,  fields[0]);
2105             TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2106             TEST_ASSERT_UTEXT(str_second, fields[2]);
2107             TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2108             TEST_ASSERT_UTEXT(str_third, fields[4]);
2109             TEST_ASSERT(fields[5] == &patternText);
2110         }
2111         for(i = 0; i < numFields; i++) {
2112             utext_close(fields[i]);
2113         }
2114 
2115         /* Split, end of text is a field delimiter: the input is truncated right after "<tag-b>".  */
2116         status = U_ZERO_ERROR;
2117         uregex_setText(re, textToSplit, (int32_t)strlen("first <tag-a> second<tag-b>"), &status); /* strlen yields size_t; the length parameter is int32_t */
2118         TEST_ASSERT_SUCCESS(status);
2119 
2120         /* TEST_ASSERT_SUCCESS only logs, so guard explicitly before using the regex. */
2121         if(U_SUCCESS(status)) {
2122             memset(fields, 0, sizeof(fields));
2123             fields[9] = &patternText; /* sentinel: must be left untouched when capacity is 9 */
2124             numFields = uregex_splitUText(re, fields, 9, &status);
2125             TEST_ASSERT_SUCCESS(status);
2126 
2127             /* TEST_ASSERT_SUCCESS only logs, so guard explicitly before inspecting the fields. */
2128             if(U_SUCCESS(status)) {
2129                 const char str_first[] = { 0x66, 0x69, 0x72, 0x73, 0x74, 0x20, 0x00 }; /* first  */
2130                 const char str_taga[] = { 0x74, 0x61, 0x67, 0x2d, 0x61, 0x00 }; /* tag-a */
2131                 const char str_second[] = { 0x20, 0x73, 0x65, 0x63, 0x6f, 0x6e, 0x64, 0x00 }; /*  second */
2132                 const char str_tagb[] = { 0x74, 0x61, 0x67, 0x2d, 0x62, 0x00 }; /* tag-b */
2133 
2134                 TEST_ASSERT(numFields == 4);
2135                 TEST_ASSERT_UTEXT(str_first,  fields[0]);
2136                 TEST_ASSERT_UTEXT(str_taga,   fields[1]);
2137                 TEST_ASSERT_UTEXT(str_second, fields[2]);
2138                 TEST_ASSERT_UTEXT(str_tagb,   fields[3]);
2139                 TEST_ASSERT(fields[4] == NULL);
2140                 TEST_ASSERT(fields[8] == NULL);
2141                 TEST_ASSERT(fields[9] == &patternText);
2142             }
2143             for(i = 0; i < numFields; i++) {
2144                 utext_close(fields[i]);
2145             }
2146         }
2147 
2148         uregex_close(re);
2149     }
2150     utext_close(&patternText);
2151 }
2152 
2153 #endif   /*  !UCONFIG_NO_REGULAR_EXPRESSIONS */
2154