• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *   Copyright (C) 2004-2013, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 *   file name:  uregex.cpp
7 */
8 
9 #include "unicode/utypes.h"
10 
11 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
12 
13 #include "unicode/regex.h"
14 #include "unicode/uregex.h"
15 #include "unicode/unistr.h"
16 #include "unicode/ustring.h"
17 #include "unicode/uchar.h"
18 #include "unicode/uobject.h"
19 #include "unicode/utf16.h"
20 #include "umutex.h"
21 #include "uassert.h"
22 #include "cmemory.h"
23 
24 #include "regextxt.h"
25 
26 #include <stdio.h>
27 
28 U_NAMESPACE_BEGIN
29 
30 #define REMAINING_CAPACITY(idx,len) ((((len)-(idx))>0)?((len)-(idx)):0)
31 
32 struct RegularExpression: public UMemory {
33 public:
34     RegularExpression();
35     ~RegularExpression();
36     int32_t           fMagic;
37     RegexPattern     *fPat;
38     u_atomic_int32_t *fPatRefCount;
39     UChar            *fPatString;
40     int32_t           fPatStringLen;
41     RegexMatcher     *fMatcher;
42     const UChar      *fText;         // Text from setText()
43     int32_t           fTextLength;   // Length provided by user with setText(), which
44                                      //  may be -1.
45     UBool             fOwnsText;
46 };
47 
48 static const int32_t REXP_MAGIC = 0x72657870; // "rexp" in ASCII
49 
RegularExpression()50 RegularExpression::RegularExpression() {
51     fMagic        = REXP_MAGIC;
52     fPat          = NULL;
53     fPatRefCount  = NULL;
54     fPatString    = NULL;
55     fPatStringLen = 0;
56     fMatcher      = NULL;
57     fText         = NULL;
58     fTextLength   = 0;
59     fOwnsText     = FALSE;
60 }
61 
~RegularExpression()62 RegularExpression::~RegularExpression() {
63     delete fMatcher;
64     fMatcher = NULL;
65     if (fPatRefCount!=NULL && umtx_atomic_dec(fPatRefCount)==0) {
66         delete fPat;
67         uprv_free(fPatString);
68         uprv_free((void *)fPatRefCount);
69     }
70     if (fOwnsText && fText!=NULL) {
71         uprv_free((void *)fText);
72     }
73     fMagic = 0;
74 }
75 
76 U_NAMESPACE_END
77 
78 U_NAMESPACE_USE
79 
80 //----------------------------------------------------------------------------------------
81 //
82 //   validateRE    Do boilerplate style checks on API function parameters.
83 //                 Return TRUE if they look OK.
84 //----------------------------------------------------------------------------------------
validateRE(const RegularExpression * re,UBool requiresText,UErrorCode * status)85 static UBool validateRE(const RegularExpression *re, UBool requiresText, UErrorCode *status) {
86     if (U_FAILURE(*status)) {
87         return FALSE;
88     }
89     if (re == NULL || re->fMagic != REXP_MAGIC) {
90         *status = U_ILLEGAL_ARGUMENT_ERROR;
91         return FALSE;
92     }
93     // !!! Not sure how to update this with the new UText backing, which is stored in re->fMatcher anyway
94     if (requiresText && re->fText == NULL && !re->fOwnsText) {
95         *status = U_REGEX_INVALID_STATE;
96         return FALSE;
97     }
98     return TRUE;
99 }
100 
101 //----------------------------------------------------------------------------------------
102 //
103 //    uregex_open
104 //
105 //----------------------------------------------------------------------------------------
106 U_CAPI URegularExpression *  U_EXPORT2
uregex_open(const UChar * pattern,int32_t patternLength,uint32_t flags,UParseError * pe,UErrorCode * status)107 uregex_open( const  UChar          *pattern,
108                     int32_t         patternLength,
109                     uint32_t        flags,
110                     UParseError    *pe,
111                     UErrorCode     *status) {
112 
113     if (U_FAILURE(*status)) {
114         return NULL;
115     }
116     if (pattern == NULL || patternLength < -1 || patternLength == 0) {
117         *status = U_ILLEGAL_ARGUMENT_ERROR;
118         return NULL;
119     }
120     int32_t actualPatLen = patternLength;
121     if (actualPatLen == -1) {
122         actualPatLen = u_strlen(pattern);
123     }
124 
125     RegularExpression  *re     = new RegularExpression;
126     u_atomic_int32_t   *refC   = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
127     UChar              *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(actualPatLen+1));
128     if (re == NULL || refC == NULL || patBuf == NULL) {
129         *status = U_MEMORY_ALLOCATION_ERROR;
130         delete re;
131         uprv_free((void *)refC);
132         uprv_free(patBuf);
133         return NULL;
134     }
135     re->fPatRefCount = refC;
136     *re->fPatRefCount = 1;
137 
138     //
139     // Make a copy of the pattern string, so we can return it later if asked.
140     //    For compiling the pattern, we will use a UText wrapper around
141     //    this local copy, to avoid making even more copies.
142     //
143     re->fPatString    = patBuf;
144     re->fPatStringLen = patternLength;
145     u_memcpy(patBuf, pattern, actualPatLen);
146     patBuf[actualPatLen] = 0;
147 
148     UText patText = UTEXT_INITIALIZER;
149     utext_openUChars(&patText, patBuf, patternLength, status);
150 
151     //
152     // Compile the pattern
153     //
154     if (pe != NULL) {
155         re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
156     } else {
157         re->fPat = RegexPattern::compile(&patText, flags, *status);
158     }
159     utext_close(&patText);
160 
161     if (U_FAILURE(*status)) {
162         goto ErrorExit;
163     }
164 
165     //
166     // Create the matcher object
167     //
168     re->fMatcher = re->fPat->matcher(*status);
169     if (U_SUCCESS(*status)) {
170         return (URegularExpression*)re;
171     }
172 
173 ErrorExit:
174     delete re;
175     return NULL;
176 
177 }
178 
179 //----------------------------------------------------------------------------------------
180 //
181 //    uregex_openUText
182 //
183 //----------------------------------------------------------------------------------------
184 U_CAPI URegularExpression *  U_EXPORT2
uregex_openUText(UText * pattern,uint32_t flags,UParseError * pe,UErrorCode * status)185 uregex_openUText(UText          *pattern,
186                  uint32_t        flags,
187                  UParseError    *pe,
188                  UErrorCode     *status) {
189 
190     if (U_FAILURE(*status)) {
191         return NULL;
192     }
193     if (pattern == NULL) {
194         *status = U_ILLEGAL_ARGUMENT_ERROR;
195         return NULL;
196     }
197 
198     int64_t patternNativeLength = utext_nativeLength(pattern);
199 
200     if (patternNativeLength == 0) {
201         *status = U_ILLEGAL_ARGUMENT_ERROR;
202         return NULL;
203     }
204 
205     RegularExpression *re     = new RegularExpression;
206 
207     UErrorCode lengthStatus = U_ZERO_ERROR;
208     int32_t pattern16Length = utext_extract(pattern, 0, patternNativeLength, NULL, 0, &lengthStatus);
209 
210     u_atomic_int32_t   *refC   = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
211     UChar              *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(pattern16Length+1));
212     if (re == NULL || refC == NULL || patBuf == NULL) {
213         *status = U_MEMORY_ALLOCATION_ERROR;
214         delete re;
215         uprv_free((void *)refC);
216         uprv_free(patBuf);
217         return NULL;
218     }
219     re->fPatRefCount = refC;
220     *re->fPatRefCount = 1;
221 
222     //
223     // Make a copy of the pattern string, so we can return it later if asked.
224     //    For compiling the pattern, we will use a read-only UText wrapper
225     //    around this local copy, to avoid making even more copies.
226     //
227     re->fPatString    = patBuf;
228     re->fPatStringLen = pattern16Length;
229     utext_extract(pattern, 0, patternNativeLength, patBuf, pattern16Length+1, status);
230 
231     UText patText = UTEXT_INITIALIZER;
232     utext_openUChars(&patText, patBuf, pattern16Length, status);
233 
234     //
235     // Compile the pattern
236     //
237     if (pe != NULL) {
238         re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
239     } else {
240         re->fPat = RegexPattern::compile(&patText, flags, *status);
241     }
242     utext_close(&patText);
243 
244     if (U_FAILURE(*status)) {
245         goto ErrorExit;
246     }
247 
248     //
249     // Create the matcher object
250     //
251     re->fMatcher = re->fPat->matcher(*status);
252     if (U_SUCCESS(*status)) {
253         return (URegularExpression*)re;
254     }
255 
256 ErrorExit:
257     delete re;
258     return NULL;
259 
260 }
261 
262 //----------------------------------------------------------------------------------------
263 //
264 //    uregex_close
265 //
266 //----------------------------------------------------------------------------------------
267 U_CAPI void  U_EXPORT2
uregex_close(URegularExpression * re2)268 uregex_close(URegularExpression  *re2) {
269     RegularExpression *re = (RegularExpression*)re2;
270     UErrorCode  status = U_ZERO_ERROR;
271     if (validateRE(re, FALSE, &status) == FALSE) {
272         return;
273     }
274     delete re;
275 }
276 
277 
278 //----------------------------------------------------------------------------------------
279 //
280 //    uregex_clone
281 //
282 //----------------------------------------------------------------------------------------
283 U_CAPI URegularExpression * U_EXPORT2
uregex_clone(const URegularExpression * source2,UErrorCode * status)284 uregex_clone(const URegularExpression *source2, UErrorCode *status)  {
285     RegularExpression *source = (RegularExpression*)source2;
286     if (validateRE(source, FALSE, status) == FALSE) {
287         return NULL;
288     }
289 
290     RegularExpression *clone = new RegularExpression;
291     if (clone == NULL) {
292         *status = U_MEMORY_ALLOCATION_ERROR;
293         return NULL;
294     }
295 
296     clone->fMatcher = source->fPat->matcher(*status);
297     if (U_FAILURE(*status)) {
298         delete clone;
299         return NULL;
300     }
301 
302     clone->fPat          = source->fPat;
303     clone->fPatRefCount  = source->fPatRefCount;
304     clone->fPatString    = source->fPatString;
305     clone->fPatStringLen = source->fPatStringLen;
306     umtx_atomic_inc(source->fPatRefCount);
307     // Note:  fText is not cloned.
308 
309     return (URegularExpression*)clone;
310 }
311 
312 
313 
314 
315 //------------------------------------------------------------------------------
316 //
317 //    uregex_pattern
318 //
319 //------------------------------------------------------------------------------
320 U_CAPI const UChar * U_EXPORT2
uregex_pattern(const URegularExpression * regexp2,int32_t * patLength,UErrorCode * status)321 uregex_pattern(const  URegularExpression *regexp2,
322                       int32_t            *patLength,
323                       UErrorCode         *status)  {
324     RegularExpression *regexp = (RegularExpression*)regexp2;
325 
326     if (validateRE(regexp, FALSE, status) == FALSE) {
327         return NULL;
328     }
329     if (patLength != NULL) {
330         *patLength = regexp->fPatStringLen;
331     }
332     return regexp->fPatString;
333 }
334 
335 
336 //------------------------------------------------------------------------------
337 //
338 //    uregex_patternUText
339 //
340 //------------------------------------------------------------------------------
341 U_CAPI UText * U_EXPORT2
uregex_patternUText(const URegularExpression * regexp2,UErrorCode * status)342 uregex_patternUText(const URegularExpression *regexp2,
343                           UErrorCode         *status)  {
344     RegularExpression *regexp = (RegularExpression*)regexp2;
345     return regexp->fPat->patternText(*status);
346 }
347 
348 
349 //------------------------------------------------------------------------------
350 //
351 //    uregex_flags
352 //
353 //------------------------------------------------------------------------------
354 U_CAPI int32_t U_EXPORT2
uregex_flags(const URegularExpression * regexp2,UErrorCode * status)355 uregex_flags(const URegularExpression *regexp2, UErrorCode *status)  {
356     RegularExpression *regexp = (RegularExpression*)regexp2;
357     if (validateRE(regexp, FALSE, status) == FALSE) {
358         return 0;
359     }
360     int32_t flags = regexp->fPat->flags();
361     return flags;
362 }
363 
364 
365 //------------------------------------------------------------------------------
366 //
367 //    uregex_setText
368 //
369 //------------------------------------------------------------------------------
370 U_CAPI void U_EXPORT2
uregex_setText(URegularExpression * regexp2,const UChar * text,int32_t textLength,UErrorCode * status)371 uregex_setText(URegularExpression *regexp2,
372                const UChar        *text,
373                int32_t             textLength,
374                UErrorCode         *status)  {
375     RegularExpression *regexp = (RegularExpression*)regexp2;
376     if (validateRE(regexp, FALSE, status) == FALSE) {
377         return;
378     }
379     if (text == NULL || textLength < -1) {
380         *status = U_ILLEGAL_ARGUMENT_ERROR;
381         return;
382     }
383 
384     if (regexp->fOwnsText && regexp->fText != NULL) {
385         uprv_free((void *)regexp->fText);
386     }
387 
388     regexp->fText       = text;
389     regexp->fTextLength = textLength;
390     regexp->fOwnsText   = FALSE;
391 
392     UText input = UTEXT_INITIALIZER;
393     utext_openUChars(&input, text, textLength, status);
394     regexp->fMatcher->reset(&input);
395     utext_close(&input); // reset() made a shallow clone, so we don't need this copy
396 }
397 
398 
399 //------------------------------------------------------------------------------
400 //
401 //    uregex_setUText
402 //
403 //------------------------------------------------------------------------------
404 U_CAPI void U_EXPORT2
uregex_setUText(URegularExpression * regexp2,UText * text,UErrorCode * status)405 uregex_setUText(URegularExpression *regexp2,
406                 UText              *text,
407                 UErrorCode         *status) {
408     RegularExpression *regexp = (RegularExpression*)regexp2;
409     if (validateRE(regexp, FALSE, status) == FALSE) {
410         return;
411     }
412     if (text == NULL) {
413         *status = U_ILLEGAL_ARGUMENT_ERROR;
414         return;
415     }
416 
417     if (regexp->fOwnsText && regexp->fText != NULL) {
418         uprv_free((void *)regexp->fText);
419     }
420 
421     regexp->fText       = NULL; // only fill it in on request
422     regexp->fTextLength = -1;
423     regexp->fOwnsText   = TRUE;
424     regexp->fMatcher->reset(text);
425 }
426 
427 
428 
429 //------------------------------------------------------------------------------
430 //
431 //    uregex_getText
432 //
433 //------------------------------------------------------------------------------
434 U_CAPI const UChar * U_EXPORT2
uregex_getText(URegularExpression * regexp2,int32_t * textLength,UErrorCode * status)435 uregex_getText(URegularExpression *regexp2,
436                int32_t            *textLength,
437                UErrorCode         *status)  {
438     RegularExpression *regexp = (RegularExpression*)regexp2;
439     if (validateRE(regexp, FALSE, status) == FALSE) {
440         return NULL;
441     }
442 
443     if (regexp->fText == NULL) {
444         // need to fill in the text
445         UText *inputText = regexp->fMatcher->inputText();
446         int64_t inputNativeLength = utext_nativeLength(inputText);
447         if (UTEXT_FULL_TEXT_IN_CHUNK(inputText, inputNativeLength)) {
448             regexp->fText = inputText->chunkContents;
449             regexp->fTextLength = (int32_t)inputNativeLength;
450             regexp->fOwnsText = FALSE; // because the UText owns it
451         } else {
452             UErrorCode lengthStatus = U_ZERO_ERROR;
453             regexp->fTextLength = utext_extract(inputText, 0, inputNativeLength, NULL, 0, &lengthStatus); // buffer overflow error
454             UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(regexp->fTextLength+1));
455 
456             utext_extract(inputText, 0, inputNativeLength, inputChars, regexp->fTextLength+1, status);
457             regexp->fText = inputChars;
458             regexp->fOwnsText = TRUE; // should already be set but just in case
459         }
460     }
461 
462     if (textLength != NULL) {
463         *textLength = regexp->fTextLength;
464     }
465     return regexp->fText;
466 }
467 
468 
469 //------------------------------------------------------------------------------
470 //
471 //    uregex_getUText
472 //
473 //------------------------------------------------------------------------------
474 U_CAPI UText * U_EXPORT2
uregex_getUText(URegularExpression * regexp2,UText * dest,UErrorCode * status)475 uregex_getUText(URegularExpression *regexp2,
476                 UText              *dest,
477                 UErrorCode         *status)  {
478     RegularExpression *regexp = (RegularExpression*)regexp2;
479     if (validateRE(regexp, FALSE, status) == FALSE) {
480         return dest;
481     }
482     return regexp->fMatcher->getInput(dest, *status);
483 }
484 
485 
486 //------------------------------------------------------------------------------
487 //
488 //    uregex_refreshUText
489 //
490 //------------------------------------------------------------------------------
491 U_CAPI void U_EXPORT2
uregex_refreshUText(URegularExpression * regexp2,UText * text,UErrorCode * status)492 uregex_refreshUText(URegularExpression *regexp2,
493                     UText              *text,
494                     UErrorCode         *status) {
495     RegularExpression *regexp = (RegularExpression*)regexp2;
496     if (validateRE(regexp, FALSE, status) == FALSE) {
497         return;
498     }
499     regexp->fMatcher->refreshInputText(text, *status);
500 }
501 
502 
503 //------------------------------------------------------------------------------
504 //
505 //    uregex_matches
506 //
507 //------------------------------------------------------------------------------
508 U_CAPI UBool U_EXPORT2
uregex_matches(URegularExpression * regexp2,int32_t startIndex,UErrorCode * status)509 uregex_matches(URegularExpression *regexp2,
510                int32_t            startIndex,
511                UErrorCode        *status)  {
512     return uregex_matches64( regexp2, (int64_t)startIndex, status);
513 }
514 
515 U_CAPI UBool U_EXPORT2
uregex_matches64(URegularExpression * regexp2,int64_t startIndex,UErrorCode * status)516 uregex_matches64(URegularExpression *regexp2,
517                  int64_t            startIndex,
518                  UErrorCode        *status)  {
519     RegularExpression *regexp = (RegularExpression*)regexp2;
520     UBool result = FALSE;
521     if (validateRE(regexp, TRUE, status) == FALSE) {
522         return result;
523     }
524     if (startIndex == -1) {
525         result = regexp->fMatcher->matches(*status);
526     } else {
527         result = regexp->fMatcher->matches(startIndex, *status);
528     }
529     return result;
530 }
531 
532 
533 //------------------------------------------------------------------------------
534 //
535 //    uregex_lookingAt
536 //
537 //------------------------------------------------------------------------------
538 U_CAPI UBool U_EXPORT2
uregex_lookingAt(URegularExpression * regexp2,int32_t startIndex,UErrorCode * status)539 uregex_lookingAt(URegularExpression *regexp2,
540                  int32_t             startIndex,
541                  UErrorCode         *status)  {
542     return uregex_lookingAt64( regexp2, (int64_t)startIndex, status);
543 }
544 
545 U_CAPI UBool U_EXPORT2
uregex_lookingAt64(URegularExpression * regexp2,int64_t startIndex,UErrorCode * status)546 uregex_lookingAt64(URegularExpression *regexp2,
547                    int64_t             startIndex,
548                    UErrorCode         *status)  {
549     RegularExpression *regexp = (RegularExpression*)regexp2;
550     UBool result = FALSE;
551     if (validateRE(regexp, TRUE, status) == FALSE) {
552         return result;
553     }
554     if (startIndex == -1) {
555         result = regexp->fMatcher->lookingAt(*status);
556     } else {
557         result = regexp->fMatcher->lookingAt(startIndex, *status);
558     }
559     return result;
560 }
561 
562 
563 
564 //------------------------------------------------------------------------------
565 //
566 //    uregex_find
567 //
568 //------------------------------------------------------------------------------
569 U_CAPI UBool U_EXPORT2
uregex_find(URegularExpression * regexp2,int32_t startIndex,UErrorCode * status)570 uregex_find(URegularExpression *regexp2,
571             int32_t             startIndex,
572             UErrorCode         *status)  {
573     return uregex_find64( regexp2, (int64_t)startIndex, status);
574 }
575 
576 U_CAPI UBool U_EXPORT2
uregex_find64(URegularExpression * regexp2,int64_t startIndex,UErrorCode * status)577 uregex_find64(URegularExpression *regexp2,
578               int64_t             startIndex,
579               UErrorCode         *status)  {
580     RegularExpression *regexp = (RegularExpression*)regexp2;
581     UBool result = FALSE;
582     if (validateRE(regexp, TRUE, status) == FALSE) {
583         return result;
584     }
585     if (startIndex == -1) {
586         regexp->fMatcher->resetPreserveRegion();
587         result = regexp->fMatcher->find();
588     } else {
589         result = regexp->fMatcher->find(startIndex, *status);
590     }
591     return result;
592 }
593 
594 
595 //------------------------------------------------------------------------------
596 //
597 //    uregex_findNext
598 //
599 //------------------------------------------------------------------------------
600 U_CAPI UBool U_EXPORT2
uregex_findNext(URegularExpression * regexp2,UErrorCode * status)601 uregex_findNext(URegularExpression *regexp2,
602                 UErrorCode         *status)  {
603     RegularExpression *regexp = (RegularExpression*)regexp2;
604     if (validateRE(regexp, TRUE, status) == FALSE) {
605         return FALSE;
606     }
607     UBool result = regexp->fMatcher->find();
608     return result;
609 }
610 
611 //------------------------------------------------------------------------------
612 //
613 //    uregex_groupCount
614 //
615 //------------------------------------------------------------------------------
616 U_CAPI int32_t U_EXPORT2
uregex_groupCount(URegularExpression * regexp2,UErrorCode * status)617 uregex_groupCount(URegularExpression *regexp2,
618                   UErrorCode         *status)  {
619     RegularExpression *regexp = (RegularExpression*)regexp2;
620     if (validateRE(regexp, FALSE, status) == FALSE) {
621         return 0;
622     }
623     int32_t  result = regexp->fMatcher->groupCount();
624     return result;
625 }
626 
627 
628 //------------------------------------------------------------------------------
629 //
630 //    uregex_group
631 //
632 //------------------------------------------------------------------------------
633 U_CAPI int32_t U_EXPORT2
uregex_group(URegularExpression * regexp2,int32_t groupNum,UChar * dest,int32_t destCapacity,UErrorCode * status)634 uregex_group(URegularExpression *regexp2,
635              int32_t             groupNum,
636              UChar              *dest,
637              int32_t             destCapacity,
638              UErrorCode          *status)  {
639     RegularExpression *regexp = (RegularExpression*)regexp2;
640     if (validateRE(regexp, TRUE, status) == FALSE) {
641         return 0;
642     }
643     if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) {
644         *status = U_ILLEGAL_ARGUMENT_ERROR;
645         return 0;
646     }
647 
648     if (destCapacity == 0 || regexp->fText != NULL) {
649         // If preflighting or if we already have the text as UChars,
650         // this is a little cheaper than going through uregex_groupUTextDeep()
651 
652         //
653         // Pick up the range of characters from the matcher
654         //
655         int32_t  startIx = regexp->fMatcher->start(groupNum, *status);
656         int32_t  endIx   = regexp->fMatcher->end  (groupNum, *status);
657         if (U_FAILURE(*status)) {
658             return 0;
659         }
660 
661         //
662         // Trim length based on buffer capacity
663         //
664         int32_t fullLength = endIx - startIx;
665         int32_t copyLength = fullLength;
666         if (copyLength < destCapacity) {
667             dest[copyLength] = 0;
668         } else if (copyLength == destCapacity) {
669             *status = U_STRING_NOT_TERMINATED_WARNING;
670         } else {
671             copyLength = destCapacity;
672             *status = U_BUFFER_OVERFLOW_ERROR;
673         }
674 
675         //
676         // Copy capture group to user's buffer
677         //
678         if (copyLength > 0) {
679             u_memcpy(dest, &regexp->fText[startIx], copyLength);
680         }
681         return fullLength;
682     } else {
683         UText *groupText = uregex_groupUTextDeep(regexp2, groupNum, NULL, status);
684         int32_t result = utext_extract(groupText, 0, utext_nativeLength(groupText), dest, destCapacity, status);
685         utext_close(groupText);
686         return result;
687     }
688 }
689 
690 
691 //------------------------------------------------------------------------------
692 //
693 //    uregex_groupUText
694 //
695 //------------------------------------------------------------------------------
696 U_CAPI UText * U_EXPORT2
uregex_groupUText(URegularExpression * regexp2,int32_t groupNum,UText * dest,int64_t * groupLength,UErrorCode * status)697 uregex_groupUText(URegularExpression *regexp2,
698                   int32_t             groupNum,
699                   UText              *dest,
700                   int64_t            *groupLength,
701                   UErrorCode         *status)  {
702     RegularExpression *regexp = (RegularExpression*)regexp2;
703     if (validateRE(regexp, TRUE, status) == FALSE) {
704         UErrorCode emptyTextStatus = U_ZERO_ERROR;
705         return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
706     }
707 
708     return regexp->fMatcher->group(groupNum, dest, *groupLength, *status);
709 }
710 
711 //------------------------------------------------------------------------------
712 //
713 //    uregex_groupUTextDeep
714 //
715 //------------------------------------------------------------------------------
716 U_CAPI UText * U_EXPORT2
uregex_groupUTextDeep(URegularExpression * regexp2,int32_t groupNum,UText * dest,UErrorCode * status)717 uregex_groupUTextDeep(URegularExpression *regexp2,
718                   int32_t             groupNum,
719                   UText              *dest,
720                   UErrorCode         *status)  {
721     RegularExpression *regexp = (RegularExpression*)regexp2;
722     if (validateRE(regexp, TRUE, status) == FALSE) {
723         UErrorCode emptyTextStatus = U_ZERO_ERROR;
724         return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
725     }
726 
727     if (regexp->fText != NULL) {
728         //
729         // Pick up the range of characters from the matcher
730         // and use our already-extracted characters
731         //
732         int32_t  startIx = regexp->fMatcher->start(groupNum, *status);
733         int32_t  endIx   = regexp->fMatcher->end  (groupNum, *status);
734         if (U_FAILURE(*status)) {
735             UErrorCode emptyTextStatus = U_ZERO_ERROR;
736             return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
737         }
738 
739         if (dest) {
740             utext_replace(dest, 0, utext_nativeLength(dest), &regexp->fText[startIx], endIx - startIx, status);
741         } else {
742             UText groupText = UTEXT_INITIALIZER;
743             utext_openUChars(&groupText, &regexp->fText[startIx], endIx - startIx, status);
744             dest = utext_clone(NULL, &groupText, TRUE, FALSE, status);
745             utext_close(&groupText);
746         }
747 
748         return dest;
749     } else {
750         return regexp->fMatcher->group(groupNum, dest, *status);
751     }
752 }
753 
754 //------------------------------------------------------------------------------
755 //
756 //    uregex_start
757 //
758 //------------------------------------------------------------------------------
759 U_CAPI int32_t U_EXPORT2
uregex_start(URegularExpression * regexp2,int32_t groupNum,UErrorCode * status)760 uregex_start(URegularExpression *regexp2,
761              int32_t             groupNum,
762              UErrorCode          *status)  {
763     return (int32_t)uregex_start64( regexp2, groupNum, status);
764 }
765 
766 U_CAPI int64_t U_EXPORT2
uregex_start64(URegularExpression * regexp2,int32_t groupNum,UErrorCode * status)767 uregex_start64(URegularExpression *regexp2,
768                int32_t             groupNum,
769                UErrorCode          *status)  {
770     RegularExpression *regexp = (RegularExpression*)regexp2;
771     if (validateRE(regexp, TRUE, status) == FALSE) {
772         return 0;
773     }
774     int32_t result = regexp->fMatcher->start(groupNum, *status);
775     return result;
776 }
777 
778 //------------------------------------------------------------------------------
779 //
780 //    uregex_end
781 //
782 //------------------------------------------------------------------------------
783 U_CAPI int32_t U_EXPORT2
uregex_end(URegularExpression * regexp2,int32_t groupNum,UErrorCode * status)784 uregex_end(URegularExpression   *regexp2,
785            int32_t               groupNum,
786            UErrorCode           *status)  {
787     return (int32_t)uregex_end64( regexp2, groupNum, status);
788 }
789 
790 U_CAPI int64_t U_EXPORT2
uregex_end64(URegularExpression * regexp2,int32_t groupNum,UErrorCode * status)791 uregex_end64(URegularExpression   *regexp2,
792              int32_t               groupNum,
793              UErrorCode           *status)  {
794     RegularExpression *regexp = (RegularExpression*)regexp2;
795     if (validateRE(regexp, TRUE, status) == FALSE) {
796         return 0;
797     }
798     int32_t result = regexp->fMatcher->end(groupNum, *status);
799     return result;
800 }
801 
802 //------------------------------------------------------------------------------
803 //
804 //    uregex_reset
805 //
806 //------------------------------------------------------------------------------
807 U_CAPI void U_EXPORT2
uregex_reset(URegularExpression * regexp2,int32_t index,UErrorCode * status)808 uregex_reset(URegularExpression    *regexp2,
809              int32_t               index,
810              UErrorCode            *status)  {
811     uregex_reset64( regexp2, (int64_t)index, status);
812 }
813 
814 U_CAPI void U_EXPORT2
uregex_reset64(URegularExpression * regexp2,int64_t index,UErrorCode * status)815 uregex_reset64(URegularExpression    *regexp2,
816                int64_t               index,
817                UErrorCode            *status)  {
818     RegularExpression *regexp = (RegularExpression*)regexp2;
819     if (validateRE(regexp, TRUE, status) == FALSE) {
820         return;
821     }
822     regexp->fMatcher->reset(index, *status);
823 }
824 
825 
826 //------------------------------------------------------------------------------
827 //
828 //    uregex_setRegion
829 //
830 //------------------------------------------------------------------------------
831 U_CAPI void U_EXPORT2
uregex_setRegion(URegularExpression * regexp2,int32_t regionStart,int32_t regionLimit,UErrorCode * status)832 uregex_setRegion(URegularExpression   *regexp2,
833                  int32_t               regionStart,
834                  int32_t               regionLimit,
835                  UErrorCode           *status)  {
836     uregex_setRegion64( regexp2, (int64_t)regionStart, (int64_t)regionLimit, status);
837 }
838 
839 U_CAPI void U_EXPORT2
uregex_setRegion64(URegularExpression * regexp2,int64_t regionStart,int64_t regionLimit,UErrorCode * status)840 uregex_setRegion64(URegularExpression   *regexp2,
841                    int64_t               regionStart,
842                    int64_t               regionLimit,
843                    UErrorCode           *status)  {
844     RegularExpression *regexp = (RegularExpression*)regexp2;
845     if (validateRE(regexp, TRUE, status) == FALSE) {
846         return;
847     }
848     regexp->fMatcher->region(regionStart, regionLimit, *status);
849 }
850 
851 
852 //------------------------------------------------------------------------------
853 //
854 //    uregex_setRegionAndStart
855 //
856 //------------------------------------------------------------------------------
857 U_CAPI void U_EXPORT2
uregex_setRegionAndStart(URegularExpression * regexp2,int64_t regionStart,int64_t regionLimit,int64_t startIndex,UErrorCode * status)858 uregex_setRegionAndStart(URegularExpression   *regexp2,
859                  int64_t               regionStart,
860                  int64_t               regionLimit,
861                  int64_t               startIndex,
862                  UErrorCode           *status)  {
863     RegularExpression *regexp = (RegularExpression*)regexp2;
864     if (validateRE(regexp, TRUE, status) == FALSE) {
865         return;
866     }
867     regexp->fMatcher->region(regionStart, regionLimit, startIndex, *status);
868 }
869 
870 //------------------------------------------------------------------------------
871 //
872 //    uregex_regionStart
873 //
874 //------------------------------------------------------------------------------
875 U_CAPI int32_t U_EXPORT2
uregex_regionStart(const URegularExpression * regexp2,UErrorCode * status)876 uregex_regionStart(const  URegularExpression   *regexp2,
877                           UErrorCode           *status)  {
878     return (int32_t)uregex_regionStart64(regexp2, status);
879 }
880 
881 U_CAPI int64_t U_EXPORT2
uregex_regionStart64(const URegularExpression * regexp2,UErrorCode * status)882 uregex_regionStart64(const  URegularExpression   *regexp2,
883                             UErrorCode           *status)  {
884     RegularExpression *regexp = (RegularExpression*)regexp2;
885     if (validateRE(regexp, TRUE, status) == FALSE) {
886         return 0;
887     }
888     return regexp->fMatcher->regionStart();
889 }
890 
891 
892 //------------------------------------------------------------------------------
893 //
894 //    uregex_regionEnd
895 //
896 //------------------------------------------------------------------------------
897 U_CAPI int32_t U_EXPORT2
uregex_regionEnd(const URegularExpression * regexp2,UErrorCode * status)898 uregex_regionEnd(const  URegularExpression   *regexp2,
899                         UErrorCode           *status)  {
900     return (int32_t)uregex_regionEnd64(regexp2, status);
901 }
902 
903 U_CAPI int64_t U_EXPORT2
uregex_regionEnd64(const URegularExpression * regexp2,UErrorCode * status)904 uregex_regionEnd64(const  URegularExpression   *regexp2,
905                           UErrorCode           *status)  {
906     RegularExpression *regexp = (RegularExpression*)regexp2;
907     if (validateRE(regexp, TRUE, status) == FALSE) {
908         return 0;
909     }
910     return regexp->fMatcher->regionEnd();
911 }
912 
913 
914 //------------------------------------------------------------------------------
915 //
916 //    uregex_hasTransparentBounds
917 //
918 //------------------------------------------------------------------------------
919 U_CAPI UBool U_EXPORT2
uregex_hasTransparentBounds(const URegularExpression * regexp2,UErrorCode * status)920 uregex_hasTransparentBounds(const  URegularExpression   *regexp2,
921                                    UErrorCode           *status)  {
922     RegularExpression *regexp = (RegularExpression*)regexp2;
923     if (validateRE(regexp, FALSE, status) == FALSE) {
924         return FALSE;
925     }
926     return regexp->fMatcher->hasTransparentBounds();
927 }
928 
929 
930 //------------------------------------------------------------------------------
931 //
932 //    uregex_useTransparentBounds
933 //
934 //------------------------------------------------------------------------------
935 U_CAPI void U_EXPORT2
uregex_useTransparentBounds(URegularExpression * regexp2,UBool b,UErrorCode * status)936 uregex_useTransparentBounds(URegularExpression    *regexp2,
937                             UBool                  b,
938                             UErrorCode            *status)  {
939     RegularExpression *regexp = (RegularExpression*)regexp2;
940     if (validateRE(regexp, FALSE, status) == FALSE) {
941         return;
942     }
943     regexp->fMatcher->useTransparentBounds(b);
944 }
945 
946 
947 //------------------------------------------------------------------------------
948 //
949 //    uregex_hasAnchoringBounds
950 //
951 //------------------------------------------------------------------------------
952 U_CAPI UBool U_EXPORT2
uregex_hasAnchoringBounds(const URegularExpression * regexp2,UErrorCode * status)953 uregex_hasAnchoringBounds(const  URegularExpression   *regexp2,
954                                  UErrorCode           *status)  {
955     RegularExpression *regexp = (RegularExpression*)regexp2;
956     if (validateRE(regexp, FALSE, status) == FALSE) {
957         return FALSE;
958     }
959     return regexp->fMatcher->hasAnchoringBounds();
960 }
961 
962 
963 //------------------------------------------------------------------------------
964 //
965 //    uregex_useAnchoringBounds
966 //
967 //------------------------------------------------------------------------------
968 U_CAPI void U_EXPORT2
uregex_useAnchoringBounds(URegularExpression * regexp2,UBool b,UErrorCode * status)969 uregex_useAnchoringBounds(URegularExpression    *regexp2,
970                           UBool                  b,
971                           UErrorCode            *status)  {
972     RegularExpression *regexp = (RegularExpression*)regexp2;
973     if (validateRE(regexp, FALSE, status) == FALSE) {
974         return;
975     }
976     regexp->fMatcher->useAnchoringBounds(b);
977 }
978 
979 
980 //------------------------------------------------------------------------------
981 //
982 //    uregex_hitEnd
983 //
984 //------------------------------------------------------------------------------
985 U_CAPI UBool U_EXPORT2
uregex_hitEnd(const URegularExpression * regexp2,UErrorCode * status)986 uregex_hitEnd(const  URegularExpression   *regexp2,
987                      UErrorCode           *status)  {
988     RegularExpression *regexp = (RegularExpression*)regexp2;
989     if (validateRE(regexp, TRUE, status) == FALSE) {
990         return FALSE;
991     }
992     return regexp->fMatcher->hitEnd();
993 }
994 
995 
996 //------------------------------------------------------------------------------
997 //
998 //    uregex_requireEnd
999 //
1000 //------------------------------------------------------------------------------
1001 U_CAPI UBool U_EXPORT2
uregex_requireEnd(const URegularExpression * regexp2,UErrorCode * status)1002 uregex_requireEnd(const  URegularExpression   *regexp2,
1003                          UErrorCode           *status)  {
1004     RegularExpression *regexp = (RegularExpression*)regexp2;
1005     if (validateRE(regexp, TRUE, status) == FALSE) {
1006         return FALSE;
1007     }
1008     return regexp->fMatcher->requireEnd();
1009 }
1010 
1011 
1012 //------------------------------------------------------------------------------
1013 //
1014 //    uregex_setTimeLimit
1015 //
1016 //------------------------------------------------------------------------------
1017 U_CAPI void U_EXPORT2
uregex_setTimeLimit(URegularExpression * regexp2,int32_t limit,UErrorCode * status)1018 uregex_setTimeLimit(URegularExpression   *regexp2,
1019                     int32_t               limit,
1020                     UErrorCode           *status) {
1021     RegularExpression *regexp = (RegularExpression*)regexp2;
1022     if (validateRE(regexp, FALSE, status)) {
1023         regexp->fMatcher->setTimeLimit(limit, *status);
1024     }
1025 }
1026 
1027 
1028 
1029 //------------------------------------------------------------------------------
1030 //
1031 //    uregex_getTimeLimit
1032 //
1033 //------------------------------------------------------------------------------
1034 U_CAPI int32_t U_EXPORT2
uregex_getTimeLimit(const URegularExpression * regexp2,UErrorCode * status)1035 uregex_getTimeLimit(const  URegularExpression   *regexp2,
1036                            UErrorCode           *status) {
1037     int32_t retVal = 0;
1038     RegularExpression *regexp = (RegularExpression*)regexp2;
1039     if (validateRE(regexp, FALSE, status)) {
1040         retVal = regexp->fMatcher->getTimeLimit();
1041     }
1042     return retVal;
1043 }
1044 
1045 
1046 
1047 //------------------------------------------------------------------------------
1048 //
1049 //    uregex_setStackLimit
1050 //
1051 //------------------------------------------------------------------------------
1052 U_CAPI void U_EXPORT2
uregex_setStackLimit(URegularExpression * regexp2,int32_t limit,UErrorCode * status)1053 uregex_setStackLimit(URegularExpression   *regexp2,
1054                      int32_t               limit,
1055                      UErrorCode           *status) {
1056     RegularExpression *regexp = (RegularExpression*)regexp2;
1057     if (validateRE(regexp, FALSE, status)) {
1058         regexp->fMatcher->setStackLimit(limit, *status);
1059     }
1060 }
1061 
1062 
1063 
1064 //------------------------------------------------------------------------------
1065 //
1066 //    uregex_getStackLimit
1067 //
1068 //------------------------------------------------------------------------------
1069 U_CAPI int32_t U_EXPORT2
uregex_getStackLimit(const URegularExpression * regexp2,UErrorCode * status)1070 uregex_getStackLimit(const  URegularExpression   *regexp2,
1071                             UErrorCode           *status) {
1072     int32_t retVal = 0;
1073     RegularExpression *regexp = (RegularExpression*)regexp2;
1074     if (validateRE(regexp, FALSE, status)) {
1075         retVal = regexp->fMatcher->getStackLimit();
1076     }
1077     return retVal;
1078 }
1079 
1080 
1081 //------------------------------------------------------------------------------
1082 //
1083 //    uregex_setMatchCallback
1084 //
1085 //------------------------------------------------------------------------------
1086 U_CAPI void U_EXPORT2
uregex_setMatchCallback(URegularExpression * regexp2,URegexMatchCallback * callback,const void * context,UErrorCode * status)1087 uregex_setMatchCallback(URegularExpression      *regexp2,
1088                         URegexMatchCallback     *callback,
1089                         const void              *context,
1090                         UErrorCode              *status) {
1091     RegularExpression *regexp = (RegularExpression*)regexp2;
1092     if (validateRE(regexp, FALSE, status)) {
1093         regexp->fMatcher->setMatchCallback(callback, context, *status);
1094     }
1095 }
1096 
1097 
1098 //------------------------------------------------------------------------------
1099 //
1100 //    uregex_getMatchCallback
1101 //
1102 //------------------------------------------------------------------------------
1103 U_CAPI void U_EXPORT2
uregex_getMatchCallback(const URegularExpression * regexp2,URegexMatchCallback ** callback,const void ** context,UErrorCode * status)1104 uregex_getMatchCallback(const URegularExpression    *regexp2,
1105                         URegexMatchCallback        **callback,
1106                         const void                 **context,
1107                         UErrorCode                  *status) {
1108     RegularExpression *regexp = (RegularExpression*)regexp2;
1109      if (validateRE(regexp, FALSE, status)) {
1110          regexp->fMatcher->getMatchCallback(*callback, *context, *status);
1111      }
1112 }
1113 
1114 
1115 //------------------------------------------------------------------------------
1116 //
//    uregex_setFindProgressCallback
1118 //
1119 //------------------------------------------------------------------------------
1120 U_CAPI void U_EXPORT2
uregex_setFindProgressCallback(URegularExpression * regexp2,URegexFindProgressCallback * callback,const void * context,UErrorCode * status)1121 uregex_setFindProgressCallback(URegularExpression              *regexp2,
1122                                 URegexFindProgressCallback      *callback,
1123                                 const void                      *context,
1124                                 UErrorCode                      *status) {
1125     RegularExpression *regexp = (RegularExpression*)regexp2;
1126     if (validateRE(regexp, FALSE, status)) {
1127         regexp->fMatcher->setFindProgressCallback(callback, context, *status);
1128     }
1129 }
1130 
1131 
1132 //------------------------------------------------------------------------------
1133 //
//    uregex_getFindProgressCallback
1135 //
1136 //------------------------------------------------------------------------------
1137 U_CAPI void U_EXPORT2
uregex_getFindProgressCallback(const URegularExpression * regexp2,URegexFindProgressCallback ** callback,const void ** context,UErrorCode * status)1138 uregex_getFindProgressCallback(const URegularExpression          *regexp2,
1139                                 URegexFindProgressCallback        **callback,
1140                                 const void                        **context,
1141                                 UErrorCode                        *status) {
1142     RegularExpression *regexp = (RegularExpression*)regexp2;
1143      if (validateRE(regexp, FALSE, status)) {
1144          regexp->fMatcher->getFindProgressCallback(*callback, *context, *status);
1145      }
1146 }
1147 
1148 
1149 //------------------------------------------------------------------------------
1150 //
1151 //    uregex_replaceAll
1152 //
1153 //------------------------------------------------------------------------------
1154 U_CAPI int32_t U_EXPORT2
uregex_replaceAll(URegularExpression * regexp2,const UChar * replacementText,int32_t replacementLength,UChar * destBuf,int32_t destCapacity,UErrorCode * status)1155 uregex_replaceAll(URegularExpression    *regexp2,
1156                   const UChar           *replacementText,
1157                   int32_t                replacementLength,
1158                   UChar                 *destBuf,
1159                   int32_t                destCapacity,
1160                   UErrorCode            *status)  {
1161     RegularExpression *regexp = (RegularExpression*)regexp2;
1162     if (validateRE(regexp, TRUE, status) == FALSE) {
1163         return 0;
1164     }
1165     if (replacementText == NULL || replacementLength < -1 ||
1166         (destBuf == NULL && destCapacity > 0) ||
1167         destCapacity < 0) {
1168         *status = U_ILLEGAL_ARGUMENT_ERROR;
1169         return 0;
1170     }
1171 
1172     int32_t   len = 0;
1173 
1174     uregex_reset(regexp2, 0, status);
1175 
1176     // Note: Seperate error code variables for findNext() and appendReplacement()
1177     //       are used so that destination buffer overflow errors
1178     //       in appendReplacement won't stop findNext() from working.
1179     //       appendReplacement() and appendTail() special case incoming buffer
1180     //       overflow errors, continuing to return the correct length.
1181     UErrorCode  findStatus = *status;
1182     while (uregex_findNext(regexp2, &findStatus)) {
1183         len += uregex_appendReplacement(regexp2, replacementText, replacementLength,
1184                                         &destBuf, &destCapacity, status);
1185     }
1186     len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
1187 
1188     if (U_FAILURE(findStatus)) {
1189         // If anything went wrong with the findNext(), make that error trump
1190         //   whatever may have happened with the append() operations.
1191         //   Errors in findNext() are not expected.
1192         *status = findStatus;
1193     }
1194 
1195     return len;
1196 }
1197 
1198 
1199 //------------------------------------------------------------------------------
1200 //
1201 //    uregex_replaceAllUText
1202 //
1203 //------------------------------------------------------------------------------
1204 U_CAPI UText * U_EXPORT2
uregex_replaceAllUText(URegularExpression * regexp2,UText * replacementText,UText * dest,UErrorCode * status)1205 uregex_replaceAllUText(URegularExpression    *regexp2,
1206                        UText                 *replacementText,
1207                        UText                 *dest,
1208                        UErrorCode            *status)  {
1209     RegularExpression *regexp = (RegularExpression*)regexp2;
1210     if (validateRE(regexp, TRUE, status) == FALSE) {
1211         return 0;
1212     }
1213     if (replacementText == NULL) {
1214         *status = U_ILLEGAL_ARGUMENT_ERROR;
1215         return 0;
1216     }
1217 
1218     dest = regexp->fMatcher->replaceAll(replacementText, dest, *status);
1219     return dest;
1220 }
1221 
1222 
1223 //------------------------------------------------------------------------------
1224 //
1225 //    uregex_replaceFirst
1226 //
1227 //------------------------------------------------------------------------------
1228 U_CAPI int32_t U_EXPORT2
uregex_replaceFirst(URegularExpression * regexp2,const UChar * replacementText,int32_t replacementLength,UChar * destBuf,int32_t destCapacity,UErrorCode * status)1229 uregex_replaceFirst(URegularExpression  *regexp2,
1230                     const UChar         *replacementText,
1231                     int32_t              replacementLength,
1232                     UChar               *destBuf,
1233                     int32_t              destCapacity,
1234                     UErrorCode          *status)  {
1235     RegularExpression *regexp = (RegularExpression*)regexp2;
1236     if (validateRE(regexp, TRUE, status) == FALSE) {
1237         return 0;
1238     }
1239     if (replacementText == NULL || replacementLength < -1 ||
1240         (destBuf == NULL && destCapacity > 0) ||
1241         destCapacity < 0) {
1242         *status = U_ILLEGAL_ARGUMENT_ERROR;
1243         return 0;
1244     }
1245 
1246     int32_t   len = 0;
1247     UBool     findSucceeded;
1248     uregex_reset(regexp2, 0, status);
1249     findSucceeded = uregex_find(regexp2, 0, status);
1250     if (findSucceeded) {
1251         len = uregex_appendReplacement(regexp2, replacementText, replacementLength,
1252                                        &destBuf, &destCapacity, status);
1253     }
1254     len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
1255 
1256     return len;
1257 }
1258 
1259 
1260 //------------------------------------------------------------------------------
1261 //
1262 //    uregex_replaceFirstUText
1263 //
1264 //------------------------------------------------------------------------------
1265 U_CAPI UText * U_EXPORT2
uregex_replaceFirstUText(URegularExpression * regexp2,UText * replacementText,UText * dest,UErrorCode * status)1266 uregex_replaceFirstUText(URegularExpression  *regexp2,
1267                          UText                 *replacementText,
1268                          UText                 *dest,
1269                          UErrorCode            *status)  {
1270     RegularExpression *regexp = (RegularExpression*)regexp2;
1271     if (validateRE(regexp, TRUE, status) == FALSE) {
1272         return 0;
1273     }
1274     if (replacementText == NULL) {
1275         *status = U_ILLEGAL_ARGUMENT_ERROR;
1276         return 0;
1277     }
1278 
1279     dest = regexp->fMatcher->replaceFirst(replacementText, dest, *status);
1280     return dest;
1281 }
1282 
1283 
1284 //------------------------------------------------------------------------------
1285 //
1286 //    uregex_appendReplacement
1287 //
1288 //------------------------------------------------------------------------------
1289 
1290 U_NAMESPACE_BEGIN
1291 //
1292 //  Dummy class, because these functions need to be friends of class RegexMatcher,
1293 //               and stand-alone C functions don't work as friends
1294 //
1295 class RegexCImpl {
1296  public:
1297    inline static  int32_t appendReplacement(RegularExpression    *regexp,
1298                       const UChar           *replacementText,
1299                       int32_t                replacementLength,
1300                       UChar                **destBuf,
1301                       int32_t               *destCapacity,
1302                       UErrorCode            *status);
1303 
1304    inline static int32_t appendTail(RegularExpression    *regexp,
1305         UChar                **destBuf,
1306         int32_t               *destCapacity,
1307         UErrorCode            *status);
1308 
1309     inline static int32_t split(RegularExpression    *regexp,
1310         UChar                 *destBuf,
1311         int32_t                destCapacity,
1312         int32_t               *requiredCapacity,
1313         UChar                 *destFields[],
1314         int32_t                destFieldsCapacity,
1315         UErrorCode            *status);
1316 };
1317 
1318 U_NAMESPACE_END
1319 
1320 
1321 
1322 static const UChar BACKSLASH  = 0x5c;
1323 static const UChar DOLLARSIGN = 0x24;
1324 
1325 //
1326 //  Move a character to an output buffer, with bounds checking on the index.
1327 //      Index advances even if capacity is exceeded, for preflight size computations.
1328 //      This little sequence is used a LOT.
1329 //
appendToBuf(UChar c,int32_t * idx,UChar * buf,int32_t bufCapacity)1330 static inline void appendToBuf(UChar c, int32_t *idx, UChar *buf, int32_t bufCapacity) {
1331     if (*idx < bufCapacity) {
1332         buf[*idx] = c;
1333     }
1334     (*idx)++;
1335 }
1336 
1337 
1338 //
1339 //  appendReplacement, the actual implementation.
1340 //
appendReplacement(RegularExpression * regexp,const UChar * replacementText,int32_t replacementLength,UChar ** destBuf,int32_t * destCapacity,UErrorCode * status)1341 int32_t RegexCImpl::appendReplacement(RegularExpression    *regexp,
1342                                       const UChar           *replacementText,
1343                                       int32_t                replacementLength,
1344                                       UChar                **destBuf,
1345                                       int32_t               *destCapacity,
1346                                       UErrorCode            *status)  {
1347 
1348     // If we come in with a buffer overflow error, don't suppress the operation.
1349     //  A series of appendReplacements, appendTail need to correctly preflight
1350     //  the buffer size when an overflow happens somewhere in the middle.
1351     UBool pendingBufferOverflow = FALSE;
1352     if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
1353         pendingBufferOverflow = TRUE;
1354         *status = U_ZERO_ERROR;
1355     }
1356 
1357     //
1358     // Validate all paramters
1359     //
1360     if (validateRE(regexp, TRUE, status) == FALSE) {
1361         return 0;
1362     }
1363     if (replacementText == NULL || replacementLength < -1 ||
1364         destCapacity == NULL || destBuf == NULL ||
1365         (*destBuf == NULL && *destCapacity > 0) ||
1366         *destCapacity < 0) {
1367         *status = U_ILLEGAL_ARGUMENT_ERROR;
1368         return 0;
1369     }
1370 
1371     RegexMatcher *m = regexp->fMatcher;
1372     if (m->fMatch == FALSE) {
1373         *status = U_REGEX_INVALID_STATE;
1374         return 0;
1375     }
1376 
1377     UChar    *dest             = *destBuf;
1378     int32_t   capacity         = *destCapacity;
1379     int32_t   destIdx          =  0;
1380     int32_t   i;
1381 
1382     // If it wasn't supplied by the caller,  get the length of the replacement text.
1383     //   TODO:  slightly smarter logic in the copy loop could watch for the NUL on
1384     //          the fly and avoid this step.
1385     if (replacementLength == -1) {
1386         replacementLength = u_strlen(replacementText);
1387     }
1388 
1389     // Copy input string from the end of previous match to start of current match
1390     if (regexp->fText != NULL) {
1391         int32_t matchStart;
1392         int32_t lastMatchEnd;
1393         if (UTEXT_USES_U16(m->fInputText)) {
1394             lastMatchEnd = (int32_t)m->fLastMatchEnd;
1395             matchStart = (int32_t)m->fMatchStart;
1396         } else {
1397             // !!!: Would like a better way to do this!
1398             UErrorCode status = U_ZERO_ERROR;
1399             lastMatchEnd = utext_extract(m->fInputText, 0, m->fLastMatchEnd, NULL, 0, &status);
1400             status = U_ZERO_ERROR;
1401             matchStart = lastMatchEnd + utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, NULL, 0, &status);
1402         }
1403         for (i=lastMatchEnd; i<matchStart; i++) {
1404             appendToBuf(regexp->fText[i], &destIdx, dest, capacity);
1405         }
1406     } else {
1407         UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore
1408         destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart,
1409                                  dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity),
1410                                  &possibleOverflowError);
1411     }
1412     U_ASSERT(destIdx >= 0);
1413 
1414     // scan the replacement text, looking for substitutions ($n) and \escapes.
1415     int32_t  replIdx = 0;
1416     while (replIdx < replacementLength) {
1417         UChar  c = replacementText[replIdx];
1418         replIdx++;
1419         if (c != DOLLARSIGN && c != BACKSLASH) {
1420             // Common case, no substitution, no escaping,
1421             //  just copy the char to the dest buf.
1422             appendToBuf(c, &destIdx, dest, capacity);
1423             continue;
1424         }
1425 
1426         if (c == BACKSLASH) {
1427             // Backslash Escape.  Copy the following char out without further checks.
1428             //                    Note:  Surrogate pairs don't need any special handling
1429             //                           The second half wont be a '$' or a '\', and
1430             //                           will move to the dest normally on the next
1431             //                           loop iteration.
1432             if (replIdx >= replacementLength) {
1433                 break;
1434             }
1435             c = replacementText[replIdx];
1436 
1437             if (c==0x55/*U*/ || c==0x75/*u*/) {
1438                 // We have a \udddd or \Udddddddd escape sequence.
1439                 UChar32 escapedChar =
1440                     u_unescapeAt(uregex_ucstr_unescape_charAt,
1441                        &replIdx,                   // Index is updated by unescapeAt
1442                        replacementLength,          // Length of replacement text
1443                        (void *)replacementText);
1444 
1445                 if (escapedChar != (UChar32)0xFFFFFFFF) {
1446                     if (escapedChar <= 0xffff) {
1447                         appendToBuf((UChar)escapedChar, &destIdx, dest, capacity);
1448                     } else {
1449                         appendToBuf(U16_LEAD(escapedChar), &destIdx, dest, capacity);
1450                         appendToBuf(U16_TRAIL(escapedChar), &destIdx, dest, capacity);
1451                     }
1452                     continue;
1453                 }
1454                 // Note:  if the \u escape was invalid, just fall through and
1455                 //        treat it as a plain \<anything> escape.
1456             }
1457 
1458             // Plain backslash escape.  Just put out the escaped character.
1459             appendToBuf(c, &destIdx, dest, capacity);
1460 
1461             replIdx++;
1462             continue;
1463         }
1464 
1465 
1466 
1467         // We've got a $.  Pick up a capture group number if one follows.
1468         // Consume at most the number of digits necessary for the largest capture
1469         // number that is valid for this pattern.
1470 
1471         int32_t numDigits = 0;
1472         int32_t groupNum  = 0;
1473         UChar32 digitC;
1474         for (;;) {
1475             if (replIdx >= replacementLength) {
1476                 break;
1477             }
1478             U16_GET(replacementText, 0, replIdx, replacementLength, digitC);
1479             if (u_isdigit(digitC) == FALSE) {
1480                 break;
1481             }
1482 
1483             U16_FWD_1(replacementText, replIdx, replacementLength);
1484             groupNum=groupNum*10 + u_charDigitValue(digitC);
1485             numDigits++;
1486             if (numDigits >= m->fPattern->fMaxCaptureDigits) {
1487                 break;
1488             }
1489         }
1490 
1491 
1492         if (numDigits == 0) {
1493             // The $ didn't introduce a group number at all.
1494             // Treat it as just part of the substitution text.
1495             appendToBuf(DOLLARSIGN, &destIdx, dest, capacity);
1496             continue;
1497         }
1498 
1499         // Finally, append the capture group data to the destination.
1500         destIdx += uregex_group((URegularExpression*)regexp, groupNum,
1501                                 dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), status);
1502         if (*status == U_BUFFER_OVERFLOW_ERROR) {
1503             // Ignore buffer overflow when extracting the group.  We need to
1504             //   continue on to get full size of the untruncated result.  We will
1505             //   raise our own buffer overflow error at the end.
1506             *status = U_ZERO_ERROR;
1507         }
1508 
1509         if (U_FAILURE(*status)) {
1510             // Can fail if group number is out of range.
1511             break;
1512         }
1513 
1514     }
1515 
1516     //
1517     //  Nul Terminate the dest buffer if possible.
1518     //  Set the appropriate buffer overflow or not terminated error, if needed.
1519     //
1520     if (destIdx < capacity) {
1521         dest[destIdx] = 0;
1522     } else if (destIdx == *destCapacity) {
1523         *status = U_STRING_NOT_TERMINATED_WARNING;
1524     } else {
1525         *status = U_BUFFER_OVERFLOW_ERROR;
1526     }
1527 
1528     //
1529     // Return an updated dest buffer and capacity to the caller.
1530     //
1531     if (destIdx > 0 &&  *destCapacity > 0) {
1532         if (destIdx < capacity) {
1533             *destBuf      += destIdx;
1534             *destCapacity -= destIdx;
1535         } else {
1536             *destBuf      += capacity;
1537             *destCapacity =  0;
1538         }
1539     }
1540 
1541     // If we came in with a buffer overflow, make sure we go out with one also.
1542     //   (A zero length match right at the end of the previous match could
1543     //    make this function succeed even though a previous call had overflowed the buf)
1544     if (pendingBufferOverflow && U_SUCCESS(*status)) {
1545         *status = U_BUFFER_OVERFLOW_ERROR;
1546     }
1547 
1548     return destIdx;
1549 }
1550 
1551 //
1552 //   appendReplacement   the actual API function,
1553 //
1554 U_CAPI int32_t U_EXPORT2
uregex_appendReplacement(URegularExpression * regexp2,const UChar * replacementText,int32_t replacementLength,UChar ** destBuf,int32_t * destCapacity,UErrorCode * status)1555 uregex_appendReplacement(URegularExpression    *regexp2,
1556                          const UChar           *replacementText,
1557                          int32_t                replacementLength,
1558                          UChar                **destBuf,
1559                          int32_t               *destCapacity,
1560                          UErrorCode            *status) {
1561 
1562     RegularExpression *regexp = (RegularExpression*)regexp2;
1563     return RegexCImpl::appendReplacement(
1564         regexp, replacementText, replacementLength,destBuf, destCapacity, status);
1565 }
1566 
1567 //
1568 //   uregex_appendReplacementUText...can just use the normal C++ method
1569 //
1570 U_CAPI void U_EXPORT2
uregex_appendReplacementUText(URegularExpression * regexp2,UText * replText,UText * dest,UErrorCode * status)1571 uregex_appendReplacementUText(URegularExpression    *regexp2,
1572                               UText                 *replText,
1573                               UText                 *dest,
1574                               UErrorCode            *status)  {
1575     RegularExpression *regexp = (RegularExpression*)regexp2;
1576     regexp->fMatcher->appendReplacement(dest, replText, *status);
1577 }
1578 
1579 
1580 //------------------------------------------------------------------------------
1581 //
1582 //    uregex_appendTail
1583 //
1584 //------------------------------------------------------------------------------
appendTail(RegularExpression * regexp,UChar ** destBuf,int32_t * destCapacity,UErrorCode * status)1585 int32_t RegexCImpl::appendTail(RegularExpression    *regexp,
1586                                UChar                **destBuf,
1587                                int32_t               *destCapacity,
1588                                UErrorCode            *status)
1589 {
1590 
1591     // If we come in with a buffer overflow error, don't suppress the operation.
1592     //  A series of appendReplacements, appendTail need to correctly preflight
1593     //  the buffer size when an overflow happens somewhere in the middle.
1594     UBool pendingBufferOverflow = FALSE;
1595     if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
1596         pendingBufferOverflow = TRUE;
1597         *status = U_ZERO_ERROR;
1598     }
1599 
1600     if (validateRE(regexp, TRUE, status) == FALSE) {
1601         return 0;
1602     }
1603 
1604     if (destCapacity == NULL || destBuf == NULL ||
1605         (*destBuf == NULL && *destCapacity > 0) ||
1606         *destCapacity < 0)
1607     {
1608         *status = U_ILLEGAL_ARGUMENT_ERROR;
1609         return 0;
1610     }
1611 
1612     RegexMatcher *m = regexp->fMatcher;
1613 
1614     int32_t  destIdx     = 0;
1615     int32_t  destCap     = *destCapacity;
1616     UChar    *dest       = *destBuf;
1617 
1618     if (regexp->fText != NULL) {
1619         int32_t srcIdx;
1620         int64_t nativeIdx = (m->fMatch ? m->fMatchEnd : m->fLastMatchEnd);
1621         if (nativeIdx == -1) {
1622             srcIdx = 0;
1623         } else if (UTEXT_USES_U16(m->fInputText)) {
1624             srcIdx = (int32_t)nativeIdx;
1625         } else {
1626             UErrorCode status = U_ZERO_ERROR;
1627             srcIdx = utext_extract(m->fInputText, 0, nativeIdx, NULL, 0, &status);
1628         }
1629 
1630         for (;;) {
1631             U_ASSERT(destIdx >= 0);
1632 
1633             if (srcIdx == regexp->fTextLength) {
1634                 break;
1635             }
1636             UChar c = regexp->fText[srcIdx];
1637             if (c == 0 && regexp->fTextLength == -1) {
1638                 regexp->fTextLength = srcIdx;
1639                 break;
1640             }
1641 
1642             if (destIdx < destCap) {
1643                 dest[destIdx] = c;
1644             } else {
1645                 // We've overflowed the dest buffer.
1646                 //  If the total input string length is known, we can
1647                 //    compute the total buffer size needed without scanning through the string.
1648                 if (regexp->fTextLength > 0) {
1649                     destIdx += (regexp->fTextLength - srcIdx);
1650                     break;
1651                 }
1652             }
1653             srcIdx++;
1654             destIdx++;
1655         }
1656     } else {
1657         int64_t  srcIdx;
1658         if (m->fMatch) {
1659             // The most recent call to find() succeeded.
1660             srcIdx = m->fMatchEnd;
1661         } else {
1662             // The last call to find() on this matcher failed().
1663             //   Look back to the end of the last find() that succeeded for src index.
1664             srcIdx = m->fLastMatchEnd;
1665             if (srcIdx == -1)  {
1666                 // There has been no successful match with this matcher.
1667                 //   We want to copy the whole string.
1668                 srcIdx = 0;
1669             }
1670         }
1671 
1672         destIdx = utext_extract(m->fInputText, srcIdx, m->fInputLength, dest, destCap, status);
1673     }
1674 
1675     //
1676     //  NUL terminate the output string, if possible, otherwise issue the
1677     //   appropriate error or warning.
1678     //
1679     if (destIdx < destCap) {
1680         dest[destIdx] = 0;
1681     } else  if (destIdx == destCap) {
1682         *status = U_STRING_NOT_TERMINATED_WARNING;
1683     } else {
1684         *status = U_BUFFER_OVERFLOW_ERROR;
1685     }
1686 
1687     //
1688     // Update the user's buffer ptr and capacity vars to reflect the
1689     //   amount used.
1690     //
1691     if (destIdx < destCap) {
1692         *destBuf      += destIdx;
1693         *destCapacity -= destIdx;
1694     } else if (*destBuf != NULL) {
1695         *destBuf      += destCap;
1696         *destCapacity  = 0;
1697     }
1698 
1699     if (pendingBufferOverflow && U_SUCCESS(*status)) {
1700         *status = U_BUFFER_OVERFLOW_ERROR;
1701     }
1702 
1703     return destIdx;
1704 }
1705 
1706 
1707 //
1708 //   appendTail   the actual API function
1709 //
1710 U_CAPI int32_t U_EXPORT2
uregex_appendTail(URegularExpression * regexp2,UChar ** destBuf,int32_t * destCapacity,UErrorCode * status)1711 uregex_appendTail(URegularExpression    *regexp2,
1712                   UChar                **destBuf,
1713                   int32_t               *destCapacity,
1714                   UErrorCode            *status)  {
1715     RegularExpression *regexp = (RegularExpression*)regexp2;
1716     return RegexCImpl::appendTail(regexp, destBuf, destCapacity, status);
1717 }
1718 
1719 
1720 //
1721 //   uregex_appendTailUText...can just use the normal C++ method
1722 //
1723 U_CAPI UText * U_EXPORT2
uregex_appendTailUText(URegularExpression * regexp2,UText * dest,UErrorCode * status)1724 uregex_appendTailUText(URegularExpression    *regexp2,
1725                        UText                 *dest,
1726                        UErrorCode            *status)  {
1727     RegularExpression *regexp = (RegularExpression*)regexp2;
1728     return regexp->fMatcher->appendTail(dest, *status);
1729 }
1730 
1731 
//------------------------------------------------------------------------------
//
//    copyString     Internal utility (currently compiled out) that appends a
//                   string to an output buffer while tracking overflow.
//
//                   Characters are stored only while they fit.  *destIndex is
//                   always advanced by the full source length plus one for the
//                   terminating NUL, so after an overflow it holds the size
//                   that would have been needed.  The NUL itself is written
//                   only when there is room for it.
//
//------------------------------------------------------------------------------
#if 0
static void copyString(UChar        *destBuffer,    //  Destination buffer.
                       int32_t       destCapacity,  //  Total capacity of dest buffer
                       int32_t      *destIndex,     //  Index into dest buffer.  Updated on return.
                                                    //    Update not clipped to destCapacity.
                       const UChar  *srcPtr,        //  Pointer to source string
                       int32_t       srcLen)        //  Source string len.
{
    int32_t  outPos = *destIndex;
    int32_t  inPos  = 0;

    // Store characters for as long as there is both input and room.
    while (inPos < srcLen && outPos < destCapacity) {
        destBuffer[outPos] = srcPtr[inPos];
        ++outPos;
        ++inPos;
    }

    // Anything that did not fit is counted, but not stored.
    if (inPos < srcLen) {
        outPos += srcLen - inPos;
    }

    // Terminate if possible; the NUL is counted whether or not it was written.
    if (outPos < destCapacity) {
        destBuffer[outPos] = 0;
    }
    *destIndex = outPos + 1;
}
#endif
1769 
1770 //------------------------------------------------------------------------------
1771 //
1772 //    uregex_split
1773 //
1774 //------------------------------------------------------------------------------
split(RegularExpression * regexp,UChar * destBuf,int32_t destCapacity,int32_t * requiredCapacity,UChar * destFields[],int32_t destFieldsCapacity,UErrorCode * status)1775 int32_t RegexCImpl::split(RegularExpression     *regexp,
1776                           UChar                 *destBuf,
1777                           int32_t                destCapacity,
1778                           int32_t               *requiredCapacity,
1779                           UChar                 *destFields[],
1780                           int32_t                destFieldsCapacity,
1781                           UErrorCode            *status) {
1782     //
1783     // Reset for the input text
1784     //
1785     regexp->fMatcher->reset();
1786     UText *inputText = regexp->fMatcher->fInputText;
1787     int64_t   nextOutputStringStart = 0;
1788     int64_t   inputLen = regexp->fMatcher->fInputLength;
1789     if (inputLen == 0) {
1790         return 0;
1791     }
1792 
1793     //
1794     // Loop through the input text, searching for the delimiter pattern
1795     //
1796     int32_t   i;             // Index of the field being processed.
1797     int32_t   destIdx = 0;   // Next available position in destBuf;
1798     int32_t   numCaptureGroups = regexp->fMatcher->groupCount();
1799     UErrorCode  tStatus = U_ZERO_ERROR;   // Want to ignore any buffer overflow errors so that the strings are still counted
1800     for (i=0; ; i++) {
1801         if (i>=destFieldsCapacity-1) {
1802             // There are one or zero output strings left.
1803             // Fill the last output string with whatever is left from the input, then exit the loop.
1804             //  ( i will be == destFieldsCapacity if we filled the output array while processing
1805             //    capture groups of the delimiter expression, in which case we will discard the
1806             //    last capture group saved in favor of the unprocessed remainder of the
1807             //    input string.)
1808             if (inputLen > nextOutputStringStart) {
1809                 if (i != destFieldsCapacity-1) {
1810                     // No fields are left.  Recycle the last one for holding the trailing part of
1811                     //   the input string.
1812                     i = destFieldsCapacity-1;
1813                     destIdx = (int32_t)(destFields[i] - destFields[0]);
1814                 }
1815 
1816                 destFields[i] = &destBuf[destIdx];
1817                 destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
1818                                              &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
1819             }
1820             break;
1821         }
1822 
1823         if (regexp->fMatcher->find()) {
1824             // We found another delimiter.  Move everything from where we started looking
1825             //  up until the start of the delimiter into the next output string.
1826             destFields[i] = &destBuf[destIdx];
1827 
1828             destIdx += 1 + utext_extract(inputText, nextOutputStringStart, regexp->fMatcher->fMatchStart,
1829                                          &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus);
1830             if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
1831                 tStatus = U_ZERO_ERROR;
1832             } else {
1833                 *status = tStatus;
1834             }
1835             nextOutputStringStart = regexp->fMatcher->fMatchEnd;
1836 
1837             // If the delimiter pattern has capturing parentheses, the captured
1838             //  text goes out into the next n destination strings.
1839             int32_t groupNum;
1840             for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) {
1841                 // If we've run out of output string slots, bail out.
1842                 if (i==destFieldsCapacity-1) {
1843                     break;
1844                 }
1845                 i++;
1846 
1847                 // Set up to extract the capture group contents into the dest buffer.
1848                 destFields[i] = &destBuf[destIdx];
1849                 tStatus = U_ZERO_ERROR;
1850                 int32_t t = uregex_group((URegularExpression*)regexp,
1851                                          groupNum,
1852                                          destFields[i],
1853                                          REMAINING_CAPACITY(destIdx, destCapacity),
1854                                          &tStatus);
1855                 destIdx += t + 1;    // Record the space used in the output string buffer.
1856                                      //  +1 for the NUL that terminates the string.
1857                 if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
1858                     tStatus = U_ZERO_ERROR;
1859                 } else {
1860                     *status = tStatus;
1861                 }
1862             }
1863 
1864             if (nextOutputStringStart == inputLen) {
1865                 // The delimiter was at the end of the string.
1866                 // Output an empty string, and then we are done.
1867                 if (destIdx < destCapacity) {
1868                     destBuf[destIdx] = 0;
1869                 }
1870                 if (i < destFieldsCapacity-1) {
1871                    ++i;
1872                 }
1873                 if (destIdx < destCapacity) {
1874                     destFields[i] = destBuf + destIdx;
1875                 }
1876                 ++destIdx;
1877                 break;
1878             }
1879 
1880         }
1881         else
1882         {
1883             // We ran off the end of the input while looking for the next delimiter.
1884             // All the remaining text goes into the current output string.
1885             destFields[i] = &destBuf[destIdx];
1886             destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
1887                                          &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
1888             break;
1889         }
1890     }
1891 
1892     // Zero out any unused portion of the destFields array
1893     int j;
1894     for (j=i+1; j<destFieldsCapacity; j++) {
1895         destFields[j] = NULL;
1896     }
1897 
1898     if (requiredCapacity != NULL) {
1899         *requiredCapacity = destIdx;
1900     }
1901     if (destIdx > destCapacity) {
1902         *status = U_BUFFER_OVERFLOW_ERROR;
1903     }
1904     return i+1;
1905 }
1906 
1907 //
1908 //   uregex_split   The actual API function
1909 //
1910 U_CAPI int32_t U_EXPORT2
uregex_split(URegularExpression * regexp2,UChar * destBuf,int32_t destCapacity,int32_t * requiredCapacity,UChar * destFields[],int32_t destFieldsCapacity,UErrorCode * status)1911 uregex_split(URegularExpression      *regexp2,
1912              UChar                   *destBuf,
1913              int32_t                  destCapacity,
1914              int32_t                 *requiredCapacity,
1915              UChar                   *destFields[],
1916              int32_t                  destFieldsCapacity,
1917              UErrorCode              *status) {
1918     RegularExpression *regexp = (RegularExpression*)regexp2;
1919     if (validateRE(regexp, TRUE, status) == FALSE) {
1920         return 0;
1921     }
1922     if ((destBuf == NULL && destCapacity > 0) ||
1923         destCapacity < 0 ||
1924         destFields == NULL ||
1925         destFieldsCapacity < 1 ) {
1926         *status = U_ILLEGAL_ARGUMENT_ERROR;
1927         return 0;
1928     }
1929 
1930     return RegexCImpl::split(regexp, destBuf, destCapacity, requiredCapacity, destFields, destFieldsCapacity, status);
1931 }
1932 
1933 
1934 //
1935 //   uregex_splitUText...can just use the normal C++ method
1936 //
1937 U_CAPI int32_t U_EXPORT2
uregex_splitUText(URegularExpression * regexp2,UText * destFields[],int32_t destFieldsCapacity,UErrorCode * status)1938 uregex_splitUText(URegularExpression    *regexp2,
1939                   UText                 *destFields[],
1940                   int32_t                destFieldsCapacity,
1941                   UErrorCode            *status) {
1942     RegularExpression *regexp = (RegularExpression*)regexp2;
1943     return regexp->fMatcher->split(regexp->fMatcher->inputText(), destFields, destFieldsCapacity, *status);
1944 }
1945 
1946 
1947 #endif   // !UCONFIG_NO_REGULAR_EXPRESSIONS
1948 
1949