• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *   Copyright (C) 2004-2011, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 *   file name:  regex.cpp
7 */
8 
9 #include "unicode/utypes.h"
10 
11 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
12 
13 #include "unicode/regex.h"
14 #include "unicode/uregex.h"
15 #include "unicode/unistr.h"
16 #include "unicode/ustring.h"
17 #include "unicode/uchar.h"
18 #include "unicode/uobject.h"
19 #include "umutex.h"
20 #include "uassert.h"
21 #include "cmemory.h"
22 
23 #include "regextxt.h"
24 
25 #include <stdio.h>
26 
27 U_NAMESPACE_BEGIN
28 
// Clamped difference: yields (len - idx) when that is positive, otherwise 0.
// Each argument is expanded more than once, so pass expressions without side effects.
#define REMAINING_CAPACITY(idx,len) ((((len)-(idx))>0)?((len)-(idx)):0)
30 
31 struct RegularExpression: public UMemory {
32 public:
33     RegularExpression();
34     ~RegularExpression();
35     int32_t           fMagic;
36     RegexPattern     *fPat;
37     int32_t          *fPatRefCount;
38     UChar            *fPatString;
39     int32_t           fPatStringLen;
40     RegexMatcher     *fMatcher;
41     const UChar      *fText;         // Text from setText()
42     int32_t           fTextLength;   // Length provided by user with setText(), which
43                                      //  may be -1.
44     UBool             fOwnsText;
45 };
46 
// Tag stored in RegularExpression::fMagic; validateRE() rejects any handle
// whose fMagic does not hold this value.
static const int32_t REXP_MAGIC = 0x72657870; // "rexp" in ASCII
48 
RegularExpression()49 RegularExpression::RegularExpression() {
50     fMagic        = REXP_MAGIC;
51     fPat          = NULL;
52     fPatRefCount  = NULL;
53     fPatString    = NULL;
54     fPatStringLen = 0;
55     fMatcher      = NULL;
56     fText         = NULL;
57     fTextLength   = 0;
58     fOwnsText     = FALSE;
59 }
60 
~RegularExpression()61 RegularExpression::~RegularExpression() {
62     delete fMatcher;
63     fMatcher = NULL;
64     if (fPatRefCount!=NULL && umtx_atomic_dec(fPatRefCount)==0) {
65         delete fPat;
66         uprv_free(fPatString);
67         uprv_free(fPatRefCount);
68     }
69     if (fOwnsText && fText!=NULL) {
70         uprv_free((void *)fText);
71     }
72     fMagic = 0;
73 }
74 
75 U_NAMESPACE_END
76 
77 U_NAMESPACE_USE
78 
79 //----------------------------------------------------------------------------------------
80 //
81 //   validateRE    Do boilerplate style checks on API function parameters.
82 //                 Return TRUE if they look OK.
83 //----------------------------------------------------------------------------------------
validateRE(const RegularExpression * re,UBool requiresText,UErrorCode * status)84 static UBool validateRE(const RegularExpression *re, UBool requiresText, UErrorCode *status) {
85     if (U_FAILURE(*status)) {
86         return FALSE;
87     }
88     if (re == NULL || re->fMagic != REXP_MAGIC) {
89         *status = U_ILLEGAL_ARGUMENT_ERROR;
90         return FALSE;
91     }
92     // !!! Not sure how to update this with the new UText backing, which is stored in re->fMatcher anyway
93     if (requiresText && re->fText == NULL && !re->fOwnsText) {
94         *status = U_REGEX_INVALID_STATE;
95         return FALSE;
96     }
97     return TRUE;
98 }
99 
100 //----------------------------------------------------------------------------------------
101 //
102 //    uregex_open
103 //
104 //----------------------------------------------------------------------------------------
105 U_CAPI URegularExpression *  U_EXPORT2
uregex_open(const UChar * pattern,int32_t patternLength,uint32_t flags,UParseError * pe,UErrorCode * status)106 uregex_open( const  UChar          *pattern,
107                     int32_t         patternLength,
108                     uint32_t        flags,
109                     UParseError    *pe,
110                     UErrorCode     *status) {
111 
112     if (U_FAILURE(*status)) {
113         return NULL;
114     }
115     if (pattern == NULL || patternLength < -1 || patternLength == 0) {
116         *status = U_ILLEGAL_ARGUMENT_ERROR;
117         return NULL;
118     }
119     int32_t actualPatLen = patternLength;
120     if (actualPatLen == -1) {
121         actualPatLen = u_strlen(pattern);
122     }
123 
124     RegularExpression *re     = new RegularExpression;
125     int32_t            *refC   = (int32_t *)uprv_malloc(sizeof(int32_t));
126     UChar              *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(actualPatLen+1));
127     if (re == NULL || refC == NULL || patBuf == NULL) {
128         *status = U_MEMORY_ALLOCATION_ERROR;
129         delete re;
130         uprv_free(refC);
131         uprv_free(patBuf);
132         return NULL;
133     }
134     re->fPatRefCount = refC;
135     *re->fPatRefCount = 1;
136 
137     //
138     // Make a copy of the pattern string, so we can return it later if asked.
139     //    For compiling the pattern, we will use a UText wrapper around
140     //    this local copy, to avoid making even more copies.
141     //
142     re->fPatString    = patBuf;
143     re->fPatStringLen = patternLength;
144     u_memcpy(patBuf, pattern, actualPatLen);
145     patBuf[actualPatLen] = 0;
146 
147     UText patText = UTEXT_INITIALIZER;
148     utext_openUChars(&patText, patBuf, patternLength, status);
149 
150     //
151     // Compile the pattern
152     //
153     if (pe != NULL) {
154         re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
155     } else {
156         re->fPat = RegexPattern::compile(&patText, flags, *status);
157     }
158     utext_close(&patText);
159 
160     if (U_FAILURE(*status)) {
161         goto ErrorExit;
162     }
163 
164     //
165     // Create the matcher object
166     //
167     re->fMatcher = re->fPat->matcher(*status);
168     if (U_SUCCESS(*status)) {
169         return (URegularExpression*)re;
170     }
171 
172 ErrorExit:
173     delete re;
174     return NULL;
175 
176 }
177 
178 //----------------------------------------------------------------------------------------
179 //
180 //    uregex_openUText
181 //
182 //----------------------------------------------------------------------------------------
183 U_CAPI URegularExpression *  U_EXPORT2
uregex_openUText(UText * pattern,uint32_t flags,UParseError * pe,UErrorCode * status)184 uregex_openUText(UText          *pattern,
185                  uint32_t        flags,
186                  UParseError    *pe,
187                  UErrorCode     *status) {
188 
189     if (U_FAILURE(*status)) {
190         return NULL;
191     }
192     if (pattern == NULL) {
193         *status = U_ILLEGAL_ARGUMENT_ERROR;
194         return NULL;
195     }
196 
197     int64_t patternNativeLength = utext_nativeLength(pattern);
198 
199     if (patternNativeLength == 0) {
200         *status = U_ILLEGAL_ARGUMENT_ERROR;
201         return NULL;
202     }
203 
204     RegularExpression *re     = new RegularExpression;
205 
206     UErrorCode lengthStatus = U_ZERO_ERROR;
207     int32_t pattern16Length = utext_extract(pattern, 0, patternNativeLength, NULL, 0, &lengthStatus);
208 
209     int32_t            *refC   = (int32_t *)uprv_malloc(sizeof(int32_t));
210     UChar              *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(pattern16Length+1));
211     if (re == NULL || refC == NULL || patBuf == NULL) {
212         *status = U_MEMORY_ALLOCATION_ERROR;
213         delete re;
214         uprv_free(refC);
215         uprv_free(patBuf);
216         return NULL;
217     }
218     re->fPatRefCount = refC;
219     *re->fPatRefCount = 1;
220 
221     //
222     // Make a copy of the pattern string, so we can return it later if asked.
223     //    For compiling the pattern, we will use a read-only UText wrapper
224     //    around this local copy, to avoid making even more copies.
225     //
226     re->fPatString    = patBuf;
227     re->fPatStringLen = pattern16Length;
228     utext_extract(pattern, 0, patternNativeLength, patBuf, pattern16Length+1, status);
229 
230     UText patText = UTEXT_INITIALIZER;
231     utext_openUChars(&patText, patBuf, pattern16Length, status);
232 
233     //
234     // Compile the pattern
235     //
236     if (pe != NULL) {
237         re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
238     } else {
239         re->fPat = RegexPattern::compile(&patText, flags, *status);
240     }
241     utext_close(&patText);
242 
243     if (U_FAILURE(*status)) {
244         goto ErrorExit;
245     }
246 
247     //
248     // Create the matcher object
249     //
250     re->fMatcher = re->fPat->matcher(*status);
251     if (U_SUCCESS(*status)) {
252         return (URegularExpression*)re;
253     }
254 
255 ErrorExit:
256     delete re;
257     return NULL;
258 
259 }
260 
261 //----------------------------------------------------------------------------------------
262 //
263 //    uregex_close
264 //
265 //----------------------------------------------------------------------------------------
266 U_CAPI void  U_EXPORT2
uregex_close(URegularExpression * re2)267 uregex_close(URegularExpression  *re2) {
268     RegularExpression *re = (RegularExpression*)re2;
269     UErrorCode  status = U_ZERO_ERROR;
270     if (validateRE(re, FALSE, &status) == FALSE) {
271         return;
272     }
273     delete re;
274 }
275 
276 
277 //----------------------------------------------------------------------------------------
278 //
279 //    uregex_clone
280 //
281 //----------------------------------------------------------------------------------------
282 U_CAPI URegularExpression * U_EXPORT2
uregex_clone(const URegularExpression * source2,UErrorCode * status)283 uregex_clone(const URegularExpression *source2, UErrorCode *status)  {
284     RegularExpression *source = (RegularExpression*)source2;
285     if (validateRE(source, FALSE, status) == FALSE) {
286         return NULL;
287     }
288 
289     RegularExpression *clone = new RegularExpression;
290     if (clone == NULL) {
291         *status = U_MEMORY_ALLOCATION_ERROR;
292         return NULL;
293     }
294 
295     clone->fMatcher = source->fPat->matcher(*status);
296     if (U_FAILURE(*status)) {
297         delete clone;
298         return NULL;
299     }
300 
301     clone->fPat          = source->fPat;
302     clone->fPatRefCount  = source->fPatRefCount;
303     clone->fPatString    = source->fPatString;
304     clone->fPatStringLen = source->fPatStringLen;
305     umtx_atomic_inc(source->fPatRefCount);
306     // Note:  fText is not cloned.
307 
308     return (URegularExpression*)clone;
309 }
310 
311 
312 
313 
314 //------------------------------------------------------------------------------
315 //
316 //    uregex_pattern
317 //
318 //------------------------------------------------------------------------------
319 U_CAPI const UChar * U_EXPORT2
uregex_pattern(const URegularExpression * regexp2,int32_t * patLength,UErrorCode * status)320 uregex_pattern(const  URegularExpression *regexp2,
321                       int32_t            *patLength,
322                       UErrorCode         *status)  {
323     RegularExpression *regexp = (RegularExpression*)regexp2;
324 
325     if (validateRE(regexp, FALSE, status) == FALSE) {
326         return NULL;
327     }
328     if (patLength != NULL) {
329         *patLength = regexp->fPatStringLen;
330     }
331     return regexp->fPatString;
332 }
333 
334 
335 //------------------------------------------------------------------------------
336 //
337 //    uregex_patternUText
338 //
339 //------------------------------------------------------------------------------
340 U_CAPI UText * U_EXPORT2
uregex_patternUText(const URegularExpression * regexp2,UErrorCode * status)341 uregex_patternUText(const URegularExpression *regexp2,
342                           UErrorCode         *status)  {
343     RegularExpression *regexp = (RegularExpression*)regexp2;
344     return regexp->fPat->patternText(*status);
345 }
346 
347 
348 //------------------------------------------------------------------------------
349 //
350 //    uregex_flags
351 //
352 //------------------------------------------------------------------------------
353 U_CAPI int32_t U_EXPORT2
uregex_flags(const URegularExpression * regexp2,UErrorCode * status)354 uregex_flags(const URegularExpression *regexp2, UErrorCode *status)  {
355     RegularExpression *regexp = (RegularExpression*)regexp2;
356     if (validateRE(regexp, FALSE, status) == FALSE) {
357         return 0;
358     }
359     int32_t flags = regexp->fPat->flags();
360     return flags;
361 }
362 
363 
364 //------------------------------------------------------------------------------
365 //
366 //    uregex_setText
367 //
368 //------------------------------------------------------------------------------
369 U_CAPI void U_EXPORT2
uregex_setText(URegularExpression * regexp2,const UChar * text,int32_t textLength,UErrorCode * status)370 uregex_setText(URegularExpression *regexp2,
371                const UChar        *text,
372                int32_t             textLength,
373                UErrorCode         *status)  {
374     RegularExpression *regexp = (RegularExpression*)regexp2;
375     if (validateRE(regexp, FALSE, status) == FALSE) {
376         return;
377     }
378     if (text == NULL || textLength < -1) {
379         *status = U_ILLEGAL_ARGUMENT_ERROR;
380         return;
381     }
382 
383     if (regexp->fOwnsText && regexp->fText != NULL) {
384         uprv_free((void *)regexp->fText);
385     }
386 
387     regexp->fText       = text;
388     regexp->fTextLength = textLength;
389     regexp->fOwnsText   = FALSE;
390 
391     UText input = UTEXT_INITIALIZER;
392     utext_openUChars(&input, text, textLength, status);
393     regexp->fMatcher->reset(&input);
394     utext_close(&input); // reset() made a shallow clone, so we don't need this copy
395 }
396 
397 
398 //------------------------------------------------------------------------------
399 //
400 //    uregex_setUText
401 //
402 //------------------------------------------------------------------------------
403 U_CAPI void U_EXPORT2
uregex_setUText(URegularExpression * regexp2,UText * text,UErrorCode * status)404 uregex_setUText(URegularExpression *regexp2,
405                 UText              *text,
406                 UErrorCode         *status) {
407     RegularExpression *regexp = (RegularExpression*)regexp2;
408     if (validateRE(regexp, FALSE, status) == FALSE) {
409         return;
410     }
411     if (text == NULL) {
412         *status = U_ILLEGAL_ARGUMENT_ERROR;
413         return;
414     }
415 
416     if (regexp->fOwnsText && regexp->fText != NULL) {
417         uprv_free((void *)regexp->fText);
418     }
419 
420     regexp->fText       = NULL; // only fill it in on request
421     regexp->fTextLength = -1;
422     regexp->fOwnsText   = TRUE;
423     regexp->fMatcher->reset(text);
424 }
425 
426 
427 
428 //------------------------------------------------------------------------------
429 //
430 //    uregex_getText
431 //
432 //------------------------------------------------------------------------------
433 U_CAPI const UChar * U_EXPORT2
uregex_getText(URegularExpression * regexp2,int32_t * textLength,UErrorCode * status)434 uregex_getText(URegularExpression *regexp2,
435                int32_t            *textLength,
436                UErrorCode         *status)  {
437     RegularExpression *regexp = (RegularExpression*)regexp2;
438     if (validateRE(regexp, FALSE, status) == FALSE) {
439         return NULL;
440     }
441 
442     if (regexp->fText == NULL) {
443         // need to fill in the text
444         UText *inputText = regexp->fMatcher->inputText();
445         int64_t inputNativeLength = utext_nativeLength(inputText);
446         if (UTEXT_FULL_TEXT_IN_CHUNK(inputText, inputNativeLength)) {
447             regexp->fText = inputText->chunkContents;
448             regexp->fTextLength = (int32_t)inputNativeLength;
449             regexp->fOwnsText = FALSE; // because the UText owns it
450         } else {
451             UErrorCode lengthStatus = U_ZERO_ERROR;
452             regexp->fTextLength = utext_extract(inputText, 0, inputNativeLength, NULL, 0, &lengthStatus); // buffer overflow error
453             UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(regexp->fTextLength+1));
454 
455             utext_extract(inputText, 0, inputNativeLength, inputChars, regexp->fTextLength+1, status);
456             regexp->fText = inputChars;
457             regexp->fOwnsText = TRUE; // should already be set but just in case
458         }
459     }
460 
461     if (textLength != NULL) {
462         *textLength = regexp->fTextLength;
463     }
464     return regexp->fText;
465 }
466 
467 
468 //------------------------------------------------------------------------------
469 //
470 //    uregex_getUText
471 //
472 //------------------------------------------------------------------------------
473 U_CAPI UText * U_EXPORT2
uregex_getUText(URegularExpression * regexp2,UText * dest,UErrorCode * status)474 uregex_getUText(URegularExpression *regexp2,
475                 UText              *dest,
476                 UErrorCode         *status)  {
477     RegularExpression *regexp = (RegularExpression*)regexp2;
478     if (validateRE(regexp, FALSE, status) == FALSE) {
479         return dest;
480     }
481     return regexp->fMatcher->getInput(dest, *status);
482 }
483 
484 
485 //------------------------------------------------------------------------------
486 //
487 //    uregex_refreshUText
488 //
489 //------------------------------------------------------------------------------
490 U_CAPI void U_EXPORT2
uregex_refreshUText(URegularExpression * regexp2,UText * text,UErrorCode * status)491 uregex_refreshUText(URegularExpression *regexp2,
492                     UText              *text,
493                     UErrorCode         *status) {
494     RegularExpression *regexp = (RegularExpression*)regexp2;
495     if (validateRE(regexp, FALSE, status) == FALSE) {
496         return;
497     }
498     regexp->fMatcher->refreshInputText(text, *status);
499 }
500 
501 
502 //------------------------------------------------------------------------------
503 //
504 //    uregex_matches
505 //
506 //------------------------------------------------------------------------------
507 U_CAPI UBool U_EXPORT2
uregex_matches(URegularExpression * regexp2,int32_t startIndex,UErrorCode * status)508 uregex_matches(URegularExpression *regexp2,
509                int32_t            startIndex,
510                UErrorCode        *status)  {
511     return uregex_matches64( regexp2, (int64_t)startIndex, status);
512 }
513 
514 U_CAPI UBool U_EXPORT2
uregex_matches64(URegularExpression * regexp2,int64_t startIndex,UErrorCode * status)515 uregex_matches64(URegularExpression *regexp2,
516                  int64_t            startIndex,
517                  UErrorCode        *status)  {
518     RegularExpression *regexp = (RegularExpression*)regexp2;
519     UBool result = FALSE;
520     if (validateRE(regexp, TRUE, status) == FALSE) {
521         return result;
522     }
523     if (startIndex == -1) {
524         result = regexp->fMatcher->matches(*status);
525     } else {
526         result = regexp->fMatcher->matches(startIndex, *status);
527     }
528     return result;
529 }
530 
531 
532 //------------------------------------------------------------------------------
533 //
534 //    uregex_lookingAt
535 //
536 //------------------------------------------------------------------------------
537 U_CAPI UBool U_EXPORT2
uregex_lookingAt(URegularExpression * regexp2,int32_t startIndex,UErrorCode * status)538 uregex_lookingAt(URegularExpression *regexp2,
539                  int32_t             startIndex,
540                  UErrorCode         *status)  {
541     return uregex_lookingAt64( regexp2, (int64_t)startIndex, status);
542 }
543 
544 U_CAPI UBool U_EXPORT2
uregex_lookingAt64(URegularExpression * regexp2,int64_t startIndex,UErrorCode * status)545 uregex_lookingAt64(URegularExpression *regexp2,
546                    int64_t             startIndex,
547                    UErrorCode         *status)  {
548     RegularExpression *regexp = (RegularExpression*)regexp2;
549     UBool result = FALSE;
550     if (validateRE(regexp, TRUE, status) == FALSE) {
551         return result;
552     }
553     if (startIndex == -1) {
554         result = regexp->fMatcher->lookingAt(*status);
555     } else {
556         result = regexp->fMatcher->lookingAt(startIndex, *status);
557     }
558     return result;
559 }
560 
561 
562 
563 //------------------------------------------------------------------------------
564 //
565 //    uregex_find
566 //
567 //------------------------------------------------------------------------------
568 U_CAPI UBool U_EXPORT2
uregex_find(URegularExpression * regexp2,int32_t startIndex,UErrorCode * status)569 uregex_find(URegularExpression *regexp2,
570             int32_t             startIndex,
571             UErrorCode         *status)  {
572     return uregex_find64( regexp2, (int64_t)startIndex, status);
573 }
574 
575 U_CAPI UBool U_EXPORT2
uregex_find64(URegularExpression * regexp2,int64_t startIndex,UErrorCode * status)576 uregex_find64(URegularExpression *regexp2,
577               int64_t             startIndex,
578               UErrorCode         *status)  {
579     RegularExpression *regexp = (RegularExpression*)regexp2;
580     UBool result = FALSE;
581     if (validateRE(regexp, TRUE, status) == FALSE) {
582         return result;
583     }
584     if (startIndex == -1) {
585         regexp->fMatcher->resetPreserveRegion();
586         result = regexp->fMatcher->find();
587     } else {
588         result = regexp->fMatcher->find(startIndex, *status);
589     }
590     return result;
591 }
592 
593 
594 //------------------------------------------------------------------------------
595 //
596 //    uregex_findNext
597 //
598 //------------------------------------------------------------------------------
599 U_CAPI UBool U_EXPORT2
uregex_findNext(URegularExpression * regexp2,UErrorCode * status)600 uregex_findNext(URegularExpression *regexp2,
601                 UErrorCode         *status)  {
602     RegularExpression *regexp = (RegularExpression*)regexp2;
603     if (validateRE(regexp, TRUE, status) == FALSE) {
604         return FALSE;
605     }
606     UBool result = regexp->fMatcher->find();
607     return result;
608 }
609 
610 //------------------------------------------------------------------------------
611 //
612 //    uregex_groupCount
613 //
614 //------------------------------------------------------------------------------
615 U_CAPI int32_t U_EXPORT2
uregex_groupCount(URegularExpression * regexp2,UErrorCode * status)616 uregex_groupCount(URegularExpression *regexp2,
617                   UErrorCode         *status)  {
618     RegularExpression *regexp = (RegularExpression*)regexp2;
619     if (validateRE(regexp, FALSE, status) == FALSE) {
620         return 0;
621     }
622     int32_t  result = regexp->fMatcher->groupCount();
623     return result;
624 }
625 
626 
627 //------------------------------------------------------------------------------
628 //
629 //    uregex_group
630 //
631 //------------------------------------------------------------------------------
632 U_CAPI int32_t U_EXPORT2
uregex_group(URegularExpression * regexp2,int32_t groupNum,UChar * dest,int32_t destCapacity,UErrorCode * status)633 uregex_group(URegularExpression *regexp2,
634              int32_t             groupNum,
635              UChar              *dest,
636              int32_t             destCapacity,
637              UErrorCode          *status)  {
638     RegularExpression *regexp = (RegularExpression*)regexp2;
639     if (validateRE(regexp, TRUE, status) == FALSE) {
640         return 0;
641     }
642     if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) {
643         *status = U_ILLEGAL_ARGUMENT_ERROR;
644         return 0;
645     }
646 
647     if (destCapacity == 0 || regexp->fText != NULL) {
648         // If preflighting or if we already have the text as UChars,
649         // this is a little cheaper than going through uregex_groupUTextDeep()
650 
651         //
652         // Pick up the range of characters from the matcher
653         //
654         int32_t  startIx = regexp->fMatcher->start(groupNum, *status);
655         int32_t  endIx   = regexp->fMatcher->end  (groupNum, *status);
656         if (U_FAILURE(*status)) {
657             return 0;
658         }
659 
660         //
661         // Trim length based on buffer capacity
662         //
663         int32_t fullLength = endIx - startIx;
664         int32_t copyLength = fullLength;
665         if (copyLength < destCapacity) {
666             dest[copyLength] = 0;
667         } else if (copyLength == destCapacity) {
668             *status = U_STRING_NOT_TERMINATED_WARNING;
669         } else {
670             copyLength = destCapacity;
671             *status = U_BUFFER_OVERFLOW_ERROR;
672         }
673 
674         //
675         // Copy capture group to user's buffer
676         //
677         if (copyLength > 0) {
678             u_memcpy(dest, &regexp->fText[startIx], copyLength);
679         }
680         return fullLength;
681     } else {
682         UText *groupText = uregex_groupUTextDeep(regexp2, groupNum, NULL, status);
683         int32_t result = utext_extract(groupText, 0, utext_nativeLength(groupText), dest, destCapacity, status);
684         utext_close(groupText);
685         return result;
686     }
687 }
688 
689 
690 //------------------------------------------------------------------------------
691 //
692 //    uregex_groupUText
693 //
694 //------------------------------------------------------------------------------
695 U_CAPI UText * U_EXPORT2
uregex_groupUText(URegularExpression * regexp2,int32_t groupNum,UText * dest,int64_t * groupLength,UErrorCode * status)696 uregex_groupUText(URegularExpression *regexp2,
697                   int32_t             groupNum,
698                   UText              *dest,
699                   int64_t            *groupLength,
700                   UErrorCode         *status)  {
701     RegularExpression *regexp = (RegularExpression*)regexp2;
702     if (validateRE(regexp, TRUE, status) == FALSE) {
703         UErrorCode emptyTextStatus = U_ZERO_ERROR;
704         return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
705     }
706 
707     return regexp->fMatcher->group(groupNum, dest, *groupLength, *status);
708 }
709 
710 //------------------------------------------------------------------------------
711 //
712 //    uregex_groupUTextDeep
713 //
714 //------------------------------------------------------------------------------
715 U_CAPI UText * U_EXPORT2
uregex_groupUTextDeep(URegularExpression * regexp2,int32_t groupNum,UText * dest,UErrorCode * status)716 uregex_groupUTextDeep(URegularExpression *regexp2,
717                   int32_t             groupNum,
718                   UText              *dest,
719                   UErrorCode         *status)  {
720     RegularExpression *regexp = (RegularExpression*)regexp2;
721     if (validateRE(regexp, TRUE, status) == FALSE) {
722         UErrorCode emptyTextStatus = U_ZERO_ERROR;
723         return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
724     }
725 
726     if (regexp->fText != NULL) {
727         //
728         // Pick up the range of characters from the matcher
729         // and use our already-extracted characters
730         //
731         int32_t  startIx = regexp->fMatcher->start(groupNum, *status);
732         int32_t  endIx   = regexp->fMatcher->end  (groupNum, *status);
733         if (U_FAILURE(*status)) {
734             UErrorCode emptyTextStatus = U_ZERO_ERROR;
735             return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
736         }
737 
738         if (dest) {
739             utext_replace(dest, 0, utext_nativeLength(dest), &regexp->fText[startIx], endIx - startIx, status);
740         } else {
741             UText groupText = UTEXT_INITIALIZER;
742             utext_openUChars(&groupText, &regexp->fText[startIx], endIx - startIx, status);
743             dest = utext_clone(NULL, &groupText, TRUE, FALSE, status);
744             utext_close(&groupText);
745         }
746 
747         return dest;
748     } else {
749         return regexp->fMatcher->group(groupNum, dest, *status);
750     }
751 }
752 
753 //------------------------------------------------------------------------------
754 //
755 //    uregex_start
756 //
757 //------------------------------------------------------------------------------
758 U_CAPI int32_t U_EXPORT2
uregex_start(URegularExpression * regexp2,int32_t groupNum,UErrorCode * status)759 uregex_start(URegularExpression *regexp2,
760              int32_t             groupNum,
761              UErrorCode          *status)  {
762     return (int32_t)uregex_start64( regexp2, groupNum, status);
763 }
764 
765 U_CAPI int64_t U_EXPORT2
uregex_start64(URegularExpression * regexp2,int32_t groupNum,UErrorCode * status)766 uregex_start64(URegularExpression *regexp2,
767                int32_t             groupNum,
768                UErrorCode          *status)  {
769     RegularExpression *regexp = (RegularExpression*)regexp2;
770     if (validateRE(regexp, TRUE, status) == FALSE) {
771         return 0;
772     }
773     int32_t result = regexp->fMatcher->start(groupNum, *status);
774     return result;
775 }
776 
777 //------------------------------------------------------------------------------
778 //
779 //    uregex_end
780 //
781 //------------------------------------------------------------------------------
782 U_CAPI int32_t U_EXPORT2
uregex_end(URegularExpression * regexp2,int32_t groupNum,UErrorCode * status)783 uregex_end(URegularExpression   *regexp2,
784            int32_t               groupNum,
785            UErrorCode           *status)  {
786     return (int32_t)uregex_end64( regexp2, groupNum, status);
787 }
788 
789 U_CAPI int64_t U_EXPORT2
uregex_end64(URegularExpression * regexp2,int32_t groupNum,UErrorCode * status)790 uregex_end64(URegularExpression   *regexp2,
791              int32_t               groupNum,
792              UErrorCode           *status)  {
793     RegularExpression *regexp = (RegularExpression*)regexp2;
794     if (validateRE(regexp, TRUE, status) == FALSE) {
795         return 0;
796     }
797     int32_t result = regexp->fMatcher->end(groupNum, *status);
798     return result;
799 }
800 
801 //------------------------------------------------------------------------------
802 //
803 //    uregex_reset
804 //
805 //------------------------------------------------------------------------------
806 U_CAPI void U_EXPORT2
uregex_reset(URegularExpression * regexp2,int32_t index,UErrorCode * status)807 uregex_reset(URegularExpression    *regexp2,
808              int32_t               index,
809              UErrorCode            *status)  {
810     uregex_reset64( regexp2, (int64_t)index, status);
811 }
812 
813 U_CAPI void U_EXPORT2
uregex_reset64(URegularExpression * regexp2,int64_t index,UErrorCode * status)814 uregex_reset64(URegularExpression    *regexp2,
815                int64_t               index,
816                UErrorCode            *status)  {
817     RegularExpression *regexp = (RegularExpression*)regexp2;
818     if (validateRE(regexp, TRUE, status) == FALSE) {
819         return;
820     }
821     regexp->fMatcher->reset(index, *status);
822 }
823 
824 
825 //------------------------------------------------------------------------------
826 //
827 //    uregex_setRegion
828 //
829 //------------------------------------------------------------------------------
830 U_CAPI void U_EXPORT2
uregex_setRegion(URegularExpression * regexp2,int32_t regionStart,int32_t regionLimit,UErrorCode * status)831 uregex_setRegion(URegularExpression   *regexp2,
832                  int32_t               regionStart,
833                  int32_t               regionLimit,
834                  UErrorCode           *status)  {
835     uregex_setRegion64( regexp2, (int64_t)regionStart, (int64_t)regionLimit, status);
836 }
837 
838 U_CAPI void U_EXPORT2
uregex_setRegion64(URegularExpression * regexp2,int64_t regionStart,int64_t regionLimit,UErrorCode * status)839 uregex_setRegion64(URegularExpression   *regexp2,
840                    int64_t               regionStart,
841                    int64_t               regionLimit,
842                    UErrorCode           *status)  {
843     RegularExpression *regexp = (RegularExpression*)regexp2;
844     if (validateRE(regexp, TRUE, status) == FALSE) {
845         return;
846     }
847     regexp->fMatcher->region(regionStart, regionLimit, *status);
848 }
849 
850 
851 //------------------------------------------------------------------------------
852 //
853 //    uregex_setRegionAndStart
854 //
855 //------------------------------------------------------------------------------
856 U_DRAFT void U_EXPORT2
uregex_setRegionAndStart(URegularExpression * regexp2,int64_t regionStart,int64_t regionLimit,int64_t startIndex,UErrorCode * status)857 uregex_setRegionAndStart(URegularExpression   *regexp2,
858                  int64_t               regionStart,
859                  int64_t               regionLimit,
860                  int64_t               startIndex,
861                  UErrorCode           *status)  {
862     RegularExpression *regexp = (RegularExpression*)regexp2;
863     if (validateRE(regexp, TRUE, status) == FALSE) {
864         return;
865     }
866     regexp->fMatcher->region(regionStart, regionLimit, startIndex, *status);
867 }
868 
869 //------------------------------------------------------------------------------
870 //
871 //    uregex_regionStart
872 //
873 //------------------------------------------------------------------------------
874 U_CAPI int32_t U_EXPORT2
uregex_regionStart(const URegularExpression * regexp2,UErrorCode * status)875 uregex_regionStart(const  URegularExpression   *regexp2,
876                           UErrorCode           *status)  {
877     return (int32_t)uregex_regionStart64(regexp2, status);
878 }
879 
880 U_CAPI int64_t U_EXPORT2
uregex_regionStart64(const URegularExpression * regexp2,UErrorCode * status)881 uregex_regionStart64(const  URegularExpression   *regexp2,
882                             UErrorCode           *status)  {
883     RegularExpression *regexp = (RegularExpression*)regexp2;
884     if (validateRE(regexp, TRUE, status) == FALSE) {
885         return 0;
886     }
887     return regexp->fMatcher->regionStart();
888 }
889 
890 
891 //------------------------------------------------------------------------------
892 //
893 //    uregex_regionEnd
894 //
895 //------------------------------------------------------------------------------
896 U_CAPI int32_t U_EXPORT2
uregex_regionEnd(const URegularExpression * regexp2,UErrorCode * status)897 uregex_regionEnd(const  URegularExpression   *regexp2,
898                         UErrorCode           *status)  {
899     return (int32_t)uregex_regionEnd64(regexp2, status);
900 }
901 
902 U_CAPI int64_t U_EXPORT2
uregex_regionEnd64(const URegularExpression * regexp2,UErrorCode * status)903 uregex_regionEnd64(const  URegularExpression   *regexp2,
904                           UErrorCode           *status)  {
905     RegularExpression *regexp = (RegularExpression*)regexp2;
906     if (validateRE(regexp, TRUE, status) == FALSE) {
907         return 0;
908     }
909     return regexp->fMatcher->regionEnd();
910 }
911 
912 
913 //------------------------------------------------------------------------------
914 //
915 //    uregex_hasTransparentBounds
916 //
917 //------------------------------------------------------------------------------
918 U_CAPI UBool U_EXPORT2
uregex_hasTransparentBounds(const URegularExpression * regexp2,UErrorCode * status)919 uregex_hasTransparentBounds(const  URegularExpression   *regexp2,
920                                    UErrorCode           *status)  {
921     RegularExpression *regexp = (RegularExpression*)regexp2;
922     if (validateRE(regexp, FALSE, status) == FALSE) {
923         return FALSE;
924     }
925     return regexp->fMatcher->hasTransparentBounds();
926 }
927 
928 
929 //------------------------------------------------------------------------------
930 //
931 //    uregex_useTransparentBounds
932 //
933 //------------------------------------------------------------------------------
934 U_CAPI void U_EXPORT2
uregex_useTransparentBounds(URegularExpression * regexp2,UBool b,UErrorCode * status)935 uregex_useTransparentBounds(URegularExpression    *regexp2,
936                             UBool                  b,
937                             UErrorCode            *status)  {
938     RegularExpression *regexp = (RegularExpression*)regexp2;
939     if (validateRE(regexp, FALSE, status) == FALSE) {
940         return;
941     }
942     regexp->fMatcher->useTransparentBounds(b);
943 }
944 
945 
946 //------------------------------------------------------------------------------
947 //
948 //    uregex_hasAnchoringBounds
949 //
950 //------------------------------------------------------------------------------
951 U_CAPI UBool U_EXPORT2
uregex_hasAnchoringBounds(const URegularExpression * regexp2,UErrorCode * status)952 uregex_hasAnchoringBounds(const  URegularExpression   *regexp2,
953                                  UErrorCode           *status)  {
954     RegularExpression *regexp = (RegularExpression*)regexp2;
955     if (validateRE(regexp, FALSE, status) == FALSE) {
956         return FALSE;
957     }
958     return regexp->fMatcher->hasAnchoringBounds();
959 }
960 
961 
962 //------------------------------------------------------------------------------
963 //
964 //    uregex_useAnchoringBounds
965 //
966 //------------------------------------------------------------------------------
967 U_CAPI void U_EXPORT2
uregex_useAnchoringBounds(URegularExpression * regexp2,UBool b,UErrorCode * status)968 uregex_useAnchoringBounds(URegularExpression    *regexp2,
969                           UBool                  b,
970                           UErrorCode            *status)  {
971     RegularExpression *regexp = (RegularExpression*)regexp2;
972     if (validateRE(regexp, FALSE, status) == FALSE) {
973         return;
974     }
975     regexp->fMatcher->useAnchoringBounds(b);
976 }
977 
978 
979 //------------------------------------------------------------------------------
980 //
981 //    uregex_hitEnd
982 //
983 //------------------------------------------------------------------------------
984 U_CAPI UBool U_EXPORT2
uregex_hitEnd(const URegularExpression * regexp2,UErrorCode * status)985 uregex_hitEnd(const  URegularExpression   *regexp2,
986                      UErrorCode           *status)  {
987     RegularExpression *regexp = (RegularExpression*)regexp2;
988     if (validateRE(regexp, TRUE, status) == FALSE) {
989         return FALSE;
990     }
991     return regexp->fMatcher->hitEnd();
992 }
993 
994 
995 //------------------------------------------------------------------------------
996 //
997 //    uregex_requireEnd
998 //
999 //------------------------------------------------------------------------------
1000 U_CAPI UBool U_EXPORT2
uregex_requireEnd(const URegularExpression * regexp2,UErrorCode * status)1001 uregex_requireEnd(const  URegularExpression   *regexp2,
1002                          UErrorCode           *status)  {
1003     RegularExpression *regexp = (RegularExpression*)regexp2;
1004     if (validateRE(regexp, TRUE, status) == FALSE) {
1005         return FALSE;
1006     }
1007     return regexp->fMatcher->requireEnd();
1008 }
1009 
1010 
1011 //------------------------------------------------------------------------------
1012 //
1013 //    uregex_setTimeLimit
1014 //
1015 //------------------------------------------------------------------------------
1016 U_CAPI void U_EXPORT2
uregex_setTimeLimit(URegularExpression * regexp2,int32_t limit,UErrorCode * status)1017 uregex_setTimeLimit(URegularExpression   *regexp2,
1018                     int32_t               limit,
1019                     UErrorCode           *status) {
1020     RegularExpression *regexp = (RegularExpression*)regexp2;
1021     if (validateRE(regexp, FALSE, status)) {
1022         regexp->fMatcher->setTimeLimit(limit, *status);
1023     }
1024 }
1025 
1026 
1027 
1028 //------------------------------------------------------------------------------
1029 //
1030 //    uregex_getTimeLimit
1031 //
1032 //------------------------------------------------------------------------------
1033 U_CAPI int32_t U_EXPORT2
uregex_getTimeLimit(const URegularExpression * regexp2,UErrorCode * status)1034 uregex_getTimeLimit(const  URegularExpression   *regexp2,
1035                            UErrorCode           *status) {
1036     int32_t retVal = 0;
1037     RegularExpression *regexp = (RegularExpression*)regexp2;
1038     if (validateRE(regexp, FALSE, status)) {
1039         retVal = regexp->fMatcher->getTimeLimit();
1040     }
1041     return retVal;
1042 }
1043 
1044 
1045 
1046 //------------------------------------------------------------------------------
1047 //
1048 //    uregex_setStackLimit
1049 //
1050 //------------------------------------------------------------------------------
1051 U_CAPI void U_EXPORT2
uregex_setStackLimit(URegularExpression * regexp2,int32_t limit,UErrorCode * status)1052 uregex_setStackLimit(URegularExpression   *regexp2,
1053                      int32_t               limit,
1054                      UErrorCode           *status) {
1055     RegularExpression *regexp = (RegularExpression*)regexp2;
1056     if (validateRE(regexp, FALSE, status)) {
1057         regexp->fMatcher->setStackLimit(limit, *status);
1058     }
1059 }
1060 
1061 
1062 
1063 //------------------------------------------------------------------------------
1064 //
1065 //    uregex_getStackLimit
1066 //
1067 //------------------------------------------------------------------------------
1068 U_CAPI int32_t U_EXPORT2
uregex_getStackLimit(const URegularExpression * regexp2,UErrorCode * status)1069 uregex_getStackLimit(const  URegularExpression   *regexp2,
1070                             UErrorCode           *status) {
1071     int32_t retVal = 0;
1072     RegularExpression *regexp = (RegularExpression*)regexp2;
1073     if (validateRE(regexp, FALSE, status)) {
1074         retVal = regexp->fMatcher->getStackLimit();
1075     }
1076     return retVal;
1077 }
1078 
1079 
1080 //------------------------------------------------------------------------------
1081 //
1082 //    uregex_setMatchCallback
1083 //
1084 //------------------------------------------------------------------------------
1085 U_CAPI void U_EXPORT2
uregex_setMatchCallback(URegularExpression * regexp2,URegexMatchCallback * callback,const void * context,UErrorCode * status)1086 uregex_setMatchCallback(URegularExpression      *regexp2,
1087                         URegexMatchCallback     *callback,
1088                         const void              *context,
1089                         UErrorCode              *status) {
1090     RegularExpression *regexp = (RegularExpression*)regexp2;
1091     if (validateRE(regexp, FALSE, status)) {
1092         regexp->fMatcher->setMatchCallback(callback, context, *status);
1093     }
1094 }
1095 
1096 
1097 //------------------------------------------------------------------------------
1098 //
1099 //    uregex_getMatchCallback
1100 //
1101 //------------------------------------------------------------------------------
1102 U_CAPI void U_EXPORT2
uregex_getMatchCallback(const URegularExpression * regexp2,URegexMatchCallback ** callback,const void ** context,UErrorCode * status)1103 uregex_getMatchCallback(const URegularExpression    *regexp2,
1104                         URegexMatchCallback        **callback,
1105                         const void                 **context,
1106                         UErrorCode                  *status) {
1107     RegularExpression *regexp = (RegularExpression*)regexp2;
1108      if (validateRE(regexp, FALSE, status)) {
1109          regexp->fMatcher->getMatchCallback(*callback, *context, *status);
1110      }
1111 }
1112 
1113 
1114 //------------------------------------------------------------------------------
1115 //
1116 //    uregex_setFindProgressCallback
1117 //
1118 //------------------------------------------------------------------------------
1119 U_CAPI void U_EXPORT2
uregex_setFindProgressCallback(URegularExpression * regexp2,URegexFindProgressCallback * callback,const void * context,UErrorCode * status)1120 uregex_setFindProgressCallback(URegularExpression              *regexp2,
1121                                 URegexFindProgressCallback      *callback,
1122                                 const void                      *context,
1123                                 UErrorCode                      *status) {
1124     RegularExpression *regexp = (RegularExpression*)regexp2;
1125     if (validateRE(regexp, FALSE, status)) {
1126         regexp->fMatcher->setFindProgressCallback(callback, context, *status);
1127     }
1128 }
1129 
1130 
1131 //------------------------------------------------------------------------------
1132 //
1133 //    uregex_getFindProgressCallback
1134 //
1135 //------------------------------------------------------------------------------
1136 U_CAPI void U_EXPORT2
uregex_getFindProgressCallback(const URegularExpression * regexp2,URegexFindProgressCallback ** callback,const void ** context,UErrorCode * status)1137 uregex_getFindProgressCallback(const URegularExpression          *regexp2,
1138                                 URegexFindProgressCallback        **callback,
1139                                 const void                        **context,
1140                                 UErrorCode                        *status) {
1141     RegularExpression *regexp = (RegularExpression*)regexp2;
1142      if (validateRE(regexp, FALSE, status)) {
1143          regexp->fMatcher->getFindProgressCallback(*callback, *context, *status);
1144      }
1145 }
1146 
1147 
1148 //------------------------------------------------------------------------------
1149 //
1150 //    uregex_replaceAll
1151 //
1152 //------------------------------------------------------------------------------
1153 U_CAPI int32_t U_EXPORT2
uregex_replaceAll(URegularExpression * regexp2,const UChar * replacementText,int32_t replacementLength,UChar * destBuf,int32_t destCapacity,UErrorCode * status)1154 uregex_replaceAll(URegularExpression    *regexp2,
1155                   const UChar           *replacementText,
1156                   int32_t                replacementLength,
1157                   UChar                 *destBuf,
1158                   int32_t                destCapacity,
1159                   UErrorCode            *status)  {
1160     RegularExpression *regexp = (RegularExpression*)regexp2;
1161     if (validateRE(regexp, TRUE, status) == FALSE) {
1162         return 0;
1163     }
1164     if (replacementText == NULL || replacementLength < -1 ||
1165         (destBuf == NULL && destCapacity > 0) ||
1166         destCapacity < 0) {
1167         *status = U_ILLEGAL_ARGUMENT_ERROR;
1168         return 0;
1169     }
1170 
1171     int32_t   len = 0;
1172 
1173     uregex_reset(regexp2, 0, status);
1174 
1175     // Note: Seperate error code variables for findNext() and appendReplacement()
1176     //       are used so that destination buffer overflow errors
1177     //       in appendReplacement won't stop findNext() from working.
1178     //       appendReplacement() and appendTail() special case incoming buffer
1179     //       overflow errors, continuing to return the correct length.
1180     UErrorCode  findStatus = *status;
1181     while (uregex_findNext(regexp2, &findStatus)) {
1182         len += uregex_appendReplacement(regexp2, replacementText, replacementLength,
1183                                         &destBuf, &destCapacity, status);
1184     }
1185     len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
1186 
1187     if (U_FAILURE(findStatus)) {
1188         // If anything went wrong with the findNext(), make that error trump
1189         //   whatever may have happened with the append() operations.
1190         //   Errors in findNext() are not expected.
1191         *status = findStatus;
1192     }
1193 
1194     return len;
1195 }
1196 
1197 
1198 //------------------------------------------------------------------------------
1199 //
1200 //    uregex_replaceAllUText
1201 //
1202 //------------------------------------------------------------------------------
1203 U_CAPI UText * U_EXPORT2
uregex_replaceAllUText(URegularExpression * regexp2,UText * replacementText,UText * dest,UErrorCode * status)1204 uregex_replaceAllUText(URegularExpression    *regexp2,
1205                        UText                 *replacementText,
1206                        UText                 *dest,
1207                        UErrorCode            *status)  {
1208     RegularExpression *regexp = (RegularExpression*)regexp2;
1209     if (validateRE(regexp, TRUE, status) == FALSE) {
1210         return 0;
1211     }
1212     if (replacementText == NULL) {
1213         *status = U_ILLEGAL_ARGUMENT_ERROR;
1214         return 0;
1215     }
1216 
1217     dest = regexp->fMatcher->replaceAll(replacementText, dest, *status);
1218     return dest;
1219 }
1220 
1221 
1222 //------------------------------------------------------------------------------
1223 //
1224 //    uregex_replaceFirst
1225 //
1226 //------------------------------------------------------------------------------
1227 U_CAPI int32_t U_EXPORT2
uregex_replaceFirst(URegularExpression * regexp2,const UChar * replacementText,int32_t replacementLength,UChar * destBuf,int32_t destCapacity,UErrorCode * status)1228 uregex_replaceFirst(URegularExpression  *regexp2,
1229                     const UChar         *replacementText,
1230                     int32_t              replacementLength,
1231                     UChar               *destBuf,
1232                     int32_t              destCapacity,
1233                     UErrorCode          *status)  {
1234     RegularExpression *regexp = (RegularExpression*)regexp2;
1235     if (validateRE(regexp, TRUE, status) == FALSE) {
1236         return 0;
1237     }
1238     if (replacementText == NULL || replacementLength < -1 ||
1239         (destBuf == NULL && destCapacity > 0) ||
1240         destCapacity < 0) {
1241         *status = U_ILLEGAL_ARGUMENT_ERROR;
1242         return 0;
1243     }
1244 
1245     int32_t   len = 0;
1246     UBool     findSucceeded;
1247     uregex_reset(regexp2, 0, status);
1248     findSucceeded = uregex_find(regexp2, 0, status);
1249     if (findSucceeded) {
1250         len = uregex_appendReplacement(regexp2, replacementText, replacementLength,
1251                                        &destBuf, &destCapacity, status);
1252     }
1253     len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
1254 
1255     return len;
1256 }
1257 
1258 
1259 //------------------------------------------------------------------------------
1260 //
1261 //    uregex_replaceFirstUText
1262 //
1263 //------------------------------------------------------------------------------
1264 U_CAPI UText * U_EXPORT2
uregex_replaceFirstUText(URegularExpression * regexp2,UText * replacementText,UText * dest,UErrorCode * status)1265 uregex_replaceFirstUText(URegularExpression  *regexp2,
1266                          UText                 *replacementText,
1267                          UText                 *dest,
1268                          UErrorCode            *status)  {
1269     RegularExpression *regexp = (RegularExpression*)regexp2;
1270     if (validateRE(regexp, TRUE, status) == FALSE) {
1271         return 0;
1272     }
1273     if (replacementText == NULL) {
1274         *status = U_ILLEGAL_ARGUMENT_ERROR;
1275         return 0;
1276     }
1277 
1278     dest = regexp->fMatcher->replaceFirst(replacementText, dest, *status);
1279     return dest;
1280 }
1281 
1282 
1283 //------------------------------------------------------------------------------
1284 //
1285 //    uregex_appendReplacement
1286 //
1287 //------------------------------------------------------------------------------
1288 
1289 U_NAMESPACE_BEGIN
1290 //
1291 //  Dummy class, because these functions need to be friends of class RegexMatcher,
1292 //               and stand-alone C functions don't work as friends
1293 //
1294 class RegexCImpl {
1295  public:
1296    inline static  int32_t appendReplacement(RegularExpression    *regexp,
1297                       const UChar           *replacementText,
1298                       int32_t                replacementLength,
1299                       UChar                **destBuf,
1300                       int32_t               *destCapacity,
1301                       UErrorCode            *status);
1302 
1303    inline static int32_t appendTail(RegularExpression    *regexp,
1304         UChar                **destBuf,
1305         int32_t               *destCapacity,
1306         UErrorCode            *status);
1307 
1308     inline static int32_t split(RegularExpression    *regexp,
1309         UChar                 *destBuf,
1310         int32_t                destCapacity,
1311         int32_t               *requiredCapacity,
1312         UChar                 *destFields[],
1313         int32_t                destFieldsCapacity,
1314         UErrorCode            *status);
1315 };
1316 
1317 U_NAMESPACE_END
1318 
1319 
1320 
1321 static const UChar BACKSLASH  = 0x5c;
1322 static const UChar DOLLARSIGN = 0x24;
1323 
1324 //
1325 //  Move a character to an output buffer, with bounds checking on the index.
1326 //      Index advances even if capacity is exceeded, for preflight size computations.
1327 //      This little sequence is used a LOT.
1328 //
appendToBuf(UChar c,int32_t * idx,UChar * buf,int32_t bufCapacity)1329 static inline void appendToBuf(UChar c, int32_t *idx, UChar *buf, int32_t bufCapacity) {
1330     if (*idx < bufCapacity) {
1331         buf[*idx] = c;
1332     }
1333     (*idx)++;
1334 }
1335 
1336 
1337 //
1338 //  appendReplacement, the actual implementation.
1339 //
appendReplacement(RegularExpression * regexp,const UChar * replacementText,int32_t replacementLength,UChar ** destBuf,int32_t * destCapacity,UErrorCode * status)1340 int32_t RegexCImpl::appendReplacement(RegularExpression    *regexp,
1341                                       const UChar           *replacementText,
1342                                       int32_t                replacementLength,
1343                                       UChar                **destBuf,
1344                                       int32_t               *destCapacity,
1345                                       UErrorCode            *status)  {
1346 
1347     // If we come in with a buffer overflow error, don't suppress the operation.
1348     //  A series of appendReplacements, appendTail need to correctly preflight
1349     //  the buffer size when an overflow happens somewhere in the middle.
1350     UBool pendingBufferOverflow = FALSE;
1351     if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
1352         pendingBufferOverflow = TRUE;
1353         *status = U_ZERO_ERROR;
1354     }
1355 
1356     //
1357     // Validate all paramters
1358     //
1359     if (validateRE(regexp, TRUE, status) == FALSE) {
1360         return 0;
1361     }
1362     if (replacementText == NULL || replacementLength < -1 ||
1363         destCapacity == NULL || destBuf == NULL ||
1364         (*destBuf == NULL && *destCapacity > 0) ||
1365         *destCapacity < 0) {
1366         *status = U_ILLEGAL_ARGUMENT_ERROR;
1367         return 0;
1368     }
1369 
1370     RegexMatcher *m = regexp->fMatcher;
1371     if (m->fMatch == FALSE) {
1372         *status = U_REGEX_INVALID_STATE;
1373         return 0;
1374     }
1375 
1376     UChar    *dest             = *destBuf;
1377     int32_t   capacity         = *destCapacity;
1378     int32_t   destIdx          =  0;
1379     int32_t   i;
1380 
1381     // If it wasn't supplied by the caller,  get the length of the replacement text.
1382     //   TODO:  slightly smarter logic in the copy loop could watch for the NUL on
1383     //          the fly and avoid this step.
1384     if (replacementLength == -1) {
1385         replacementLength = u_strlen(replacementText);
1386     }
1387 
1388     // Copy input string from the end of previous match to start of current match
1389     if (regexp->fText != NULL) {
1390         int32_t matchStart;
1391         int32_t lastMatchEnd;
1392         if (UTEXT_USES_U16(m->fInputText)) {
1393             lastMatchEnd = (int32_t)m->fLastMatchEnd;
1394             matchStart = (int32_t)m->fMatchStart;
1395         } else {
1396             // !!!: Would like a better way to do this!
1397             UErrorCode status = U_ZERO_ERROR;
1398             lastMatchEnd = utext_extract(m->fInputText, 0, m->fLastMatchEnd, NULL, 0, &status);
1399             status = U_ZERO_ERROR;
1400             matchStart = lastMatchEnd + utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, NULL, 0, &status);
1401         }
1402         for (i=lastMatchEnd; i<matchStart; i++) {
1403             appendToBuf(regexp->fText[i], &destIdx, dest, capacity);
1404         }
1405     } else {
1406         UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore
1407         destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart,
1408                                  &dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), &possibleOverflowError);
1409     }
1410 
1411 
1412     // scan the replacement text, looking for substitutions ($n) and \escapes.
1413     int32_t  replIdx = 0;
1414     while (replIdx < replacementLength) {
1415         UChar  c = replacementText[replIdx];
1416         replIdx++;
1417         if (c != DOLLARSIGN && c != BACKSLASH) {
1418             // Common case, no substitution, no escaping,
1419             //  just copy the char to the dest buf.
1420             appendToBuf(c, &destIdx, dest, capacity);
1421             continue;
1422         }
1423 
1424         if (c == BACKSLASH) {
1425             // Backslash Escape.  Copy the following char out without further checks.
1426             //                    Note:  Surrogate pairs don't need any special handling
1427             //                           The second half wont be a '$' or a '\', and
1428             //                           will move to the dest normally on the next
1429             //                           loop iteration.
1430             if (replIdx >= replacementLength) {
1431                 break;
1432             }
1433             c = replacementText[replIdx];
1434 
1435             if (c==0x55/*U*/ || c==0x75/*u*/) {
1436                 // We have a \udddd or \Udddddddd escape sequence.
1437                 UChar32 escapedChar =
1438                     u_unescapeAt(uregex_ucstr_unescape_charAt,
1439                        &replIdx,                   // Index is updated by unescapeAt
1440                        replacementLength,          // Length of replacement text
1441                        (void *)replacementText);
1442 
1443                 if (escapedChar != (UChar32)0xFFFFFFFF) {
1444                     if (escapedChar <= 0xffff) {
1445                         appendToBuf((UChar)escapedChar, &destIdx, dest, capacity);
1446                     } else {
1447                         appendToBuf(U16_LEAD(escapedChar), &destIdx, dest, capacity);
1448                         appendToBuf(U16_TRAIL(escapedChar), &destIdx, dest, capacity);
1449                     }
1450                     continue;
1451                 }
1452                 // Note:  if the \u escape was invalid, just fall through and
1453                 //        treat it as a plain \<anything> escape.
1454             }
1455 
1456             // Plain backslash escape.  Just put out the escaped character.
1457             appendToBuf(c, &destIdx, dest, capacity);
1458 
1459             replIdx++;
1460             continue;
1461         }
1462 
1463 
1464 
1465         // We've got a $.  Pick up a capture group number if one follows.
1466         // Consume at most the number of digits necessary for the largest capture
1467         // number that is valid for this pattern.
1468 
1469         int32_t numDigits = 0;
1470         int32_t groupNum  = 0;
1471         UChar32 digitC;
1472         for (;;) {
1473             if (replIdx >= replacementLength) {
1474                 break;
1475             }
1476             U16_GET(replacementText, 0, replIdx, replacementLength, digitC);
1477             if (u_isdigit(digitC) == FALSE) {
1478                 break;
1479             }
1480 
1481             U16_FWD_1(replacementText, replIdx, replacementLength);
1482             groupNum=groupNum*10 + u_charDigitValue(digitC);
1483             numDigits++;
1484             if (numDigits >= m->fPattern->fMaxCaptureDigits) {
1485                 break;
1486             }
1487         }
1488 
1489 
1490         if (numDigits == 0) {
1491             // The $ didn't introduce a group number at all.
1492             // Treat it as just part of the substitution text.
1493             appendToBuf(DOLLARSIGN, &destIdx, dest, capacity);
1494             continue;
1495         }
1496 
1497         // Finally, append the capture group data to the destination.
1498         destIdx += uregex_group((URegularExpression*)regexp, groupNum, &dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), status);
1499         if (*status == U_BUFFER_OVERFLOW_ERROR) {
1500             // Ignore buffer overflow when extracting the group.  We need to
1501             //   continue on to get full size of the untruncated result.  We will
1502             //   raise our own buffer overflow error at the end.
1503             *status = U_ZERO_ERROR;
1504         }
1505 
1506         if (U_FAILURE(*status)) {
1507             // Can fail if group number is out of range.
1508             break;
1509         }
1510 
1511     }
1512 
1513     //
1514     //  Nul Terminate the dest buffer if possible.
1515     //  Set the appropriate buffer overflow or not terminated error, if needed.
1516     //
1517     if (destIdx < capacity) {
1518         dest[destIdx] = 0;
1519     } else if (destIdx == *destCapacity) {
1520         *status = U_STRING_NOT_TERMINATED_WARNING;
1521     } else {
1522         *status = U_BUFFER_OVERFLOW_ERROR;
1523     }
1524 
1525     //
1526     // Return an updated dest buffer and capacity to the caller.
1527     //
1528     if (destIdx > 0 &&  *destCapacity > 0) {
1529         if (destIdx < capacity) {
1530             *destBuf      += destIdx;
1531             *destCapacity -= destIdx;
1532         } else {
1533             *destBuf      += capacity;
1534             *destCapacity =  0;
1535         }
1536     }
1537 
1538     // If we came in with a buffer overflow, make sure we go out with one also.
1539     //   (A zero length match right at the end of the previous match could
1540     //    make this function succeed even though a previous call had overflowed the buf)
1541     if (pendingBufferOverflow && U_SUCCESS(*status)) {
1542         *status = U_BUFFER_OVERFLOW_ERROR;
1543     }
1544 
1545     return destIdx;
1546 }
1547 
1548 //
1549 //   appendReplacement   the actual API function,
1550 //
1551 U_CAPI int32_t U_EXPORT2
uregex_appendReplacement(URegularExpression * regexp2,const UChar * replacementText,int32_t replacementLength,UChar ** destBuf,int32_t * destCapacity,UErrorCode * status)1552 uregex_appendReplacement(URegularExpression    *regexp2,
1553                          const UChar           *replacementText,
1554                          int32_t                replacementLength,
1555                          UChar                **destBuf,
1556                          int32_t               *destCapacity,
1557                          UErrorCode            *status) {
1558 
1559     RegularExpression *regexp = (RegularExpression*)regexp2;
1560     return RegexCImpl::appendReplacement(
1561         regexp, replacementText, replacementLength,destBuf, destCapacity, status);
1562 }
1563 
1564 //
1565 //   uregex_appendReplacementUText...can just use the normal C++ method
1566 //
1567 U_CAPI void U_EXPORT2
uregex_appendReplacementUText(URegularExpression * regexp2,UText * replText,UText * dest,UErrorCode * status)1568 uregex_appendReplacementUText(URegularExpression    *regexp2,
1569                               UText                 *replText,
1570                               UText                 *dest,
1571                               UErrorCode            *status)  {
1572     RegularExpression *regexp = (RegularExpression*)regexp2;
1573     regexp->fMatcher->appendReplacement(dest, replText, *status);
1574 }
1575 
1576 
1577 //------------------------------------------------------------------------------
1578 //
1579 //    uregex_appendTail
1580 //
1581 //------------------------------------------------------------------------------
appendTail(RegularExpression * regexp,UChar ** destBuf,int32_t * destCapacity,UErrorCode * status)1582 int32_t RegexCImpl::appendTail(RegularExpression    *regexp,
1583                                UChar                **destBuf,
1584                                int32_t               *destCapacity,
1585                                UErrorCode            *status)
1586 {
1587 
1588     // If we come in with a buffer overflow error, don't suppress the operation.
1589     //  A series of appendReplacements, appendTail need to correctly preflight
1590     //  the buffer size when an overflow happens somewhere in the middle.
1591     UBool pendingBufferOverflow = FALSE;
1592     if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
1593         pendingBufferOverflow = TRUE;
1594         *status = U_ZERO_ERROR;
1595     }
1596 
1597     if (validateRE(regexp, TRUE, status) == FALSE) {
1598         return 0;
1599     }
1600 
1601     if (destCapacity == NULL || destBuf == NULL ||
1602         (*destBuf == NULL && *destCapacity > 0) ||
1603         *destCapacity < 0)
1604     {
1605         *status = U_ILLEGAL_ARGUMENT_ERROR;
1606         return 0;
1607     }
1608 
1609     RegexMatcher *m = regexp->fMatcher;
1610 
1611     int32_t  destIdx     = 0;
1612     int32_t  destCap     = *destCapacity;
1613     UChar    *dest       = *destBuf;
1614 
1615     if (regexp->fText != NULL) {
1616         int32_t srcIdx;
1617         int64_t nativeIdx = (m->fMatch ? m->fMatchEnd : m->fLastMatchEnd);
1618         if (nativeIdx == -1) {
1619             srcIdx = 0;
1620         } else if (UTEXT_USES_U16(m->fInputText)) {
1621             srcIdx = (int32_t)nativeIdx;
1622         } else {
1623             UErrorCode status = U_ZERO_ERROR;
1624             srcIdx = utext_extract(m->fInputText, 0, nativeIdx, NULL, 0, &status);
1625         }
1626 
1627         for (;;) {
1628             if (srcIdx == regexp->fTextLength) {
1629                 break;
1630             }
1631             UChar c = regexp->fText[srcIdx];
1632             if (c == 0 && regexp->fTextLength == -1) {
1633                 regexp->fTextLength = srcIdx;
1634                 break;
1635             }
1636             if (destIdx < destCap) {
1637                 dest[destIdx] = c;
1638             } else {
1639                 // We've overflowed the dest buffer.
1640                 //  If the total input string length is known, we can
1641                 //    compute the total buffer size needed without scanning through the string.
1642                 if (regexp->fTextLength > 0) {
1643                     destIdx += (regexp->fTextLength - srcIdx);
1644                     break;
1645                 }
1646             }
1647             srcIdx++;
1648             destIdx++;
1649         }
1650     } else {
1651         int64_t  srcIdx;
1652         if (m->fMatch) {
1653             // The most recent call to find() succeeded.
1654             srcIdx = m->fMatchEnd;
1655         } else {
1656             // The last call to find() on this matcher failed().
1657             //   Look back to the end of the last find() that succeeded for src index.
1658             srcIdx = m->fLastMatchEnd;
1659             if (srcIdx == -1)  {
1660                 // There has been no successful match with this matcher.
1661                 //   We want to copy the whole string.
1662                 srcIdx = 0;
1663             }
1664         }
1665 
1666         destIdx = utext_extract(m->fInputText, srcIdx, m->fInputLength, dest, destCap, status);
1667     }
1668 
1669     //
1670     //  NUL terminate the output string, if possible, otherwise issue the
1671     //   appropriate error or warning.
1672     //
1673     if (destIdx < destCap) {
1674         dest[destIdx] = 0;
1675     } else  if (destIdx == destCap) {
1676         *status = U_STRING_NOT_TERMINATED_WARNING;
1677     } else {
1678         *status = U_BUFFER_OVERFLOW_ERROR;
1679     }
1680 
1681     //
1682     // Update the user's buffer ptr and capacity vars to reflect the
1683     //   amount used.
1684     //
1685     if (destIdx < destCap) {
1686         *destBuf      += destIdx;
1687         *destCapacity -= destIdx;
1688     } else {
1689         *destBuf      += destCap;
1690         *destCapacity  = 0;
1691     }
1692 
1693     if (pendingBufferOverflow && U_SUCCESS(*status)) {
1694         *status = U_BUFFER_OVERFLOW_ERROR;
1695     }
1696 
1697     return destIdx;
1698 }
1699 
1700 
1701 //
1702 //   appendTail   the actual API function
1703 //
1704 U_CAPI int32_t U_EXPORT2
uregex_appendTail(URegularExpression * regexp2,UChar ** destBuf,int32_t * destCapacity,UErrorCode * status)1705 uregex_appendTail(URegularExpression    *regexp2,
1706                   UChar                **destBuf,
1707                   int32_t               *destCapacity,
1708                   UErrorCode            *status)  {
1709     RegularExpression *regexp = (RegularExpression*)regexp2;
1710     return RegexCImpl::appendTail(regexp, destBuf, destCapacity, status);
1711 }
1712 
1713 
1714 //
1715 //   uregex_appendTailUText...can just use the normal C++ method
1716 //
1717 U_CAPI UText * U_EXPORT2
uregex_appendTailUText(URegularExpression * regexp2,UText * dest,UErrorCode * status)1718 uregex_appendTailUText(URegularExpression    *regexp2,
1719                        UText                 *dest,
1720                        UErrorCode            *status)  {
1721     RegularExpression *regexp = (RegularExpression*)regexp2;
1722     return regexp->fMatcher->appendTail(dest, *status);
1723 }
1724 
1725 
1726 //------------------------------------------------------------------------------
1727 //
1728 //    copyString     Internal utility to copy a string to an output buffer,
1729 //                   while managing buffer overflow and preflight size
1730 //                   computation.  NUL termination is added to destination,
1731 //                   and the NUL is counted in the output size.
1732 //
1733 //------------------------------------------------------------------------------
1734 #if 0
1735 static void copyString(UChar        *destBuffer,    //  Destination buffer.
1736                        int32_t       destCapacity,  //  Total capacity of dest buffer
1737                        int32_t      *destIndex,     //  Index into dest buffer.  Updated on return.
1738                                                     //    Update not clipped to destCapacity.
1739                        const UChar  *srcPtr,        //  Pointer to source string
1740                        int32_t       srcLen)        //  Source string len.
1741 {
1742     int32_t  si;
1743     int32_t  di = *destIndex;
1744     UChar    c;
1745 
1746     for (si=0; si<srcLen;  si++) {
1747         c = srcPtr[si];
1748         if (di < destCapacity) {
1749             destBuffer[di] = c;
1750             di++;
1751         } else {
1752             di += srcLen - si;
1753             break;
1754         }
1755     }
1756     if (di<destCapacity) {
1757         destBuffer[di] = 0;
1758     }
1759     di++;
1760     *destIndex = di;
1761 }
1762 #endif
1763 
1764 //------------------------------------------------------------------------------
1765 //
1766 //    uregex_split
1767 //
1768 //------------------------------------------------------------------------------
split(RegularExpression * regexp,UChar * destBuf,int32_t destCapacity,int32_t * requiredCapacity,UChar * destFields[],int32_t destFieldsCapacity,UErrorCode * status)1769 int32_t RegexCImpl::split(RegularExpression     *regexp,
1770                           UChar                 *destBuf,
1771                           int32_t                destCapacity,
1772                           int32_t               *requiredCapacity,
1773                           UChar                 *destFields[],
1774                           int32_t                destFieldsCapacity,
1775                           UErrorCode            *status) {
1776     //
1777     // Reset for the input text
1778     //
1779     regexp->fMatcher->reset();
1780     UText *inputText = regexp->fMatcher->fInputText;
1781     int64_t   nextOutputStringStart = 0;
1782     int64_t   inputLen = regexp->fMatcher->fInputLength;
1783     if (inputLen == 0) {
1784         return 0;
1785     }
1786 
1787     //
1788     // Loop through the input text, searching for the delimiter pattern
1789     //
1790     int32_t   i;             // Index of the field being processed.
1791     int32_t   destIdx = 0;   // Next available position in destBuf;
1792     int32_t   numCaptureGroups = regexp->fMatcher->groupCount();
1793     UErrorCode  tStatus = U_ZERO_ERROR;   // Want to ignore any buffer overflow errors so that the strings are still counted
1794     for (i=0; ; i++) {
1795         if (i>=destFieldsCapacity-1) {
1796             // There are one or zero output strings left.
1797             // Fill the last output string with whatever is left from the input, then exit the loop.
1798             //  ( i will be == destFieldsCapacity if we filled the output array while processing
1799             //    capture groups of the delimiter expression, in which case we will discard the
1800             //    last capture group saved in favor of the unprocessed remainder of the
1801             //    input string.)
1802             if (inputLen > nextOutputStringStart) {
1803                 if (i != destFieldsCapacity-1) {
1804                     // No fields are left.  Recycle the last one for holding the trailing part of
1805                     //   the input string.
1806                     i = destFieldsCapacity-1;
1807                     destIdx = (int32_t)(destFields[i] - destFields[0]);
1808                 }
1809 
1810                 destFields[i] = &destBuf[destIdx];
1811                 destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
1812                                              &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
1813             }
1814             break;
1815         }
1816 
1817         if (regexp->fMatcher->find()) {
1818             // We found another delimiter.  Move everything from where we started looking
1819             //  up until the start of the delimiter into the next output string.
1820             destFields[i] = &destBuf[destIdx];
1821 
1822             destIdx += 1 + utext_extract(inputText, nextOutputStringStart, regexp->fMatcher->fMatchStart,
1823                                          &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus);
1824             if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
1825                 tStatus = U_ZERO_ERROR;
1826             } else {
1827                 *status = tStatus;
1828             }
1829             nextOutputStringStart = regexp->fMatcher->fMatchEnd;
1830 
1831             // If the delimiter pattern has capturing parentheses, the captured
1832             //  text goes out into the next n destination strings.
1833             int32_t groupNum;
1834             for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) {
1835                 // If we've run out of output string slots, bail out.
1836                 if (i==destFieldsCapacity-1) {
1837                     break;
1838                 }
1839                 i++;
1840 
1841                 // Set up to extract the capture group contents into the dest buffer.
1842                 destFields[i] = &destBuf[destIdx];
1843                 tStatus = U_ZERO_ERROR;
1844                 int32_t t = uregex_group((URegularExpression*)regexp,
1845                                          groupNum,
1846                                          destFields[i],
1847                                          REMAINING_CAPACITY(destIdx, destCapacity),
1848                                          &tStatus);
1849                 destIdx += t + 1;    // Record the space used in the output string buffer.
1850                                      //  +1 for the NUL that terminates the string.
1851                 if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
1852                     tStatus = U_ZERO_ERROR;
1853                 } else {
1854                     *status = tStatus;
1855                 }
1856             }
1857 
1858             if (nextOutputStringStart == inputLen) {
1859                 // The delimiter was at the end of the string.
1860                 // Output an empty string, and then we are done.
1861                 if (destIdx < destCapacity) {
1862                     destBuf[destIdx] = 0;
1863                 }
1864                 if (i < destFieldsCapacity-1) {
1865                    ++i;
1866                 }
1867                 if (destIdx < destCapacity) {
1868                     destFields[i] = destBuf + destIdx;
1869                 }
1870                 ++destIdx;
1871                 break;
1872             }
1873 
1874         }
1875         else
1876         {
1877             // We ran off the end of the input while looking for the next delimiter.
1878             // All the remaining text goes into the current output string.
1879             destFields[i] = &destBuf[destIdx];
1880             destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
1881                                          &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
1882             break;
1883         }
1884     }
1885 
1886     // Zero out any unused portion of the destFields array
1887     int j;
1888     for (j=i+1; j<destFieldsCapacity; j++) {
1889         destFields[j] = NULL;
1890     }
1891 
1892     if (requiredCapacity != NULL) {
1893         *requiredCapacity = destIdx;
1894     }
1895     if (destIdx > destCapacity) {
1896         *status = U_BUFFER_OVERFLOW_ERROR;
1897     }
1898     return i+1;
1899 }
1900 
1901 //
1902 //   uregex_split   The actual API function
1903 //
1904 U_CAPI int32_t U_EXPORT2
uregex_split(URegularExpression * regexp2,UChar * destBuf,int32_t destCapacity,int32_t * requiredCapacity,UChar * destFields[],int32_t destFieldsCapacity,UErrorCode * status)1905 uregex_split(URegularExpression      *regexp2,
1906              UChar                   *destBuf,
1907              int32_t                  destCapacity,
1908              int32_t                 *requiredCapacity,
1909              UChar                   *destFields[],
1910              int32_t                  destFieldsCapacity,
1911              UErrorCode              *status) {
1912     RegularExpression *regexp = (RegularExpression*)regexp2;
1913     if (validateRE(regexp, TRUE, status) == FALSE) {
1914         return 0;
1915     }
1916     if ((destBuf == NULL && destCapacity > 0) ||
1917         destCapacity < 0 ||
1918         destFields == NULL ||
1919         destFieldsCapacity < 1 ) {
1920         *status = U_ILLEGAL_ARGUMENT_ERROR;
1921         return 0;
1922     }
1923 
1924     return RegexCImpl::split(regexp, destBuf, destCapacity, requiredCapacity, destFields, destFieldsCapacity, status);
1925 }
1926 
1927 
1928 //
1929 //   uregex_splitUText...can just use the normal C++ method
1930 //
1931 U_CAPI int32_t U_EXPORT2
uregex_splitUText(URegularExpression * regexp2,UText * destFields[],int32_t destFieldsCapacity,UErrorCode * status)1932 uregex_splitUText(URegularExpression    *regexp2,
1933                   UText                 *destFields[],
1934                   int32_t                destFieldsCapacity,
1935                   UErrorCode            *status) {
1936     RegularExpression *regexp = (RegularExpression*)regexp2;
1937     return regexp->fMatcher->split(regexp->fMatcher->inputText(), destFields, destFieldsCapacity, *status);
1938 }
1939 
1940 
1941 #endif   // !UCONFIG_NO_REGULAR_EXPRESSIONS
1942 
1943