• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *   Copyright (C) 2004-2010, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 *   file name:  regex.cpp
7 */
8 
9 #include "unicode/utypes.h"
10 
11 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
12 
13 #include "unicode/regex.h"
14 #include "unicode/uregex.h"
15 #include "unicode/unistr.h"
16 #include "unicode/ustring.h"
17 #include "unicode/uchar.h"
18 #include "unicode/uobject.h"
19 #include "umutex.h"
20 #include "uassert.h"
21 #include "cmemory.h"
22 
23 #include "regextxt.h"
24 
25 #include <stdio.h>
26 
27 U_NAMESPACE_BEGIN
28 
// Expands to (len - idx) when that difference is positive, otherwise to 0.
// Both arguments appear more than once in the expansion, so avoid passing
// expressions with side effects.
29 #define REMAINING_CAPACITY(idx,len) ((((len)-(idx))>0)?((len)-(idx)):0)
30 
31 struct RegularExpression: public UMemory {
32 public:
33     RegularExpression();
34     ~RegularExpression();
35     int32_t           fMagic;
36     RegexPattern     *fPat;
37     int32_t          *fPatRefCount;
38     UChar            *fPatString;
39     int32_t           fPatStringLen;
40     RegexMatcher     *fMatcher;
41     const UChar      *fText;         // Text from setText()
42     int32_t           fTextLength;   // Length provided by user with setText(), which
43                                      //  may be -1.
44     UBool             fOwnsText;
45 };
46 
// Signature value stored in RegularExpression::fMagic by the constructor;
// validateRE() rejects any object whose fMagic differs from it.
47 static const int32_t REXP_MAGIC = 0x72657870; // "rexp" in ASCII
48 
RegularExpression()49 RegularExpression::RegularExpression() {
50     fMagic        = REXP_MAGIC;
51     fPat          = NULL;
52     fPatRefCount  = NULL;
53     fPatString    = NULL;
54     fPatStringLen = 0;
55     fMatcher      = NULL;
56     fText         = NULL;
57     fTextLength   = 0;
58     fOwnsText     = FALSE;
59 }
60 
~RegularExpression()61 RegularExpression::~RegularExpression() {
62     delete fMatcher;
63     fMatcher = NULL;
64     if (fPatRefCount!=NULL && umtx_atomic_dec(fPatRefCount)==0) {
65         delete fPat;
66         uprv_free(fPatString);
67         uprv_free(fPatRefCount);
68     }
69     if (fOwnsText && fText!=NULL) {
70         uprv_free((void *)fText);
71     }
72     fMagic = 0;
73 }
74 
75 U_NAMESPACE_END
76 
77 U_NAMESPACE_USE
78 
79 //----------------------------------------------------------------------------------------
80 //
81 //   validateRE    Do boilerplate style checks on API function parameters.
82 //                 Return TRUE if they look OK.
83 //----------------------------------------------------------------------------------------
validateRE(const RegularExpression * re,UErrorCode * status,UBool requiresText=TRUE)84 static UBool validateRE(const RegularExpression *re, UErrorCode *status, UBool requiresText = TRUE) {
85     if (U_FAILURE(*status)) {
86         return FALSE;
87     }
88     if (re == NULL || re->fMagic != REXP_MAGIC) {
89         *status = U_ILLEGAL_ARGUMENT_ERROR;
90         return FALSE;
91     }
92     // !!! Not sure how to update this with the new UText backing, which is stored in re->fMatcher anyway
93     if (requiresText && re->fText == NULL && !re->fOwnsText) {
94         *status = U_REGEX_INVALID_STATE;
95         return FALSE;
96     }
97     return TRUE;
98 }
99 
100 //----------------------------------------------------------------------------------------
101 //
102 //    uregex_open
103 //
104 //----------------------------------------------------------------------------------------
105 U_CAPI URegularExpression *  U_EXPORT2
uregex_open(const UChar * pattern,int32_t patternLength,uint32_t flags,UParseError * pe,UErrorCode * status)106 uregex_open( const  UChar          *pattern,
107                     int32_t         patternLength,
108                     uint32_t        flags,
109                     UParseError    *pe,
110                     UErrorCode     *status) {
111 
112     if (U_FAILURE(*status)) {
113         return NULL;
114     }
115     if (pattern == NULL || patternLength < -1 || patternLength == 0) {
116         *status = U_ILLEGAL_ARGUMENT_ERROR;
117         return NULL;
118     }
119     int32_t actualPatLen = patternLength;
120     if (actualPatLen == -1) {
121         actualPatLen = u_strlen(pattern);
122     }
123 
124     RegularExpression *re     = new RegularExpression;
125     int32_t            *refC   = (int32_t *)uprv_malloc(sizeof(int32_t));
126     UChar              *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(actualPatLen+1));
127     if (re == NULL || refC == NULL || patBuf == NULL) {
128         *status = U_MEMORY_ALLOCATION_ERROR;
129         delete re;
130         uprv_free(refC);
131         uprv_free(patBuf);
132         return NULL;
133     }
134     re->fPatRefCount = refC;
135     *re->fPatRefCount = 1;
136 
137     //
138     // Make a copy of the pattern string, so we can return it later if asked.
139     //    For compiling the pattern, we will use a UText wrapper around
140     //    this local copy, to avoid making even more copies.
141     //
142     re->fPatString    = patBuf;
143     re->fPatStringLen = patternLength;
144     u_memcpy(patBuf, pattern, actualPatLen);
145     patBuf[actualPatLen] = 0;
146 
147     UText patText = UTEXT_INITIALIZER;
148     utext_openUChars(&patText, patBuf, patternLength, status);
149 
150     //
151     // Compile the pattern
152     //
153     if (pe != NULL) {
154         re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
155     } else {
156         re->fPat = RegexPattern::compile(&patText, flags, *status);
157     }
158     utext_close(&patText);
159 
160     if (U_FAILURE(*status)) {
161         goto ErrorExit;
162     }
163 
164     //
165     // Create the matcher object
166     //
167     re->fMatcher = re->fPat->matcher(*status);
168     if (U_SUCCESS(*status)) {
169         return (URegularExpression*)re;
170     }
171 
172 ErrorExit:
173     delete re;
174     return NULL;
175 
176 }
177 
178 //----------------------------------------------------------------------------------------
179 //
180 //    uregex_openUText
181 //
182 //----------------------------------------------------------------------------------------
183 U_CAPI URegularExpression *  U_EXPORT2
uregex_openUText(UText * pattern,uint32_t flags,UParseError * pe,UErrorCode * status)184 uregex_openUText(UText          *pattern,
185                  uint32_t        flags,
186                  UParseError    *pe,
187                  UErrorCode     *status) {
188 
189     if (U_FAILURE(*status)) {
190         return NULL;
191     }
192     if (pattern == NULL) {
193         *status = U_ILLEGAL_ARGUMENT_ERROR;
194         return NULL;
195     }
196 
197     int64_t patternNativeLength = utext_nativeLength(pattern);
198 
199     if (patternNativeLength == 0) {
200         *status = U_ILLEGAL_ARGUMENT_ERROR;
201         return NULL;
202     }
203 
204     RegularExpression *re     = new RegularExpression;
205 
206     UErrorCode lengthStatus = U_ZERO_ERROR;
207     int32_t pattern16Length = utext_extract(pattern, 0, patternNativeLength, NULL, 0, &lengthStatus);
208 
209     int32_t            *refC   = (int32_t *)uprv_malloc(sizeof(int32_t));
210     UChar              *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(pattern16Length+1));
211     if (re == NULL || refC == NULL || patBuf == NULL) {
212         *status = U_MEMORY_ALLOCATION_ERROR;
213         delete re;
214         uprv_free(refC);
215         uprv_free(patBuf);
216         return NULL;
217     }
218     re->fPatRefCount = refC;
219     *re->fPatRefCount = 1;
220 
221     //
222     // Make a copy of the pattern string, so we can return it later if asked.
223     //    For compiling the pattern, we will use a read-only UText wrapper
224     //    around this local copy, to avoid making even more copies.
225     //
226     re->fPatString    = patBuf;
227     re->fPatStringLen = pattern16Length;
228     utext_extract(pattern, 0, patternNativeLength, patBuf, pattern16Length+1, status);
229 
230     UText patText = UTEXT_INITIALIZER;
231     utext_openUChars(&patText, patBuf, pattern16Length, status);
232 
233     //
234     // Compile the pattern
235     //
236     if (pe != NULL) {
237         re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
238     } else {
239         re->fPat = RegexPattern::compile(&patText, flags, *status);
240     }
241     utext_close(&patText);
242 
243     if (U_FAILURE(*status)) {
244         goto ErrorExit;
245     }
246 
247     //
248     // Create the matcher object
249     //
250     re->fMatcher = re->fPat->matcher(*status);
251     if (U_SUCCESS(*status)) {
252         return (URegularExpression*)re;
253     }
254 
255 ErrorExit:
256     delete re;
257     return NULL;
258 
259 }
260 
261 //----------------------------------------------------------------------------------------
262 //
263 //    uregex_close
264 //
265 //----------------------------------------------------------------------------------------
266 U_CAPI void  U_EXPORT2
uregex_close(URegularExpression * re2)267 uregex_close(URegularExpression  *re2) {
268     RegularExpression *re = (RegularExpression*)re2;
269     UErrorCode  status = U_ZERO_ERROR;
270     if (validateRE(re, &status, FALSE) == FALSE) {
271         return;
272     }
273     delete re;
274 }
275 
276 
277 //----------------------------------------------------------------------------------------
278 //
279 //    uregex_clone
280 //
281 //----------------------------------------------------------------------------------------
282 U_CAPI URegularExpression * U_EXPORT2
uregex_clone(const URegularExpression * source2,UErrorCode * status)283 uregex_clone(const URegularExpression *source2, UErrorCode *status)  {
284     RegularExpression *source = (RegularExpression*)source2;
285     if (validateRE(source, status, FALSE) == FALSE) {
286         return NULL;
287     }
288 
289     RegularExpression *clone = new RegularExpression;
290     if (clone == NULL) {
291         *status = U_MEMORY_ALLOCATION_ERROR;
292         return NULL;
293     }
294 
295     clone->fMatcher = source->fPat->matcher(*status);
296     if (U_FAILURE(*status)) {
297         delete clone;
298         return NULL;
299     }
300 
301     clone->fPat          = source->fPat;
302     clone->fPatRefCount  = source->fPatRefCount;
303     clone->fPatString    = source->fPatString;
304     clone->fPatStringLen = source->fPatStringLen;
305     umtx_atomic_inc(source->fPatRefCount);
306     // Note:  fText is not cloned.
307 
308     return (URegularExpression*)clone;
309 }
310 
311 
312 
313 
314 //------------------------------------------------------------------------------
315 //
316 //    uregex_pattern
317 //
318 //------------------------------------------------------------------------------
319 U_CAPI const UChar * U_EXPORT2
uregex_pattern(const URegularExpression * regexp2,int32_t * patLength,UErrorCode * status)320 uregex_pattern(const  URegularExpression *regexp2,
321                       int32_t            *patLength,
322                       UErrorCode         *status)  {
323     RegularExpression *regexp = (RegularExpression*)regexp2;
324 
325     if (validateRE(regexp, status, FALSE) == FALSE) {
326         return NULL;
327     }
328     if (patLength != NULL) {
329         *patLength = regexp->fPatStringLen;
330     }
331     return regexp->fPatString;
332 }
333 
334 
335 //------------------------------------------------------------------------------
336 //
337 //    uregex_patternUText
338 //
339 //------------------------------------------------------------------------------
340 U_CAPI UText * U_EXPORT2
uregex_patternUText(const URegularExpression * regexp2,UErrorCode * status)341 uregex_patternUText(const URegularExpression *regexp2,
342                           UErrorCode         *status)  {
343     RegularExpression *regexp = (RegularExpression*)regexp2;
344     return regexp->fPat->patternText(*status);
345 }
346 
347 
348 //------------------------------------------------------------------------------
349 //
350 //    uregex_flags
351 //
352 //------------------------------------------------------------------------------
353 U_CAPI int32_t U_EXPORT2
uregex_flags(const URegularExpression * regexp2,UErrorCode * status)354 uregex_flags(const URegularExpression *regexp2, UErrorCode *status)  {
355     RegularExpression *regexp = (RegularExpression*)regexp2;
356     if (validateRE(regexp, status, FALSE) == FALSE) {
357         return 0;
358     }
359     int32_t flags = regexp->fPat->flags();
360     return flags;
361 }
362 
363 
364 //------------------------------------------------------------------------------
365 //
366 //    uregex_setText
367 //
368 //------------------------------------------------------------------------------
369 U_CAPI void U_EXPORT2
uregex_setText(URegularExpression * regexp2,const UChar * text,int32_t textLength,UErrorCode * status)370 uregex_setText(URegularExpression *regexp2,
371                const UChar        *text,
372                int32_t             textLength,
373                UErrorCode         *status)  {
374     RegularExpression *regexp = (RegularExpression*)regexp2;
375     if (validateRE(regexp, status, FALSE) == FALSE) {
376         return;
377     }
378     if (text == NULL || textLength < -1) {
379         *status = U_ILLEGAL_ARGUMENT_ERROR;
380         return;
381     }
382 
383     if (regexp->fOwnsText && regexp->fText != NULL) {
384         uprv_free((void *)regexp->fText);
385     }
386 
387     regexp->fText       = text;
388     regexp->fTextLength = textLength;
389     regexp->fOwnsText   = FALSE;
390 
391     UText input = UTEXT_INITIALIZER;
392     utext_openUChars(&input, text, textLength, status);
393     regexp->fMatcher->reset(&input);
394     utext_close(&input); // reset() made a shallow clone, so we don't need this copy
395 }
396 
397 
398 //------------------------------------------------------------------------------
399 //
400 //    uregex_setUText
401 //
402 //------------------------------------------------------------------------------
403 U_CAPI void U_EXPORT2
uregex_setUText(URegularExpression * regexp2,UText * text,UErrorCode * status)404 uregex_setUText(URegularExpression *regexp2,
405                 UText              *text,
406                 UErrorCode         *status) {
407     RegularExpression *regexp = (RegularExpression*)regexp2;
408     if (validateRE(regexp, status, FALSE) == FALSE) {
409         return;
410     }
411     if (text == NULL) {
412         *status = U_ILLEGAL_ARGUMENT_ERROR;
413         return;
414     }
415 
416     if (regexp->fOwnsText && regexp->fText != NULL) {
417         uprv_free((void *)regexp->fText);
418     }
419 
420     regexp->fText       = NULL; // only fill it in on request
421     regexp->fTextLength = -1;
422     regexp->fOwnsText   = TRUE;
423     regexp->fMatcher->reset(text);
424 }
425 
426 
427 
428 //------------------------------------------------------------------------------
429 //
430 //    uregex_getText
431 //
432 //------------------------------------------------------------------------------
433 U_CAPI const UChar * U_EXPORT2
uregex_getText(URegularExpression * regexp2,int32_t * textLength,UErrorCode * status)434 uregex_getText(URegularExpression *regexp2,
435                int32_t            *textLength,
436                UErrorCode         *status)  {
437     RegularExpression *regexp = (RegularExpression*)regexp2;
438     if (validateRE(regexp, status, FALSE) == FALSE) {
439         return NULL;
440     }
441 
442     if (regexp->fText == NULL) {
443         // need to fill in the text
444         UText *inputText = regexp->fMatcher->inputText();
445         int64_t inputNativeLength = utext_nativeLength(inputText);
446         if (UTEXT_FULL_TEXT_IN_CHUNK(inputText, inputNativeLength)) {
447             regexp->fText = inputText->chunkContents;
448             regexp->fTextLength = (int32_t)inputNativeLength;
449             regexp->fOwnsText = FALSE; // because the UText owns it
450         } else {
451             UErrorCode lengthStatus = U_ZERO_ERROR;
452             regexp->fTextLength = utext_extract(inputText, 0, inputNativeLength, NULL, 0, &lengthStatus); // buffer overflow error
453             UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(regexp->fTextLength+1));
454 
455             utext_extract(inputText, 0, inputNativeLength, inputChars, regexp->fTextLength+1, status);
456             regexp->fText = inputChars;
457             regexp->fOwnsText = TRUE; // should already be set but just in case
458         }
459     }
460 
461     if (textLength != NULL) {
462         *textLength = regexp->fTextLength;
463     }
464     return regexp->fText;
465 }
466 
467 
468 //------------------------------------------------------------------------------
469 //
470 //    uregex_getUText
471 //
472 //------------------------------------------------------------------------------
473 U_CAPI UText * U_EXPORT2
uregex_getUText(URegularExpression * regexp2,UText * dest,UErrorCode * status)474 uregex_getUText(URegularExpression *regexp2,
475                 UText              *dest,
476                 UErrorCode         *status)  {
477     RegularExpression *regexp = (RegularExpression*)regexp2;
478     if (validateRE(regexp, status, FALSE) == FALSE) {
479         return dest;
480     }
481     return regexp->fMatcher->getInput(dest, *status);
482 }
483 
484 // BEGIN android-added
485 // Removed this function after Android upgrade to ICU4.8.
486 //------------------------------------------------------------------------------
487 //
488 //    uregex_refreshUText
489 //
490 //------------------------------------------------------------------------------
491 U_CAPI void U_EXPORT2
uregex_refreshUText(URegularExpression * regexp2,UText * text,UErrorCode * status)492 uregex_refreshUText(URegularExpression *regexp2,
493                     UText              *text,
494                     UErrorCode         *status) {
495     RegularExpression *regexp = (RegularExpression*)regexp2;
496     if (validateRE(regexp, status, FALSE) == FALSE) {
497         return;
498     }
499     regexp->fMatcher->refreshInputText(text, *status);
500 }
501 // END android-added
502 
503 //------------------------------------------------------------------------------
504 //
505 //    uregex_matches
506 //
507 //------------------------------------------------------------------------------
508 U_CAPI UBool U_EXPORT2
uregex_matches(URegularExpression * regexp2,int32_t startIndex,UErrorCode * status)509 uregex_matches(URegularExpression *regexp2,
510                int32_t            startIndex,
511                UErrorCode        *status)  {
512     return uregex_matches64( regexp2, (int64_t)startIndex, status);
513 }
514 
515 U_CAPI UBool U_EXPORT2
uregex_matches64(URegularExpression * regexp2,int64_t startIndex,UErrorCode * status)516 uregex_matches64(URegularExpression *regexp2,
517                  int64_t            startIndex,
518                  UErrorCode        *status)  {
519     RegularExpression *regexp = (RegularExpression*)regexp2;
520     UBool result = FALSE;
521     if (validateRE(regexp, status) == FALSE) {
522         return result;
523     }
524     if (startIndex == -1) {
525         result = regexp->fMatcher->matches(*status);
526     } else {
527         result = regexp->fMatcher->matches(startIndex, *status);
528     }
529     return result;
530 }
531 
532 
533 //------------------------------------------------------------------------------
534 //
535 //    uregex_lookingAt
536 //
537 //------------------------------------------------------------------------------
538 U_CAPI UBool U_EXPORT2
uregex_lookingAt(URegularExpression * regexp2,int32_t startIndex,UErrorCode * status)539 uregex_lookingAt(URegularExpression *regexp2,
540                  int32_t             startIndex,
541                  UErrorCode         *status)  {
542     return uregex_lookingAt64( regexp2, (int64_t)startIndex, status);
543 }
544 
545 U_CAPI UBool U_EXPORT2
uregex_lookingAt64(URegularExpression * regexp2,int64_t startIndex,UErrorCode * status)546 uregex_lookingAt64(URegularExpression *regexp2,
547                    int64_t             startIndex,
548                    UErrorCode         *status)  {
549     RegularExpression *regexp = (RegularExpression*)regexp2;
550     UBool result = FALSE;
551     if (validateRE(regexp, status) == FALSE) {
552         return result;
553     }
554     if (startIndex == -1) {
555         result = regexp->fMatcher->lookingAt(*status);
556     } else {
557         result = regexp->fMatcher->lookingAt(startIndex, *status);
558     }
559     return result;
560 }
561 
562 
563 
564 //------------------------------------------------------------------------------
565 //
566 //    uregex_find
567 //
568 //------------------------------------------------------------------------------
569 U_CAPI UBool U_EXPORT2
uregex_find(URegularExpression * regexp2,int32_t startIndex,UErrorCode * status)570 uregex_find(URegularExpression *regexp2,
571             int32_t             startIndex,
572             UErrorCode         *status)  {
573     return uregex_find64( regexp2, (int64_t)startIndex, status);
574 }
575 
576 U_CAPI UBool U_EXPORT2
uregex_find64(URegularExpression * regexp2,int64_t startIndex,UErrorCode * status)577 uregex_find64(URegularExpression *regexp2,
578               int64_t             startIndex,
579               UErrorCode         *status)  {
580     RegularExpression *regexp = (RegularExpression*)regexp2;
581     UBool result = FALSE;
582     if (validateRE(regexp, status) == FALSE) {
583         return result;
584     }
585     if (startIndex == -1) {
586         regexp->fMatcher->resetPreserveRegion();
587         result = regexp->fMatcher->find();
588     } else {
589         result = regexp->fMatcher->find(startIndex, *status);
590     }
591     return result;
592 }
593 
594 
595 //------------------------------------------------------------------------------
596 //
597 //    uregex_findNext
598 //
599 //------------------------------------------------------------------------------
600 U_CAPI UBool U_EXPORT2
uregex_findNext(URegularExpression * regexp2,UErrorCode * status)601 uregex_findNext(URegularExpression *regexp2,
602                 UErrorCode         *status)  {
603     RegularExpression *regexp = (RegularExpression*)regexp2;
604     if (validateRE(regexp, status) == FALSE) {
605         return FALSE;
606     }
607     UBool result = regexp->fMatcher->find();
608     return result;
609 }
610 
611 //------------------------------------------------------------------------------
612 //
613 //    uregex_groupCount
614 //
615 //------------------------------------------------------------------------------
616 U_CAPI int32_t U_EXPORT2
uregex_groupCount(URegularExpression * regexp2,UErrorCode * status)617 uregex_groupCount(URegularExpression *regexp2,
618                   UErrorCode         *status)  {
619     RegularExpression *regexp = (RegularExpression*)regexp2;
620     if (validateRE(regexp, status, FALSE) == FALSE) {
621         return 0;
622     }
623     int32_t  result = regexp->fMatcher->groupCount();
624     return result;
625 }
626 
627 
628 //------------------------------------------------------------------------------
629 //
630 //    uregex_group
631 //
632 //------------------------------------------------------------------------------
633 U_CAPI int32_t U_EXPORT2
uregex_group(URegularExpression * regexp2,int32_t groupNum,UChar * dest,int32_t destCapacity,UErrorCode * status)634 uregex_group(URegularExpression *regexp2,
635              int32_t             groupNum,
636              UChar              *dest,
637              int32_t             destCapacity,
638              UErrorCode          *status)  {
639     RegularExpression *regexp = (RegularExpression*)regexp2;
640     if (validateRE(regexp, status) == FALSE) {
641         return 0;
642     }
643     if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) {
644         *status = U_ILLEGAL_ARGUMENT_ERROR;
645         return 0;
646     }
647 
648     if (destCapacity == 0 || regexp->fText != NULL) {
649         // If preflighting or if we already have the text as UChars,
650         // this is a little cheaper than going through uregex_groupUTextDeep()
651 
652         //
653         // Pick up the range of characters from the matcher
654         //
655         int32_t  startIx = regexp->fMatcher->start(groupNum, *status);
656         int32_t  endIx   = regexp->fMatcher->end  (groupNum, *status);
657         if (U_FAILURE(*status)) {
658             return 0;
659         }
660 
661         //
662         // Trim length based on buffer capacity
663         //
664         int32_t fullLength = endIx - startIx;
665         int32_t copyLength = fullLength;
666         if (copyLength < destCapacity) {
667             dest[copyLength] = 0;
668         } else if (copyLength == destCapacity) {
669             *status = U_STRING_NOT_TERMINATED_WARNING;
670         } else {
671             copyLength = destCapacity;
672             *status = U_BUFFER_OVERFLOW_ERROR;
673         }
674 
675         //
676         // Copy capture group to user's buffer
677         //
678         if (copyLength > 0) {
679             u_memcpy(dest, &regexp->fText[startIx], copyLength);
680         }
681         return fullLength;
682     } else {
683         UText *groupText = uregex_groupUTextDeep(regexp2, groupNum, NULL, status);
684         int32_t result = utext_extract(groupText, 0, utext_nativeLength(groupText), dest, destCapacity, status);
685         utext_close(groupText);
686         return result;
687     }
688 }
689 
690 
691 //------------------------------------------------------------------------------
692 //
693 //    uregex_groupUText
694 //
695 //------------------------------------------------------------------------------
696 U_CAPI UText * U_EXPORT2
uregex_groupUText(URegularExpression * regexp2,int32_t groupNum,UText * dest,int64_t * groupLength,UErrorCode * status)697 uregex_groupUText(URegularExpression *regexp2,
698                   int32_t             groupNum,
699                   UText              *dest,
700                   int64_t            *groupLength,
701                   UErrorCode         *status)  {
702     RegularExpression *regexp = (RegularExpression*)regexp2;
703     if (validateRE(regexp, status) == FALSE) {
704         UErrorCode emptyTextStatus = U_ZERO_ERROR;
705         return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
706     }
707 
708     return regexp->fMatcher->group(groupNum, dest, *groupLength, *status);
709 }
710 
711 //------------------------------------------------------------------------------
712 //
713 //    uregex_groupUTextDeep
714 //
715 //------------------------------------------------------------------------------
716 U_CAPI UText * U_EXPORT2
uregex_groupUTextDeep(URegularExpression * regexp2,int32_t groupNum,UText * dest,UErrorCode * status)717 uregex_groupUTextDeep(URegularExpression *regexp2,
718                   int32_t             groupNum,
719                   UText              *dest,
720                   UErrorCode         *status)  {
721     RegularExpression *regexp = (RegularExpression*)regexp2;
722     if (validateRE(regexp, status) == FALSE) {
723         UErrorCode emptyTextStatus = U_ZERO_ERROR;
724         return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
725     }
726 
727     if (regexp->fText != NULL) {
728         //
729         // Pick up the range of characters from the matcher
730         // and use our already-extracted characters
731         //
732         int32_t  startIx = regexp->fMatcher->start(groupNum, *status);
733         int32_t  endIx   = regexp->fMatcher->end  (groupNum, *status);
734         if (U_FAILURE(*status)) {
735             UErrorCode emptyTextStatus = U_ZERO_ERROR;
736             return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
737         }
738 
739         if (dest) {
740             utext_replace(dest, 0, utext_nativeLength(dest), &regexp->fText[startIx], endIx - startIx, status);
741         } else {
742             UText groupText = UTEXT_INITIALIZER;
743             utext_openUChars(&groupText, &regexp->fText[startIx], endIx - startIx, status);
744             dest = utext_clone(NULL, &groupText, TRUE, FALSE, status);
745             utext_close(&groupText);
746         }
747 
748         return dest;
749     } else {
750         return regexp->fMatcher->group(groupNum, dest, *status);
751     }
752 }
753 
754 //------------------------------------------------------------------------------
755 //
756 //    uregex_start
757 //
758 //------------------------------------------------------------------------------
759 U_CAPI int32_t U_EXPORT2
uregex_start(URegularExpression * regexp2,int32_t groupNum,UErrorCode * status)760 uregex_start(URegularExpression *regexp2,
761              int32_t             groupNum,
762              UErrorCode          *status)  {
763     return (int32_t)uregex_start64( regexp2, groupNum, status);
764 }
765 
766 U_CAPI int64_t U_EXPORT2
uregex_start64(URegularExpression * regexp2,int32_t groupNum,UErrorCode * status)767 uregex_start64(URegularExpression *regexp2,
768                int32_t             groupNum,
769                UErrorCode          *status)  {
770     RegularExpression *regexp = (RegularExpression*)regexp2;
771     if (validateRE(regexp, status) == FALSE) {
772         return 0;
773     }
774     int32_t result = regexp->fMatcher->start(groupNum, *status);
775     return result;
776 }
777 
778 //------------------------------------------------------------------------------
779 //
780 //    uregex_end
781 //
782 //------------------------------------------------------------------------------
783 U_CAPI int32_t U_EXPORT2
uregex_end(URegularExpression * regexp2,int32_t groupNum,UErrorCode * status)784 uregex_end(URegularExpression   *regexp2,
785            int32_t               groupNum,
786            UErrorCode           *status)  {
787     return (int32_t)uregex_end64( regexp2, groupNum, status);
788 }
789 
790 U_CAPI int64_t U_EXPORT2
uregex_end64(URegularExpression * regexp2,int32_t groupNum,UErrorCode * status)791 uregex_end64(URegularExpression   *regexp2,
792              int32_t               groupNum,
793              UErrorCode           *status)  {
794     RegularExpression *regexp = (RegularExpression*)regexp2;
795     if (validateRE(regexp, status) == FALSE) {
796         return 0;
797     }
798     int32_t result = regexp->fMatcher->end(groupNum, *status);
799     return result;
800 }
801 
802 //------------------------------------------------------------------------------
803 //
804 //    uregex_reset
805 //
806 //------------------------------------------------------------------------------
807 U_CAPI void U_EXPORT2
uregex_reset(URegularExpression * regexp2,int32_t index,UErrorCode * status)808 uregex_reset(URegularExpression    *regexp2,
809              int32_t               index,
810              UErrorCode            *status)  {
811     uregex_reset64( regexp2, (int64_t)index, status);
812 }
813 
814 U_CAPI void U_EXPORT2
uregex_reset64(URegularExpression * regexp2,int64_t index,UErrorCode * status)815 uregex_reset64(URegularExpression    *regexp2,
816                int64_t               index,
817                UErrorCode            *status)  {
818     RegularExpression *regexp = (RegularExpression*)regexp2;
819     if (validateRE(regexp, status) == FALSE) {
820         return;
821     }
822     regexp->fMatcher->reset(index, *status);
823 }
824 
825 
826 //------------------------------------------------------------------------------
827 //
828 //    uregex_setRegion
829 //
830 //------------------------------------------------------------------------------
831 U_CAPI void U_EXPORT2
uregex_setRegion(URegularExpression * regexp2,int32_t regionStart,int32_t regionLimit,UErrorCode * status)832 uregex_setRegion(URegularExpression   *regexp2,
833                  int32_t               regionStart,
834                  int32_t               regionLimit,
835                  UErrorCode           *status)  {
836     uregex_setRegion64( regexp2, (int64_t)regionStart, (int64_t)regionLimit, status);
837 }
838 
839 U_CAPI void U_EXPORT2
uregex_setRegion64(URegularExpression * regexp2,int64_t regionStart,int64_t regionLimit,UErrorCode * status)840 uregex_setRegion64(URegularExpression   *regexp2,
841                    int64_t               regionStart,
842                    int64_t               regionLimit,
843                    UErrorCode           *status)  {
844     RegularExpression *regexp = (RegularExpression*)regexp2;
845     if (validateRE(regexp, status) == FALSE) {
846         return;
847     }
848     regexp->fMatcher->region(regionStart, regionLimit, *status);
849 }
850 
851 
852 //------------------------------------------------------------------------------
853 //
854 //    uregex_setRegionAndStart
855 //
856 //------------------------------------------------------------------------------
857 U_DRAFT void U_EXPORT2
uregex_setRegionAndStart(URegularExpression * regexp2,int64_t regionStart,int64_t regionLimit,int64_t startIndex,UErrorCode * status)858 uregex_setRegionAndStart(URegularExpression   *regexp2,
859                  int64_t               regionStart,
860                  int64_t               regionLimit,
861                  int64_t               startIndex,
862                  UErrorCode           *status)  {
863     RegularExpression *regexp = (RegularExpression*)regexp2;
864     if (validateRE(regexp, status) == FALSE) {
865         return;
866     }
867     regexp->fMatcher->region(regionStart, regionLimit, startIndex, *status);
868 }
869 
870 //------------------------------------------------------------------------------
871 //
872 //    uregex_regionStart
873 //
874 //------------------------------------------------------------------------------
875 U_CAPI int32_t U_EXPORT2
uregex_regionStart(const URegularExpression * regexp2,UErrorCode * status)876 uregex_regionStart(const  URegularExpression   *regexp2,
877                           UErrorCode           *status)  {
878     return (int32_t)uregex_regionStart64(regexp2, status);
879 }
880 
881 U_CAPI int64_t U_EXPORT2
uregex_regionStart64(const URegularExpression * regexp2,UErrorCode * status)882 uregex_regionStart64(const  URegularExpression   *regexp2,
883                             UErrorCode           *status)  {
884     RegularExpression *regexp = (RegularExpression*)regexp2;
885     if (validateRE(regexp, status) == FALSE) {
886         return 0;
887     }
888     return regexp->fMatcher->regionStart();
889 }
890 
891 
892 //------------------------------------------------------------------------------
893 //
894 //    uregex_regionEnd
895 //
896 //------------------------------------------------------------------------------
897 U_CAPI int32_t U_EXPORT2
uregex_regionEnd(const URegularExpression * regexp2,UErrorCode * status)898 uregex_regionEnd(const  URegularExpression   *regexp2,
899                         UErrorCode           *status)  {
900     return (int32_t)uregex_regionEnd64(regexp2, status);
901 }
902 
903 U_CAPI int64_t U_EXPORT2
uregex_regionEnd64(const URegularExpression * regexp2,UErrorCode * status)904 uregex_regionEnd64(const  URegularExpression   *regexp2,
905                           UErrorCode           *status)  {
906     RegularExpression *regexp = (RegularExpression*)regexp2;
907     if (validateRE(regexp, status) == FALSE) {
908         return 0;
909     }
910     return regexp->fMatcher->regionEnd();
911 }
912 
913 
914 //------------------------------------------------------------------------------
915 //
916 //    uregex_hasTransparentBounds
917 //
918 //------------------------------------------------------------------------------
919 U_CAPI UBool U_EXPORT2
uregex_hasTransparentBounds(const URegularExpression * regexp2,UErrorCode * status)920 uregex_hasTransparentBounds(const  URegularExpression   *regexp2,
921                                    UErrorCode           *status)  {
922     RegularExpression *regexp = (RegularExpression*)regexp2;
923     if (validateRE(regexp, status) == FALSE) {
924         return FALSE;
925     }
926     return regexp->fMatcher->hasTransparentBounds();
927 }
928 
929 
930 //------------------------------------------------------------------------------
931 //
932 //    uregex_useTransparentBounds
933 //
934 //------------------------------------------------------------------------------
935 U_CAPI void U_EXPORT2
uregex_useTransparentBounds(URegularExpression * regexp2,UBool b,UErrorCode * status)936 uregex_useTransparentBounds(URegularExpression    *regexp2,
937                             UBool                  b,
938                             UErrorCode            *status)  {
939     RegularExpression *regexp = (RegularExpression*)regexp2;
940     if (validateRE(regexp, status) == FALSE) {
941         return;
942     }
943     regexp->fMatcher->useTransparentBounds(b);
944 }
945 
946 
947 //------------------------------------------------------------------------------
948 //
949 //    uregex_hasAnchoringBounds
950 //
951 //------------------------------------------------------------------------------
952 U_CAPI UBool U_EXPORT2
uregex_hasAnchoringBounds(const URegularExpression * regexp2,UErrorCode * status)953 uregex_hasAnchoringBounds(const  URegularExpression   *regexp2,
954                                  UErrorCode           *status)  {
955     RegularExpression *regexp = (RegularExpression*)regexp2;
956     if (validateRE(regexp, status) == FALSE) {
957         return FALSE;
958     }
959     return regexp->fMatcher->hasAnchoringBounds();
960 }
961 
962 
963 //------------------------------------------------------------------------------
964 //
965 //    uregex_useAnchoringBounds
966 //
967 //------------------------------------------------------------------------------
968 U_CAPI void U_EXPORT2
uregex_useAnchoringBounds(URegularExpression * regexp2,UBool b,UErrorCode * status)969 uregex_useAnchoringBounds(URegularExpression    *regexp2,
970                           UBool                  b,
971                           UErrorCode            *status)  {
972     RegularExpression *regexp = (RegularExpression*)regexp2;
973     if (validateRE(regexp, status) == FALSE) {
974         return;
975     }
976     regexp->fMatcher->useAnchoringBounds(b);
977 }
978 
979 
980 //------------------------------------------------------------------------------
981 //
982 //    uregex_hitEnd
983 //
984 //------------------------------------------------------------------------------
985 U_CAPI UBool U_EXPORT2
uregex_hitEnd(const URegularExpression * regexp2,UErrorCode * status)986 uregex_hitEnd(const  URegularExpression   *regexp2,
987                      UErrorCode           *status)  {
988     RegularExpression *regexp = (RegularExpression*)regexp2;
989     if (validateRE(regexp, status) == FALSE) {
990         return FALSE;
991     }
992     return regexp->fMatcher->hitEnd();
993 }
994 
995 
996 //------------------------------------------------------------------------------
997 //
998 //    uregex_requireEnd
999 //
1000 //------------------------------------------------------------------------------
1001 U_CAPI UBool U_EXPORT2
uregex_requireEnd(const URegularExpression * regexp2,UErrorCode * status)1002 uregex_requireEnd(const  URegularExpression   *regexp2,
1003                          UErrorCode           *status)  {
1004     RegularExpression *regexp = (RegularExpression*)regexp2;
1005     if (validateRE(regexp, status) == FALSE) {
1006         return FALSE;
1007     }
1008     return regexp->fMatcher->requireEnd();
1009 }
1010 
1011 
1012 //------------------------------------------------------------------------------
1013 //
1014 //    uregex_setTimeLimit
1015 //
1016 //------------------------------------------------------------------------------
1017 U_CAPI void U_EXPORT2
uregex_setTimeLimit(URegularExpression * regexp2,int32_t limit,UErrorCode * status)1018 uregex_setTimeLimit(URegularExpression   *regexp2,
1019                     int32_t               limit,
1020                     UErrorCode           *status) {
1021     RegularExpression *regexp = (RegularExpression*)regexp2;
1022     if (validateRE(regexp, status)) {
1023         regexp->fMatcher->setTimeLimit(limit, *status);
1024     }
1025 }
1026 
1027 
1028 
1029 //------------------------------------------------------------------------------
1030 //
1031 //    uregex_getTimeLimit
1032 //
1033 //------------------------------------------------------------------------------
1034 U_CAPI int32_t U_EXPORT2
uregex_getTimeLimit(const URegularExpression * regexp2,UErrorCode * status)1035 uregex_getTimeLimit(const  URegularExpression   *regexp2,
1036                            UErrorCode           *status) {
1037     int32_t retVal = 0;
1038     RegularExpression *regexp = (RegularExpression*)regexp2;
1039     if (validateRE(regexp, status)) {
1040         retVal = regexp->fMatcher->getTimeLimit();
1041     }
1042     return retVal;
1043 }
1044 
1045 
1046 
1047 //------------------------------------------------------------------------------
1048 //
1049 //    uregex_setStackLimit
1050 //
1051 //------------------------------------------------------------------------------
1052 U_CAPI void U_EXPORT2
uregex_setStackLimit(URegularExpression * regexp2,int32_t limit,UErrorCode * status)1053 uregex_setStackLimit(URegularExpression   *regexp2,
1054                      int32_t               limit,
1055                      UErrorCode           *status) {
1056     RegularExpression *regexp = (RegularExpression*)regexp2;
1057     if (validateRE(regexp, status)) {
1058         regexp->fMatcher->setStackLimit(limit, *status);
1059     }
1060 }
1061 
1062 
1063 
1064 //------------------------------------------------------------------------------
1065 //
1066 //    uregex_getStackLimit
1067 //
1068 //------------------------------------------------------------------------------
1069 U_CAPI int32_t U_EXPORT2
uregex_getStackLimit(const URegularExpression * regexp2,UErrorCode * status)1070 uregex_getStackLimit(const  URegularExpression   *regexp2,
1071                             UErrorCode           *status) {
1072     int32_t retVal = 0;
1073     RegularExpression *regexp = (RegularExpression*)regexp2;
1074     if (validateRE(regexp, status)) {
1075         retVal = regexp->fMatcher->getStackLimit();
1076     }
1077     return retVal;
1078 }
1079 
1080 
1081 //------------------------------------------------------------------------------
1082 //
1083 //    uregex_setMatchCallback
1084 //
1085 //------------------------------------------------------------------------------
1086 U_CAPI void U_EXPORT2
uregex_setMatchCallback(URegularExpression * regexp2,URegexMatchCallback * callback,const void * context,UErrorCode * status)1087 uregex_setMatchCallback(URegularExpression      *regexp2,
1088                         URegexMatchCallback     *callback,
1089                         const void              *context,
1090                         UErrorCode              *status) {
1091     RegularExpression *regexp = (RegularExpression*)regexp2;
1092     if (validateRE(regexp, status)) {
1093         regexp->fMatcher->setMatchCallback(callback, context, *status);
1094     }
1095 }
1096 
1097 
1098 //------------------------------------------------------------------------------
1099 //
1100 //    uregex_getMatchCallback
1101 //
1102 //------------------------------------------------------------------------------
1103 U_CAPI void U_EXPORT2
uregex_getMatchCallback(const URegularExpression * regexp2,URegexMatchCallback ** callback,const void ** context,UErrorCode * status)1104 uregex_getMatchCallback(const URegularExpression    *regexp2,
1105                         URegexMatchCallback        **callback,
1106                         const void                 **context,
1107                         UErrorCode                  *status) {
1108     RegularExpression *regexp = (RegularExpression*)regexp2;
1109      if (validateRE(regexp, status)) {
1110          regexp->fMatcher->getMatchCallback(*callback, *context, *status);
1111      }
1112 }
1113 
1114 
1115 //------------------------------------------------------------------------------
1116 //
//    uregex_setFindProgressCallback
1118 //
1119 //------------------------------------------------------------------------------
1120 U_CAPI void U_EXPORT2
uregex_setFindProgressCallback(URegularExpression * regexp2,URegexFindProgressCallback * callback,const void * context,UErrorCode * status)1121 uregex_setFindProgressCallback(URegularExpression              *regexp2,
1122                                 URegexFindProgressCallback      *callback,
1123                                 const void                      *context,
1124                                 UErrorCode                      *status) {
1125     RegularExpression *regexp = (RegularExpression*)regexp2;
1126     if (validateRE(regexp, status)) {
1127         regexp->fMatcher->setFindProgressCallback(callback, context, *status);
1128     }
1129 }
1130 
1131 
1132 //------------------------------------------------------------------------------
1133 //
//    uregex_getFindProgressCallback
1135 //
1136 //------------------------------------------------------------------------------
1137 U_CAPI void U_EXPORT2
uregex_getFindProgressCallback(const URegularExpression * regexp2,URegexFindProgressCallback ** callback,const void ** context,UErrorCode * status)1138 uregex_getFindProgressCallback(const URegularExpression          *regexp2,
1139                                 URegexFindProgressCallback        **callback,
1140                                 const void                        **context,
1141                                 UErrorCode                        *status) {
1142     RegularExpression *regexp = (RegularExpression*)regexp2;
1143      if (validateRE(regexp, status)) {
1144          regexp->fMatcher->getFindProgressCallback(*callback, *context, *status);
1145      }
1146 }
1147 
1148 
1149 //------------------------------------------------------------------------------
1150 //
1151 //    uregex_replaceAll
1152 //
1153 //------------------------------------------------------------------------------
1154 U_CAPI int32_t U_EXPORT2
uregex_replaceAll(URegularExpression * regexp2,const UChar * replacementText,int32_t replacementLength,UChar * destBuf,int32_t destCapacity,UErrorCode * status)1155 uregex_replaceAll(URegularExpression    *regexp2,
1156                   const UChar           *replacementText,
1157                   int32_t                replacementLength,
1158                   UChar                 *destBuf,
1159                   int32_t                destCapacity,
1160                   UErrorCode            *status)  {
1161     RegularExpression *regexp = (RegularExpression*)regexp2;
1162     if (validateRE(regexp, status) == FALSE) {
1163         return 0;
1164     }
1165     if (replacementText == NULL || replacementLength < -1 ||
1166         (destBuf == NULL && destCapacity > 0) ||
1167         destCapacity < 0) {
1168         *status = U_ILLEGAL_ARGUMENT_ERROR;
1169         return 0;
1170     }
1171 
1172     int32_t   len = 0;
1173 
1174     uregex_reset(regexp2, 0, status);
1175 
1176     // Note: Seperate error code variables for findNext() and appendReplacement()
1177     //       are used so that destination buffer overflow errors
1178     //       in appendReplacement won't stop findNext() from working.
1179     //       appendReplacement() and appendTail() special case incoming buffer
1180     //       overflow errors, continuing to return the correct length.
1181     UErrorCode  findStatus = *status;
1182     while (uregex_findNext(regexp2, &findStatus)) {
1183         len += uregex_appendReplacement(regexp2, replacementText, replacementLength,
1184                                         &destBuf, &destCapacity, status);
1185     }
1186     len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
1187 
1188     if (U_FAILURE(findStatus)) {
1189         // If anything went wrong with the findNext(), make that error trump
1190         //   whatever may have happened with the append() operations.
1191         //   Errors in findNext() are not expected.
1192         *status = findStatus;
1193     }
1194 
1195     return len;
1196 }
1197 
1198 
1199 //------------------------------------------------------------------------------
1200 //
1201 //    uregex_replaceAllUText
1202 //
1203 //------------------------------------------------------------------------------
1204 U_CAPI UText * U_EXPORT2
uregex_replaceAllUText(URegularExpression * regexp2,UText * replacementText,UText * dest,UErrorCode * status)1205 uregex_replaceAllUText(URegularExpression    *regexp2,
1206                        UText                 *replacementText,
1207                        UText                 *dest,
1208                        UErrorCode            *status)  {
1209     RegularExpression *regexp = (RegularExpression*)regexp2;
1210     if (validateRE(regexp, status) == FALSE) {
1211         return 0;
1212     }
1213     if (replacementText == NULL) {
1214         *status = U_ILLEGAL_ARGUMENT_ERROR;
1215         return 0;
1216     }
1217 
1218     dest = regexp->fMatcher->replaceAll(replacementText, dest, *status);
1219     return dest;
1220 }
1221 
1222 
1223 //------------------------------------------------------------------------------
1224 //
1225 //    uregex_replaceFirst
1226 //
1227 //------------------------------------------------------------------------------
1228 U_CAPI int32_t U_EXPORT2
uregex_replaceFirst(URegularExpression * regexp2,const UChar * replacementText,int32_t replacementLength,UChar * destBuf,int32_t destCapacity,UErrorCode * status)1229 uregex_replaceFirst(URegularExpression  *regexp2,
1230                     const UChar         *replacementText,
1231                     int32_t              replacementLength,
1232                     UChar               *destBuf,
1233                     int32_t              destCapacity,
1234                     UErrorCode          *status)  {
1235     RegularExpression *regexp = (RegularExpression*)regexp2;
1236     if (validateRE(regexp, status) == FALSE) {
1237         return 0;
1238     }
1239     if (replacementText == NULL || replacementLength < -1 ||
1240         (destBuf == NULL && destCapacity > 0) ||
1241         destCapacity < 0) {
1242         *status = U_ILLEGAL_ARGUMENT_ERROR;
1243         return 0;
1244     }
1245 
1246     int32_t   len = 0;
1247     UBool     findSucceeded;
1248     uregex_reset(regexp2, 0, status);
1249     findSucceeded = uregex_find(regexp2, 0, status);
1250     if (findSucceeded) {
1251         len = uregex_appendReplacement(regexp2, replacementText, replacementLength,
1252                                        &destBuf, &destCapacity, status);
1253     }
1254     len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
1255 
1256     return len;
1257 }
1258 
1259 
1260 //------------------------------------------------------------------------------
1261 //
1262 //    uregex_replaceFirstUText
1263 //
1264 //------------------------------------------------------------------------------
1265 U_CAPI UText * U_EXPORT2
uregex_replaceFirstUText(URegularExpression * regexp2,UText * replacementText,UText * dest,UErrorCode * status)1266 uregex_replaceFirstUText(URegularExpression  *regexp2,
1267                          UText                 *replacementText,
1268                          UText                 *dest,
1269                          UErrorCode            *status)  {
1270     RegularExpression *regexp = (RegularExpression*)regexp2;
1271     if (validateRE(regexp, status) == FALSE) {
1272         return 0;
1273     }
1274     if (replacementText == NULL) {
1275         *status = U_ILLEGAL_ARGUMENT_ERROR;
1276         return 0;
1277     }
1278 
1279     dest = regexp->fMatcher->replaceFirst(replacementText, dest, *status);
1280     return dest;
1281 }
1282 
1283 
1284 //------------------------------------------------------------------------------
1285 //
1286 //    uregex_appendReplacement
1287 //
1288 //------------------------------------------------------------------------------
1289 
1290 U_NAMESPACE_BEGIN
1291 //
1292 //  Dummy class, because these functions need to be friends of class RegexMatcher,
1293 //               and stand-alone C functions don't work as friends
1294 //
1295 class RegexCImpl {
1296  public:
1297    inline static  int32_t appendReplacement(RegularExpression    *regexp,
1298                       const UChar           *replacementText,
1299                       int32_t                replacementLength,
1300                       UChar                **destBuf,
1301                       int32_t               *destCapacity,
1302                       UErrorCode            *status);
1303 
1304    inline static int32_t appendTail(RegularExpression    *regexp,
1305         UChar                **destBuf,
1306         int32_t               *destCapacity,
1307         UErrorCode            *status);
1308 
1309     inline static int32_t split(RegularExpression    *regexp,
1310         UChar                 *destBuf,
1311         int32_t                destCapacity,
1312         int32_t               *requiredCapacity,
1313         UChar                 *destFields[],
1314         int32_t                destFieldsCapacity,
1315         UErrorCode            *status);
1316 };
1317 
1318 U_NAMESPACE_END
1319 
1320 
1321 
// Characters with special meaning in replacement text: BACKSLASH introduces an
// escape and DOLLARSIGN introduces a capture group reference ($n).
static const UChar BACKSLASH  = 0x5c;   // U+005C
static const UChar DOLLARSIGN = 0x24;   // U+0024
1324 
1325 //
1326 //  Move a character to an output buffer, with bounds checking on the index.
1327 //      Index advances even if capacity is exceeded, for preflight size computations.
1328 //      This little sequence is used a LOT.
1329 //
appendToBuf(UChar c,int32_t * idx,UChar * buf,int32_t bufCapacity)1330 static inline void appendToBuf(UChar c, int32_t *idx, UChar *buf, int32_t bufCapacity) {
1331     if (*idx < bufCapacity) {
1332         buf[*idx] = c;
1333     }
1334     (*idx)++;
1335 }
1336 
1337 
1338 //
1339 //  appendReplacement, the actual implementation.
1340 //
appendReplacement(RegularExpression * regexp,const UChar * replacementText,int32_t replacementLength,UChar ** destBuf,int32_t * destCapacity,UErrorCode * status)1341 int32_t RegexCImpl::appendReplacement(RegularExpression    *regexp,
1342                                       const UChar           *replacementText,
1343                                       int32_t                replacementLength,
1344                                       UChar                **destBuf,
1345                                       int32_t               *destCapacity,
1346                                       UErrorCode            *status)  {
1347 
1348     // If we come in with a buffer overflow error, don't suppress the operation.
1349     //  A series of appendReplacements, appendTail need to correctly preflight
1350     //  the buffer size when an overflow happens somewhere in the middle.
1351     UBool pendingBufferOverflow = FALSE;
1352     if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
1353         pendingBufferOverflow = TRUE;
1354         *status = U_ZERO_ERROR;
1355     }
1356 
1357     //
1358     // Validate all paramters
1359     //
1360     if (validateRE(regexp, status) == FALSE) {
1361         return 0;
1362     }
1363     if (replacementText == NULL || replacementLength < -1 ||
1364         destCapacity == NULL || destBuf == NULL ||
1365         (*destBuf == NULL && *destCapacity > 0) ||
1366         *destCapacity < 0) {
1367         *status = U_ILLEGAL_ARGUMENT_ERROR;
1368         return 0;
1369     }
1370 
1371     RegexMatcher *m = regexp->fMatcher;
1372     if (m->fMatch == FALSE) {
1373         *status = U_REGEX_INVALID_STATE;
1374         return 0;
1375     }
1376 
1377     UChar    *dest             = *destBuf;
1378     int32_t   capacity         = *destCapacity;
1379     int32_t   destIdx          =  0;
1380     int32_t   i;
1381 
1382     // If it wasn't supplied by the caller,  get the length of the replacement text.
1383     //   TODO:  slightly smarter logic in the copy loop could watch for the NUL on
1384     //          the fly and avoid this step.
1385     if (replacementLength == -1) {
1386         replacementLength = u_strlen(replacementText);
1387     }
1388 
1389     // Copy input string from the end of previous match to start of current match
1390     if (regexp->fText != NULL) {
1391         int32_t matchStart;
1392         int32_t lastMatchEnd;
1393         if (UTEXT_USES_U16(m->fInputText)) {
1394             lastMatchEnd = (int32_t)m->fLastMatchEnd;
1395             matchStart = (int32_t)m->fMatchStart;
1396         } else {
1397             // !!!: Would like a better way to do this!
1398             UErrorCode status = U_ZERO_ERROR;
1399             lastMatchEnd = utext_extract(m->fInputText, 0, m->fLastMatchEnd, NULL, 0, &status);
1400             status = U_ZERO_ERROR;
1401             matchStart = lastMatchEnd + utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, NULL, 0, &status);
1402         }
1403         for (i=lastMatchEnd; i<matchStart; i++) {
1404             appendToBuf(regexp->fText[i], &destIdx, dest, capacity);
1405         }
1406     } else {
1407         UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore
1408         destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart,
1409                                  &dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), &possibleOverflowError);
1410     }
1411 
1412 
1413     // scan the replacement text, looking for substitutions ($n) and \escapes.
1414     int32_t  replIdx = 0;
1415     while (replIdx < replacementLength) {
1416         UChar  c = replacementText[replIdx];
1417         replIdx++;
1418         if (c != DOLLARSIGN && c != BACKSLASH) {
1419             // Common case, no substitution, no escaping,
1420             //  just copy the char to the dest buf.
1421             appendToBuf(c, &destIdx, dest, capacity);
1422             continue;
1423         }
1424 
1425         if (c == BACKSLASH) {
1426             // Backslash Escape.  Copy the following char out without further checks.
1427             //                    Note:  Surrogate pairs don't need any special handling
1428             //                           The second half wont be a '$' or a '\', and
1429             //                           will move to the dest normally on the next
1430             //                           loop iteration.
1431             if (replIdx >= replacementLength) {
1432                 break;
1433             }
1434             c = replacementText[replIdx];
1435 
1436             if (c==0x55/*U*/ || c==0x75/*u*/) {
1437                 // We have a \udddd or \Udddddddd escape sequence.
1438                 UChar32 escapedChar =
1439                     u_unescapeAt(uregex_ucstr_unescape_charAt,
1440                        &replIdx,                   // Index is updated by unescapeAt
1441                        replacementLength,          // Length of replacement text
1442                        (void *)replacementText);
1443 
1444                 if (escapedChar != (UChar32)0xFFFFFFFF) {
1445                     if (escapedChar <= 0xffff) {
1446                         appendToBuf((UChar)escapedChar, &destIdx, dest, capacity);
1447                     } else {
1448                         appendToBuf(U16_LEAD(escapedChar), &destIdx, dest, capacity);
1449                         appendToBuf(U16_TRAIL(escapedChar), &destIdx, dest, capacity);
1450                     }
1451                     continue;
1452                 }
1453                 // Note:  if the \u escape was invalid, just fall through and
1454                 //        treat it as a plain \<anything> escape.
1455             }
1456 
1457             // Plain backslash escape.  Just put out the escaped character.
1458             appendToBuf(c, &destIdx, dest, capacity);
1459 
1460             replIdx++;
1461             continue;
1462         }
1463 
1464 
1465 
1466         // We've got a $.  Pick up a capture group number if one follows.
1467         // Consume at most the number of digits necessary for the largest capture
1468         // number that is valid for this pattern.
1469 
1470         int32_t numDigits = 0;
1471         int32_t groupNum  = 0;
1472         UChar32 digitC;
1473         for (;;) {
1474             if (replIdx >= replacementLength) {
1475                 break;
1476             }
1477             U16_GET(replacementText, 0, replIdx, replacementLength, digitC);
1478             if (u_isdigit(digitC) == FALSE) {
1479                 break;
1480             }
1481 
1482             U16_FWD_1(replacementText, replIdx, replacementLength);
1483             groupNum=groupNum*10 + u_charDigitValue(digitC);
1484             numDigits++;
1485             if (numDigits >= m->fPattern->fMaxCaptureDigits) {
1486                 break;
1487             }
1488         }
1489 
1490 
1491         if (numDigits == 0) {
1492             // The $ didn't introduce a group number at all.
1493             // Treat it as just part of the substitution text.
1494             appendToBuf(DOLLARSIGN, &destIdx, dest, capacity);
1495             continue;
1496         }
1497 
1498         // Finally, append the capture group data to the destination.
1499         destIdx += uregex_group((URegularExpression*)regexp, groupNum, &dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), status);
1500         if (*status == U_BUFFER_OVERFLOW_ERROR) {
1501             // Ignore buffer overflow when extracting the group.  We need to
1502             //   continue on to get full size of the untruncated result.  We will
1503             //   raise our own buffer overflow error at the end.
1504             *status = U_ZERO_ERROR;
1505         }
1506 
1507         if (U_FAILURE(*status)) {
1508             // Can fail if group number is out of range.
1509             break;
1510         }
1511 
1512     }
1513 
1514     //
1515     //  Nul Terminate the dest buffer if possible.
1516     //  Set the appropriate buffer overflow or not terminated error, if needed.
1517     //
1518     if (destIdx < capacity) {
1519         dest[destIdx] = 0;
1520     } else if (destIdx == *destCapacity) {
1521         *status = U_STRING_NOT_TERMINATED_WARNING;
1522     } else {
1523         *status = U_BUFFER_OVERFLOW_ERROR;
1524     }
1525 
1526     //
1527     // Return an updated dest buffer and capacity to the caller.
1528     //
1529     if (destIdx > 0 &&  *destCapacity > 0) {
1530         if (destIdx < capacity) {
1531             *destBuf      += destIdx;
1532             *destCapacity -= destIdx;
1533         } else {
1534             *destBuf      += capacity;
1535             *destCapacity =  0;
1536         }
1537     }
1538 
1539     // If we came in with a buffer overflow, make sure we go out with one also.
1540     //   (A zero length match right at the end of the previous match could
1541     //    make this function succeed even though a previous call had overflowed the buf)
1542     if (pendingBufferOverflow && U_SUCCESS(*status)) {
1543         *status = U_BUFFER_OVERFLOW_ERROR;
1544     }
1545 
1546     return destIdx;
1547 }
1548 
1549 //
1550 //   appendReplacement   the actual API function,
1551 //
1552 U_CAPI int32_t U_EXPORT2
uregex_appendReplacement(URegularExpression * regexp2,const UChar * replacementText,int32_t replacementLength,UChar ** destBuf,int32_t * destCapacity,UErrorCode * status)1553 uregex_appendReplacement(URegularExpression    *regexp2,
1554                          const UChar           *replacementText,
1555                          int32_t                replacementLength,
1556                          UChar                **destBuf,
1557                          int32_t               *destCapacity,
1558                          UErrorCode            *status) {
1559 
1560     RegularExpression *regexp = (RegularExpression*)regexp2;
1561     return RegexCImpl::appendReplacement(
1562         regexp, replacementText, replacementLength,destBuf, destCapacity, status);
1563 }
1564 
1565 //
1566 //   uregex_appendReplacementUText...can just use the normal C++ method
1567 //
1568 U_CAPI void U_EXPORT2
uregex_appendReplacementUText(URegularExpression * regexp2,UText * replText,UText * dest,UErrorCode * status)1569 uregex_appendReplacementUText(URegularExpression    *regexp2,
1570                               UText                 *replText,
1571                               UText                 *dest,
1572                               UErrorCode            *status)  {
1573     RegularExpression *regexp = (RegularExpression*)regexp2;
1574     regexp->fMatcher->appendReplacement(dest, replText, *status);
1575 }
1576 
1577 
1578 //------------------------------------------------------------------------------
1579 //
1580 //    uregex_appendTail
1581 //
1582 //------------------------------------------------------------------------------
appendTail(RegularExpression * regexp,UChar ** destBuf,int32_t * destCapacity,UErrorCode * status)1583 int32_t RegexCImpl::appendTail(RegularExpression    *regexp,
1584                                UChar                **destBuf,
1585                                int32_t               *destCapacity,
1586                                UErrorCode            *status)
1587 {
1588 
1589     // If we come in with a buffer overflow error, don't suppress the operation.
1590     //  A series of appendReplacements, appendTail need to correctly preflight
1591     //  the buffer size when an overflow happens somewhere in the middle.
1592     UBool pendingBufferOverflow = FALSE;
1593     if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
1594         pendingBufferOverflow = TRUE;
1595         *status = U_ZERO_ERROR;
1596     }
1597 
1598     if (validateRE(regexp, status) == FALSE) {
1599         return 0;
1600     }
1601 
1602     if (destCapacity == NULL || destBuf == NULL ||
1603         (*destBuf == NULL && *destCapacity > 0) ||
1604         *destCapacity < 0)
1605     {
1606         *status = U_ILLEGAL_ARGUMENT_ERROR;
1607         return 0;
1608     }
1609 
1610     RegexMatcher *m = regexp->fMatcher;
1611 
1612     int32_t  destIdx     = 0;
1613     int32_t  destCap     = *destCapacity;
1614     UChar    *dest       = *destBuf;
1615 
1616     if (regexp->fText != NULL) {
1617         int32_t srcIdx;
1618         int64_t nativeIdx = (m->fMatch ? m->fMatchEnd : m->fLastMatchEnd);
1619         if (nativeIdx == -1) {
1620             srcIdx = 0;
1621         } else if (UTEXT_USES_U16(m->fInputText)) {
1622             srcIdx = (int32_t)nativeIdx;
1623         } else {
1624             UErrorCode status = U_ZERO_ERROR;
1625             srcIdx = utext_extract(m->fInputText, 0, nativeIdx, NULL, 0, &status);
1626         }
1627 
1628         for (;;) {
1629             if (srcIdx == regexp->fTextLength) {
1630                 break;
1631             }
1632             UChar c = regexp->fText[srcIdx];
1633             if (c == 0 && regexp->fTextLength == -1) {
1634                 regexp->fTextLength = srcIdx;
1635                 break;
1636             }
1637             if (destIdx < destCap) {
1638                 dest[destIdx] = c;
1639             } else {
1640                 // We've overflowed the dest buffer.
1641                 //  If the total input string length is known, we can
1642                 //    compute the total buffer size needed without scanning through the string.
1643                 if (regexp->fTextLength > 0) {
1644                     destIdx += (regexp->fTextLength - srcIdx);
1645                     break;
1646                 }
1647             }
1648             srcIdx++;
1649             destIdx++;
1650         }
1651     } else {
1652         int64_t  srcIdx;
1653         if (m->fMatch) {
1654             // The most recent call to find() succeeded.
1655             srcIdx = m->fMatchEnd;
1656         } else {
1657             // The last call to find() on this matcher failed().
1658             //   Look back to the end of the last find() that succeeded for src index.
1659             srcIdx = m->fLastMatchEnd;
1660             if (srcIdx == -1)  {
1661                 // There has been no successful match with this matcher.
1662                 //   We want to copy the whole string.
1663                 srcIdx = 0;
1664             }
1665         }
1666 
1667         destIdx = utext_extract(m->fInputText, srcIdx, m->fInputLength, dest, destCap, status);
1668     }
1669 
1670     //
1671     //  NUL terminate the output string, if possible, otherwise issue the
1672     //   appropriate error or warning.
1673     //
1674     if (destIdx < destCap) {
1675         dest[destIdx] = 0;
1676     } else  if (destIdx == destCap) {
1677         *status = U_STRING_NOT_TERMINATED_WARNING;
1678     } else {
1679         *status = U_BUFFER_OVERFLOW_ERROR;
1680     }
1681 
1682     //
1683     // Update the user's buffer ptr and capacity vars to reflect the
1684     //   amount used.
1685     //
1686     if (destIdx < destCap) {
1687         *destBuf      += destIdx;
1688         *destCapacity -= destIdx;
1689     } else {
1690         *destBuf      += destCap;
1691         *destCapacity  = 0;
1692     }
1693 
1694     if (pendingBufferOverflow && U_SUCCESS(*status)) {
1695         *status = U_BUFFER_OVERFLOW_ERROR;
1696     }
1697 
1698     return destIdx;
1699 }
1700 
1701 
1702 //
1703 //   appendTail   the actual API function
1704 //
1705 U_CAPI int32_t U_EXPORT2
uregex_appendTail(URegularExpression * regexp2,UChar ** destBuf,int32_t * destCapacity,UErrorCode * status)1706 uregex_appendTail(URegularExpression    *regexp2,
1707                   UChar                **destBuf,
1708                   int32_t               *destCapacity,
1709                   UErrorCode            *status)  {
1710     RegularExpression *regexp = (RegularExpression*)regexp2;
1711     return RegexCImpl::appendTail(regexp, destBuf, destCapacity, status);
1712 }
1713 
1714 
1715 //
1716 //   uregex_appendTailUText...can just use the normal C++ method
1717 //
1718 U_CAPI UText * U_EXPORT2
uregex_appendTailUText(URegularExpression * regexp2,UText * dest,UErrorCode * status)1719 uregex_appendTailUText(URegularExpression    *regexp2,
1720                        UText                 *dest,
1721                        UErrorCode            *status)  {
1722     RegularExpression *regexp = (RegularExpression*)regexp2;
1723     return regexp->fMatcher->appendTail(dest, *status);
1724 }
1725 
1726 
//------------------------------------------------------------------------------
//
//    copyString     Internal utility (currently compiled out with #if 0).
//
//                   Copies srcLen UChars from srcPtr into destBuffer starting
//                   at *destIndex, for as long as they fit.  Once the buffer
//                   is full the remaining source length is still added to the
//                   index, so the result can be used to preflight the size.
//
//                   A NUL is stored after the copied text when there is room
//                   for it, and the index is advanced past the NUL position
//                   whether or not it could be stored.  On return *destIndex
//                   holds the updated index, not clipped to destCapacity.
//
//------------------------------------------------------------------------------
#if 0
static void copyString(UChar        *destBuffer,    //  Destination buffer.
                       int32_t       destCapacity,  //  Total capacity of dest buffer
                       int32_t      *destIndex,     //  Index into dest buffer.  Updated on return.
                                                    //    Update not clipped to destCapacity.
                       const UChar  *srcPtr,        //  Pointer to source string
                       int32_t       srcLen)        //  Source string len.
{
    int32_t  outIdx = *destIndex;
    int32_t  inIdx  = 0;

    // Copy while there is both source left and room in the destination.
    while (inIdx < srcLen && outIdx < destCapacity) {
        destBuffer[outIdx++] = srcPtr[inIdx++];
    }

    // Anything that did not fit is counted but not copied.
    if (inIdx < srcLen) {
        outIdx += srcLen - inIdx;
    }

    // Terminate if possible; the NUL position is counted either way.
    if (outIdx < destCapacity) {
        destBuffer[outIdx] = 0;
    }
    *destIndex = outIdx + 1;
}
#endif
1764 
1765 //------------------------------------------------------------------------------
1766 //
1767 //    uregex_split
1768 //
1769 //------------------------------------------------------------------------------
split(RegularExpression * regexp,UChar * destBuf,int32_t destCapacity,int32_t * requiredCapacity,UChar * destFields[],int32_t destFieldsCapacity,UErrorCode * status)1770 int32_t RegexCImpl::split(RegularExpression     *regexp,
1771                           UChar                 *destBuf,
1772                           int32_t                destCapacity,
1773                           int32_t               *requiredCapacity,
1774                           UChar                 *destFields[],
1775                           int32_t                destFieldsCapacity,
1776                           UErrorCode            *status) {
1777     //
1778     // Reset for the input text
1779     //
1780     regexp->fMatcher->reset();
1781     UText *inputText = regexp->fMatcher->fInputText;
1782     int64_t   nextOutputStringStart = 0;
1783     int64_t   inputLen = regexp->fMatcher->fInputLength;
1784     if (inputLen == 0) {
1785         return 0;
1786     }
1787 
1788     //
1789     // Loop through the input text, searching for the delimiter pattern
1790     //
1791     int32_t   i;             // Index of the field being processed.
1792     int32_t   destIdx = 0;   // Next available position in destBuf;
1793     int32_t   numCaptureGroups = regexp->fMatcher->groupCount();
1794     UErrorCode  tStatus = U_ZERO_ERROR;   // Want to ignore any buffer overflow errors so that the strings are still counted
1795     for (i=0; ; i++) {
1796         if (i>=destFieldsCapacity-1) {
1797             // There are one or zero output strings left.
1798             // Fill the last output string with whatever is left from the input, then exit the loop.
1799             //  ( i will be == destFieldsCapacity if we filled the output array while processing
1800             //    capture groups of the delimiter expression, in which case we will discard the
1801             //    last capture group saved in favor of the unprocessed remainder of the
1802             //    input string.)
1803             if (inputLen > nextOutputStringStart) {
1804                 if (i != destFieldsCapacity-1) {
1805                     // No fields are left.  Recycle the last one for holding the trailing part of
1806                     //   the input string.
1807                     i = destFieldsCapacity-1;
1808                     destIdx = (int32_t)(destFields[i] - destFields[0]);
1809                 }
1810 
1811                 destFields[i] = &destBuf[destIdx];
1812                 destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
1813                                              &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
1814             }
1815             break;
1816         }
1817 
1818         if (regexp->fMatcher->find()) {
1819             // We found another delimiter.  Move everything from where we started looking
1820             //  up until the start of the delimiter into the next output string.
1821             destFields[i] = &destBuf[destIdx];
1822 
1823             destIdx += 1 + utext_extract(inputText, nextOutputStringStart, regexp->fMatcher->fMatchStart,
1824                                          &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus);
1825             if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
1826                 tStatus = U_ZERO_ERROR;
1827             } else {
1828                 *status = tStatus;
1829             }
1830             nextOutputStringStart = regexp->fMatcher->fMatchEnd;
1831 
1832             // If the delimiter pattern has capturing parentheses, the captured
1833             //  text goes out into the next n destination strings.
1834             int32_t groupNum;
1835             for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) {
1836                 // If we've run out of output string slots, bail out.
1837                 if (i==destFieldsCapacity-1) {
1838                     break;
1839                 }
1840                 i++;
1841 
1842                 // Set up to extract the capture group contents into the dest buffer.
1843                 destFields[i] = &destBuf[destIdx];
1844                 tStatus = U_ZERO_ERROR;
1845                 int32_t t = uregex_group((URegularExpression*)regexp, groupNum, destFields[i], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus);
1846                 destIdx += t + 1;    // Record the space used in the output string buffer.
1847                                      //  +1 for the NUL that terminates the string.
1848                 if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
1849                     tStatus = U_ZERO_ERROR;
1850                 } else {
1851                     *status = tStatus;
1852                 }
1853             }
1854 
1855             if (nextOutputStringStart == inputLen) {
1856                 // The delimiter was at the end of the string.  We're done.
1857                 break;
1858             }
1859 
1860         }
1861         else
1862         {
1863             // We ran off the end of the input while looking for the next delimiter.
1864             // All the remaining text goes into the current output string.
1865             destFields[i] = &destBuf[destIdx];
1866             destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
1867                                          &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
1868             break;
1869         }
1870     }
1871 
1872     // Zero out any unused portion of the destFields array
1873     int j;
1874     for (j=i+1; j<destFieldsCapacity; j++) {
1875         destFields[j] = NULL;
1876     }
1877 
1878     if (requiredCapacity != NULL) {
1879         *requiredCapacity = destIdx;
1880     }
1881     if (destIdx > destCapacity) {
1882         *status = U_BUFFER_OVERFLOW_ERROR;
1883     }
1884     return i+1;
1885 }
1886 
1887 //
1888 //   uregex_split   The actual API function
1889 //
1890 U_CAPI int32_t U_EXPORT2
uregex_split(URegularExpression * regexp2,UChar * destBuf,int32_t destCapacity,int32_t * requiredCapacity,UChar * destFields[],int32_t destFieldsCapacity,UErrorCode * status)1891 uregex_split(URegularExpression      *regexp2,
1892              UChar                   *destBuf,
1893              int32_t                  destCapacity,
1894              int32_t                 *requiredCapacity,
1895              UChar                   *destFields[],
1896              int32_t                  destFieldsCapacity,
1897              UErrorCode              *status) {
1898     RegularExpression *regexp = (RegularExpression*)regexp2;
1899     if (validateRE(regexp, status) == FALSE) {
1900         return 0;
1901     }
1902     if ((destBuf == NULL && destCapacity > 0) ||
1903         destCapacity < 0 ||
1904         destFields == NULL ||
1905         destFieldsCapacity < 1 ) {
1906         *status = U_ILLEGAL_ARGUMENT_ERROR;
1907         return 0;
1908     }
1909 
1910     return RegexCImpl::split(regexp, destBuf, destCapacity, requiredCapacity, destFields, destFieldsCapacity, status);
1911 }
1912 
1913 
1914 //
1915 //   uregex_splitUText...can just use the normal C++ method
1916 //
1917 U_CAPI int32_t U_EXPORT2
uregex_splitUText(URegularExpression * regexp2,UText * destFields[],int32_t destFieldsCapacity,UErrorCode * status)1918 uregex_splitUText(URegularExpression    *regexp2,
1919                   UText                 *destFields[],
1920                   int32_t                destFieldsCapacity,
1921                   UErrorCode            *status) {
1922     RegularExpression *regexp = (RegularExpression*)regexp2;
1923     return regexp->fMatcher->split(regexp->fMatcher->inputText(), destFields, destFieldsCapacity, *status);
1924 }
1925 
1926 
1927 #endif   // !UCONFIG_NO_REGULAR_EXPRESSIONS
1928 
1929