• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 //  file:  repattrn.cpp
3 //
4 /*
5 ***************************************************************************
6 *   Copyright (C) 2002-2010 International Business Machines Corporation   *
7 *   and others. All rights reserved.                                      *
8 ***************************************************************************
9 */
10 
11 #include "unicode/utypes.h"
12 
13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
14 
15 #include "unicode/regex.h"
16 #include "unicode/uclean.h"
17 #include "uassert.h"
18 #include "uvector.h"
19 #include "uvectr32.h"
20 #include "uvectr64.h"
21 #include "regexcmp.h"
22 #include "regeximp.h"
23 #include "regexst.h"
24 
25 U_NAMESPACE_BEGIN
26 
27 //--------------------------------------------------------------------------
28 //
29 //    RegexPattern    Default Constructor
30 //
31 //--------------------------------------------------------------------------
RegexPattern()32 RegexPattern::RegexPattern() {
33     UErrorCode status = U_ZERO_ERROR;
34     u_init(&status);
35 
36     // Init all of this instances data.
37     init();
38 }
39 
40 
41 //--------------------------------------------------------------------------
42 //
43 //   Copy Constructor        Note:  This is a rather inefficient implementation,
44 //                                  but it probably doesn't matter.
45 //
46 //--------------------------------------------------------------------------
// Copy constructor.
//   Brings the new object to its default state and then lets the
//   assignment operator perform the actual copy.
RegexPattern::RegexPattern(const RegexPattern &other) :  UObject(other) {
    init();
    operator=(other);
}
51 
52 
53 
54 //--------------------------------------------------------------------------
55 //
56 //    Assignment Operator
57 //
58 //--------------------------------------------------------------------------
operator =(const RegexPattern & other)59 RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
60     if (this == &other) {
61         // Source and destination are the same.  Don't do anything.
62         return *this;
63     }
64 
65     // Clean out any previous contents of object being assigned to.
66     zap();
67 
68     // Give target object a default initialization
69     init();
70 
71     // Copy simple fields
72     if ( other.fPatternString == NULL ) {
73         fPatternString = NULL;
74         fPattern      = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
75     } else {
76         fPatternString = new UnicodeString(*(other.fPatternString));
77         UErrorCode status = U_ZERO_ERROR;
78         fPattern      = utext_openConstUnicodeString(NULL, fPatternString, &status);
79         if (U_FAILURE(status)) {
80             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
81             return *this;
82         }
83     }
84     fFlags            = other.fFlags;
85     fLiteralText      = other.fLiteralText;
86     fDeferredStatus   = other.fDeferredStatus;
87     fMinMatchLen      = other.fMinMatchLen;
88     fFrameSize        = other.fFrameSize;
89     fDataSize         = other.fDataSize;
90     fMaxCaptureDigits = other.fMaxCaptureDigits;
91     fStaticSets       = other.fStaticSets;
92     fStaticSets8      = other.fStaticSets8;
93 
94     fStartType        = other.fStartType;
95     fInitialStringIdx = other.fInitialStringIdx;
96     fInitialStringLen = other.fInitialStringLen;
97     *fInitialChars    = *other.fInitialChars;
98     fInitialChar      = other.fInitialChar;
99     *fInitialChars8   = *other.fInitialChars8;
100     fNeedsAltInput    = other.fNeedsAltInput;
101 
102     //  Copy the pattern.  It's just values, nothing deep to copy.
103     fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
104     fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
105 
106     //  Copy the Unicode Sets.
107     //    Could be made more efficient if the sets were reference counted and shared,
108     //    but I doubt that pattern copying will be particularly common.
109     //    Note:  init() already added an empty element zero to fSets
110     int32_t i;
111     int32_t  numSets = other.fSets->size();
112     fSets8 = new Regex8BitSet[numSets];
113     if (fSets8 == NULL) {
114     	fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
115     	return *this;
116     }
117     for (i=1; i<numSets; i++) {
118         if (U_FAILURE(fDeferredStatus)) {
119             return *this;
120         }
121         UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
122         UnicodeSet *newSet    = new UnicodeSet(*sourceSet);
123         if (newSet == NULL) {
124             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
125             break;
126         }
127         fSets->addElement(newSet, fDeferredStatus);
128         fSets8[i] = other.fSets8[i];
129     }
130 
131     return *this;
132 }
133 
134 
135 //--------------------------------------------------------------------------
136 //
137 //    init        Shared initialization for use by constructors.
138 //                Bring an uninitialized RegexPattern up to a default state.
139 //
140 //--------------------------------------------------------------------------
init()141 void RegexPattern::init() {
142     fFlags            = 0;
143     fCompiledPat      = 0;
144     fLiteralText.remove();
145     fSets             = NULL;
146     fSets8            = NULL;
147     fDeferredStatus   = U_ZERO_ERROR;
148     fMinMatchLen      = 0;
149     fFrameSize        = 0;
150     fDataSize         = 0;
151     fGroupMap         = NULL;
152     fMaxCaptureDigits = 1;
153     fStaticSets       = NULL;
154     fStaticSets8      = NULL;
155     fStartType        = START_NO_INFO;
156     fInitialStringIdx = 0;
157     fInitialStringLen = 0;
158     fInitialChars     = NULL;
159     fInitialChar      = 0;
160     fInitialChars8    = NULL;
161     fNeedsAltInput    = FALSE;
162 
163     fPattern          = NULL; // will be set later
164     fPatternString    = NULL; // may be set later
165     fCompiledPat      = new UVector64(fDeferredStatus);
166     fGroupMap         = new UVector32(fDeferredStatus);
167     fSets             = new UVector(fDeferredStatus);
168     fInitialChars     = new UnicodeSet;
169     fInitialChars8    = new Regex8BitSet;
170     if (U_FAILURE(fDeferredStatus)) {
171         return;
172     }
173     if (fCompiledPat == NULL  || fGroupMap == NULL || fSets == NULL ||
174         fInitialChars == NULL || fInitialChars8 == NULL) {
175         fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
176         return;
177     }
178 
179     // Slot zero of the vector of sets is reserved.  Fill it here.
180     fSets->addElement((int32_t)0, fDeferredStatus);
181 }
182 
183 
184 //--------------------------------------------------------------------------
185 //
186 //   zap            Delete everything owned by this RegexPattern.
187 //
188 //--------------------------------------------------------------------------
zap()189 void RegexPattern::zap() {
190     delete fCompiledPat;
191     fCompiledPat = NULL;
192     int i;
193     for (i=1; i<fSets->size(); i++) {
194         UnicodeSet *s;
195         s = (UnicodeSet *)fSets->elementAt(i);
196         if (s != NULL) {
197             delete s;
198         }
199     }
200     delete fSets;
201     fSets = NULL;
202     delete[] fSets8;
203     fSets8 = NULL;
204     delete fGroupMap;
205     fGroupMap = NULL;
206     delete fInitialChars;
207     fInitialChars = NULL;
208     delete fInitialChars8;
209     fInitialChars8 = NULL;
210     if (fPattern != NULL) {
211         utext_close(fPattern);
212         fPattern = NULL;
213     }
214     if (fPatternString != NULL) {
215         delete fPatternString;
216         fPatternString = NULL;
217     }
218 }
219 
220 
221 //--------------------------------------------------------------------------
222 //
223 //   Destructor
224 //
225 //--------------------------------------------------------------------------
~RegexPattern()226 RegexPattern::~RegexPattern() {
227     zap();
228 }
229 
230 
231 //--------------------------------------------------------------------------
232 //
233 //   Clone
234 //
235 //--------------------------------------------------------------------------
clone() const236 RegexPattern  *RegexPattern::clone() const {
237     RegexPattern  *copy = new RegexPattern(*this);
238     return copy;
239 }
240 
241 
242 //--------------------------------------------------------------------------
243 //
244 //   operator ==   (comparison)    Consider two patterns to be == if the
245 //                                 pattern strings and the flags are the same.
246 //                                 Note that pattern strings with the same
247 //                                 characters can still be considered different.
248 //
249 //--------------------------------------------------------------------------
operator ==(const RegexPattern & other) const250 UBool   RegexPattern::operator ==(const RegexPattern &other) const {
251     if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
252         if (this->fPatternString != NULL && other.fPatternString != NULL) {
253             return *(this->fPatternString) == *(other.fPatternString);
254         } else if (this->fPattern == NULL) {
255             if (other.fPattern == NULL) {
256                 return TRUE;
257             }
258         } else if (other.fPattern != NULL) {
259             UTEXT_SETNATIVEINDEX(this->fPattern, 0);
260             UTEXT_SETNATIVEINDEX(other.fPattern, 0);
261             return utext_equals(this->fPattern, other.fPattern);
262         }
263     }
264     return FALSE;
265 }
266 
267 //---------------------------------------------------------------------
268 //
269 //   compile
270 //
271 //---------------------------------------------------------------------
272 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UParseError & pe,UErrorCode & status)273 RegexPattern::compile(const UnicodeString &regex,
274                       uint32_t             flags,
275                       UParseError          &pe,
276                       UErrorCode           &status)
277 {
278     if (U_FAILURE(status)) {
279         return NULL;
280     }
281 
282     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
283     UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
284     UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
285 
286     if ((flags & ~allFlags) != 0) {
287         status = U_REGEX_INVALID_FLAG;
288         return NULL;
289     }
290 
291     if ((flags & (UREGEX_CANON_EQ | UREGEX_LITERAL)) != 0) {
292         status = U_REGEX_UNIMPLEMENTED;
293         return NULL;
294     }
295 
296     RegexPattern *This = new RegexPattern;
297     if (This == NULL) {
298         status = U_MEMORY_ALLOCATION_ERROR;
299         return NULL;
300     }
301     if (U_FAILURE(This->fDeferredStatus)) {
302         status = This->fDeferredStatus;
303         delete This;
304         return NULL;
305     }
306     This->fFlags = flags;
307 
308     RegexCompile     compiler(This, status);
309     compiler.compile(regex, pe, status);
310 
311     if (U_FAILURE(status)) {
312         delete This;
313         This = NULL;
314     }
315 
316     return This;
317 }
318 
319 
320 //
321 //   compile, UText mode
322 //
323 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UParseError & pe,UErrorCode & status)324 RegexPattern::compile(UText                *regex,
325                       uint32_t             flags,
326                       UParseError          &pe,
327                       UErrorCode           &status)
328 {
329     if (U_FAILURE(status)) {
330         return NULL;
331     }
332 
333     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
334                               UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
335                               UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
336 
337     if ((flags & ~allFlags) != 0) {
338         status = U_REGEX_INVALID_FLAG;
339         return NULL;
340     }
341 
342     if ((flags & (UREGEX_CANON_EQ | UREGEX_LITERAL)) != 0) {
343         status = U_REGEX_UNIMPLEMENTED;
344         return NULL;
345     }
346 
347     RegexPattern *This = new RegexPattern;
348     if (This == NULL) {
349         status = U_MEMORY_ALLOCATION_ERROR;
350         return NULL;
351     }
352     if (U_FAILURE(This->fDeferredStatus)) {
353         status = This->fDeferredStatus;
354         delete This;
355         return NULL;
356     }
357     This->fFlags = flags;
358 
359     RegexCompile     compiler(This, status);
360     compiler.compile(regex, pe, status);
361 
362     if (U_FAILURE(status)) {
363         delete This;
364         This = NULL;
365     }
366 
367     return This;
368 }
369 
370 //
371 //   compile with default flags.
372 //
373 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,UParseError & pe,UErrorCode & err)374 RegexPattern::compile(const UnicodeString &regex,
375                       UParseError         &pe,
376                       UErrorCode          &err)
377 {
378     return compile(regex, 0, pe, err);
379 }
380 
381 
382 //
383 //   compile with default flags, UText mode
384 //
385 RegexPattern * U_EXPORT2
compile(UText * regex,UParseError & pe,UErrorCode & err)386 RegexPattern::compile(UText               *regex,
387                       UParseError         &pe,
388                       UErrorCode          &err)
389 {
390     return compile(regex, 0, pe, err);
391 }
392 
393 
394 //
395 //   compile with no UParseErr parameter.
396 //
397 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UErrorCode & err)398 RegexPattern::compile(const UnicodeString &regex,
399                       uint32_t             flags,
400                       UErrorCode          &err)
401 {
402     UParseError pe;
403     return compile(regex, flags, pe, err);
404 }
405 
406 
407 //
408 //   compile with no UParseErr parameter, UText mode
409 //
410 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UErrorCode & err)411 RegexPattern::compile(UText                *regex,
412                       uint32_t             flags,
413                       UErrorCode           &err)
414 {
415     UParseError pe;
416     return compile(regex, flags, pe, err);
417 }
418 
419 
420 //---------------------------------------------------------------------
421 //
422 //   flags
423 //
424 //---------------------------------------------------------------------
flags() const425 uint32_t RegexPattern::flags() const {
426     return fFlags;
427 }
428 
429 
430 //---------------------------------------------------------------------
431 //
432 //   matcher(UnicodeString, err)
433 //
434 //---------------------------------------------------------------------
matcher(const UnicodeString & input,UErrorCode & status) const435 RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
436                                     UErrorCode          &status)  const {
437     RegexMatcher    *retMatcher = matcher(status);
438     if (retMatcher != NULL) {
439         retMatcher->fDeferredStatus = status;
440         retMatcher->reset(input);
441     }
442     return retMatcher;
443 }
444 
445 //
446 //   matcher, UText mode
447 //
matcher(UText * input,PatternIsUTextFlag,UErrorCode & status) const448 RegexMatcher *RegexPattern::matcher(UText               *input,
449                                     PatternIsUTextFlag  /*flag*/,
450                                     UErrorCode          &status)  const {
451     RegexMatcher    *retMatcher = matcher(status);
452     if (retMatcher != NULL) {
453         retMatcher->fDeferredStatus = status;
454         retMatcher->reset(input);
455     }
456     return retMatcher;
457 }
458 
459 #if 0
460 RegexMatcher *RegexPattern::matcher(const UChar * /*input*/,
461                                     UErrorCode          &status)  const
462 {
463     /* This should never get called. The API with UnicodeString should be called instead. */
464     if (U_SUCCESS(status)) {
465         status = U_UNSUPPORTED_ERROR;
466     }
467     return NULL;
468 }
469 #endif
470 
471 //---------------------------------------------------------------------
472 //
473 //   matcher(status)
474 //
475 //---------------------------------------------------------------------
matcher(UErrorCode & status) const476 RegexMatcher *RegexPattern::matcher(UErrorCode &status)  const {
477     RegexMatcher    *retMatcher = NULL;
478 
479     if (U_FAILURE(status)) {
480         return NULL;
481     }
482     if (U_FAILURE(fDeferredStatus)) {
483         status = fDeferredStatus;
484         return NULL;
485     }
486 
487     retMatcher = new RegexMatcher(this);
488     if (retMatcher == NULL) {
489         status = U_MEMORY_ALLOCATION_ERROR;
490         return NULL;
491     }
492     return retMatcher;
493 }
494 
495 
496 
497 //---------------------------------------------------------------------
498 //
499 //   matches        Convenience function to test for a match, starting
500 //                  with a pattern string and a data string.
501 //
502 //---------------------------------------------------------------------
matches(const UnicodeString & regex,const UnicodeString & input,UParseError & pe,UErrorCode & status)503 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString   &regex,
504               const UnicodeString   &input,
505                     UParseError     &pe,
506                     UErrorCode      &status) {
507 
508     if (U_FAILURE(status)) {return FALSE;}
509 
510     UBool         retVal;
511     RegexPattern *pat     = NULL;
512     RegexMatcher *matcher = NULL;
513 
514     pat     = RegexPattern::compile(regex, 0, pe, status);
515     matcher = pat->matcher(input, status);
516     retVal  = matcher->matches(status);
517 
518     delete matcher;
519     delete pat;
520     return retVal;
521 }
522 
523 
524 //
525 //   matches, UText mode
526 //
matches(UText * regex,UText * input,UParseError & pe,UErrorCode & status)527 UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
528                     UText           *input,
529                     UParseError     &pe,
530                     UErrorCode      &status) {
531 
532     if (U_FAILURE(status)) {return FALSE;}
533 
534     UBool         retVal;
535     RegexPattern *pat     = NULL;
536     RegexMatcher *matcher = NULL;
537 
538     pat     = RegexPattern::compile(regex, 0, pe, status);
539     matcher = pat->matcher(input, PATTERN_IS_UTEXT, status);
540     retVal  = matcher->matches(status);
541 
542     delete matcher;
543     delete pat;
544     return retVal;
545 }
546 
547 
548 
549 
550 
551 //---------------------------------------------------------------------
552 //
553 //   pattern
554 //
555 //---------------------------------------------------------------------
pattern() const556 UnicodeString RegexPattern::pattern() const {
557     if (fPatternString != NULL) {
558         return *fPatternString;
559     } else if (fPattern == NULL) {
560         return UnicodeString();
561     } else {
562         UErrorCode status = U_ZERO_ERROR;
563         int64_t nativeLen = utext_nativeLength(fPattern);
564         int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
565         UnicodeString result;
566 
567         status = U_ZERO_ERROR;
568         UChar *resultChars = result.getBuffer(len16);
569         utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
570         result.releaseBuffer(len16);
571 
572         return result;
573     }
574 }
575 
576 
577 
578 
579 //---------------------------------------------------------------------
580 //
581 //   patternText
582 //
583 //---------------------------------------------------------------------
patternText(UErrorCode & status) const584 UText *RegexPattern::patternText(UErrorCode      &status) const {
585     if (U_FAILURE(status)) {return NULL;}
586     status = U_ZERO_ERROR;
587 
588     if (fPattern != NULL) {
589         return fPattern;
590     } else {
591         RegexStaticSets::initGlobals(&status);
592         return RegexStaticSets::gStaticSets->fEmptyText;
593     }
594 }
595 
596 
597 
598 //---------------------------------------------------------------------
599 //
600 //   split
601 //
602 //---------------------------------------------------------------------
split(const UnicodeString & input,UnicodeString dest[],int32_t destCapacity,UErrorCode & status) const603 int32_t  RegexPattern::split(const UnicodeString &input,
604         UnicodeString    dest[],
605         int32_t          destCapacity,
606         UErrorCode      &status) const
607 {
608     if (U_FAILURE(status)) {
609         return 0;
610     };
611 
612     RegexMatcher  m(this);
613     int32_t r = 0;
614     // Check m's status to make sure all is ok.
615     if (U_SUCCESS(m.fDeferredStatus)) {
616     	r = m.split(input, dest, destCapacity, status);
617     }
618     return r;
619 }
620 
621 //
622 //   split, UText mode
623 //
split(UText * input,UText * dest[],int32_t destCapacity,UErrorCode & status) const624 int32_t  RegexPattern::split(UText *input,
625         UText           *dest[],
626         int32_t          destCapacity,
627         UErrorCode      &status) const
628 {
629     if (U_FAILURE(status)) {
630         return 0;
631     };
632 
633     RegexMatcher  m(this);
634     int32_t r = 0;
635     // Check m's status to make sure all is ok.
636     if (U_SUCCESS(m.fDeferredStatus)) {
637     	r = m.split(input, dest, destCapacity, status);
638     }
639     return r;
640 }
641 
642 
643 
644 //---------------------------------------------------------------------
645 //
646 //   dump    Output the compiled form of the pattern.
647 //           Debugging function only.
648 //
649 //---------------------------------------------------------------------
650 #if defined(REGEX_DEBUG)
dumpOp(int32_t index) const651 void   RegexPattern::dumpOp(int32_t index) const {
652     static const char * const opNames[] = {URX_OPCODE_NAMES};
653     int32_t op          = fCompiledPat->elementAti(index);
654     int32_t val         = URX_VAL(op);
655     int32_t type        = URX_TYPE(op);
656     int32_t pinnedType  = type;
657     if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) {
658         pinnedType = 0;
659     }
660 
661     REGEX_DUMP_DEBUG_PRINTF(("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]));
662     switch (type) {
663     case URX_NOP:
664     case URX_DOTANY:
665     case URX_DOTANY_ALL:
666     case URX_FAIL:
667     case URX_CARET:
668     case URX_DOLLAR:
669     case URX_BACKSLASH_G:
670     case URX_BACKSLASH_X:
671     case URX_END:
672     case URX_DOLLAR_M:
673     case URX_CARET_M:
674         // Types with no operand field of interest.
675         break;
676 
677     case URX_RESERVED_OP:
678     case URX_START_CAPTURE:
679     case URX_END_CAPTURE:
680     case URX_STATE_SAVE:
681     case URX_JMP:
682     case URX_JMP_SAV:
683     case URX_JMP_SAV_X:
684     case URX_BACKSLASH_B:
685     case URX_BACKSLASH_BU:
686     case URX_BACKSLASH_D:
687     case URX_BACKSLASH_Z:
688     case URX_STRING_LEN:
689     case URX_CTR_INIT:
690     case URX_CTR_INIT_NG:
691     case URX_CTR_LOOP:
692     case URX_CTR_LOOP_NG:
693     case URX_RELOC_OPRND:
694     case URX_STO_SP:
695     case URX_LD_SP:
696     case URX_BACKREF:
697     case URX_STO_INP_LOC:
698     case URX_JMPX:
699     case URX_LA_START:
700     case URX_LA_END:
701     case URX_BACKREF_I:
702     case URX_LB_START:
703     case URX_LB_CONT:
704     case URX_LB_END:
705     case URX_LBN_CONT:
706     case URX_LBN_END:
707     case URX_LOOP_C:
708     case URX_LOOP_DOT_I:
709         // types with an integer operand field.
710         REGEX_DUMP_DEBUG_PRINTF(("%d", val));
711         break;
712 
713     case URX_ONECHAR:
714     case URX_ONECHAR_I:
715         REGEX_DUMP_DEBUG_PRINTF(("%c", val<256?val:'?'));
716         break;
717 
718     case URX_STRING:
719     case URX_STRING_I:
720         {
721             int32_t lengthOp       = fCompiledPat->elementAti(index+1);
722             U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
723             int32_t length = URX_VAL(lengthOp);
724             int32_t i;
725             for (i=val; i<val+length; i++) {
726                 UChar c = fLiteralText[i];
727                 if (c < 32 || c >= 256) {c = '.';}
728                 REGEX_DUMP_DEBUG_PRINTF(("%c", c));
729             }
730         }
731         break;
732 
733     case URX_SETREF:
734     case URX_LOOP_SR_I:
735         {
736             UnicodeString s;
737             UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
738             set->toPattern(s, TRUE);
739             for (int32_t i=0; i<s.length(); i++) {
740                 REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
741             }
742         }
743         break;
744 
745     case URX_STATIC_SETREF:
746     case URX_STAT_SETREF_N:
747         {
748             UnicodeString s;
749             if (val & URX_NEG_SET) {
750                 REGEX_DUMP_DEBUG_PRINTF(("NOT "));
751                 val &= ~URX_NEG_SET;
752             }
753             UnicodeSet *set = fStaticSets[val];
754             set->toPattern(s, TRUE);
755             for (int32_t i=0; i<s.length(); i++) {
756                 REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
757             }
758         }
759         break;
760 
761 
762     default:
763         REGEX_DUMP_DEBUG_PRINTF(("??????"));
764         break;
765     }
766     REGEX_DUMP_DEBUG_PRINTF(("\n"));
767 }
768 #endif
769 
770 
771 #if defined(REGEX_DEBUG)
772 U_CAPI void  U_EXPORT2
RegexPatternDump(const RegexPattern * This)773 RegexPatternDump(const RegexPattern *This) {
774     int      index;
775     int      i;
776 
777     REGEX_DUMP_DEBUG_PRINTF(("Original Pattern:  "));
778     UChar32 c = utext_next32From(This->fPattern, 0);
779     while (c != U_SENTINEL) {
780         if (c<32 || c>256) {
781             c = '.';
782         }
783         REGEX_DUMP_DEBUG_PRINTF(("%c", c));
784 
785         c = UTEXT_NEXT32(This->fPattern);
786     }
787     REGEX_DUMP_DEBUG_PRINTF(("\n"));
788     REGEX_DUMP_DEBUG_PRINTF(("   Min Match Length:  %d\n", This->fMinMatchLen));
789     REGEX_DUMP_DEBUG_PRINTF(("   Match Start Type:  %s\n", START_OF_MATCH_STR(This->fStartType)));
790     if (This->fStartType == START_STRING) {
791         REGEX_DUMP_DEBUG_PRINTF(("    Initial match string: \""));
792         for (i=This->fInitialStringIdx; i<This->fInitialStringIdx+This->fInitialStringLen; i++) {
793             REGEX_DUMP_DEBUG_PRINTF(("%c", This->fLiteralText[i]));   // TODO:  non-printables, surrogates.
794         }
795         REGEX_DUMP_DEBUG_PRINTF(("\"\n"));
796 
797     } else if (This->fStartType == START_SET) {
798         int32_t numSetChars = This->fInitialChars->size();
799         if (numSetChars > 20) {
800             numSetChars = 20;
801         }
802         REGEX_DUMP_DEBUG_PRINTF(("     Match First Chars : "));
803         for (i=0; i<numSetChars; i++) {
804             UChar32 c = This->fInitialChars->charAt(i);
805             if (0x20<c && c <0x7e) {
806                 REGEX_DUMP_DEBUG_PRINTF(("%c ", c));
807             } else {
808                 REGEX_DUMP_DEBUG_PRINTF(("%#x ", c));
809             }
810         }
811         if (numSetChars < This->fInitialChars->size()) {
812             REGEX_DUMP_DEBUG_PRINTF((" ..."));
813         }
814         REGEX_DUMP_DEBUG_PRINTF(("\n"));
815 
816     } else if (This->fStartType == START_CHAR) {
817         REGEX_DUMP_DEBUG_PRINTF(("    First char of Match : "));
818         if (0x20 < This->fInitialChar && This->fInitialChar<0x7e) {
819                 REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar));
820             } else {
821                 REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar));
822             }
823     }
824 
825     REGEX_DUMP_DEBUG_PRINTF(("\nIndex   Binary     Type             Operand\n" \
826            "-------------------------------------------\n"));
827     for (index = 0; index<This->fCompiledPat->size(); index++) {
828         This->dumpOp(index);
829     }
830     REGEX_DUMP_DEBUG_PRINTF(("\n\n"));
831 }
832 #endif
833 
834 
835 
836 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
837 
838 U_NAMESPACE_END
839 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
840