• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 //  file:  repattrn.cpp
3 //
4 /*
5 ***************************************************************************
6 *   Copyright (C) 2002-2011 International Business Machines Corporation   *
7 *   and others. All rights reserved.                                      *
8 ***************************************************************************
9 */
10 
11 #include "unicode/utypes.h"
12 
13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
14 
15 #include "unicode/regex.h"
16 #include "unicode/uclean.h"
17 #include "uassert.h"
18 #include "uvector.h"
19 #include "uvectr32.h"
20 #include "uvectr64.h"
21 #include "regexcmp.h"
22 #include "regeximp.h"
23 #include "regexst.h"
24 
25 U_NAMESPACE_BEGIN
26 
27 //--------------------------------------------------------------------------
28 //
29 //    RegexPattern    Default Constructor
30 //
31 //--------------------------------------------------------------------------
RegexPattern()32 RegexPattern::RegexPattern() {
33     UErrorCode status = U_ZERO_ERROR;
34     u_init(&status);
35 
36     // Init all of this instances data.
37     init();
38 }
39 
40 
41 //--------------------------------------------------------------------------
42 //
43 //   Copy Constructor        Note:  This is a rather inefficient implementation,
44 //                                  but it probably doesn't matter.
45 //
46 //--------------------------------------------------------------------------
RegexPattern(const RegexPattern & other)47 RegexPattern::RegexPattern(const RegexPattern &other) :  UObject(other) {
48     init();
49     *this = other;
50 }
51 
52 
53 
54 //--------------------------------------------------------------------------
55 //
56 //    Assignment Operator
57 //
58 //--------------------------------------------------------------------------
operator =(const RegexPattern & other)59 RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
60     if (this == &other) {
61         // Source and destination are the same.  Don't do anything.
62         return *this;
63     }
64 
65     // Clean out any previous contents of object being assigned to.
66     zap();
67 
68     // Give target object a default initialization
69     init();
70 
71     // Copy simple fields
72     if ( other.fPatternString == NULL ) {
73         fPatternString = NULL;
74         fPattern      = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
75     } else {
76         fPatternString = new UnicodeString(*(other.fPatternString));
77         UErrorCode status = U_ZERO_ERROR;
78         fPattern      = utext_openConstUnicodeString(NULL, fPatternString, &status);
79         if (U_FAILURE(status)) {
80             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
81             return *this;
82         }
83     }
84     fFlags            = other.fFlags;
85     fLiteralText      = other.fLiteralText;
86     fDeferredStatus   = other.fDeferredStatus;
87     fMinMatchLen      = other.fMinMatchLen;
88     fFrameSize        = other.fFrameSize;
89     fDataSize         = other.fDataSize;
90     fMaxCaptureDigits = other.fMaxCaptureDigits;
91     fStaticSets       = other.fStaticSets;
92     fStaticSets8      = other.fStaticSets8;
93 
94     fStartType        = other.fStartType;
95     fInitialStringIdx = other.fInitialStringIdx;
96     fInitialStringLen = other.fInitialStringLen;
97     *fInitialChars    = *other.fInitialChars;
98     fInitialChar      = other.fInitialChar;
99     *fInitialChars8   = *other.fInitialChars8;
100     fNeedsAltInput    = other.fNeedsAltInput;
101 
102     //  Copy the pattern.  It's just values, nothing deep to copy.
103     fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
104     fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
105 
106     //  Copy the Unicode Sets.
107     //    Could be made more efficient if the sets were reference counted and shared,
108     //    but I doubt that pattern copying will be particularly common.
109     //    Note:  init() already added an empty element zero to fSets
110     int32_t i;
111     int32_t  numSets = other.fSets->size();
112     fSets8 = new Regex8BitSet[numSets];
113     if (fSets8 == NULL) {
114     	fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
115     	return *this;
116     }
117     for (i=1; i<numSets; i++) {
118         if (U_FAILURE(fDeferredStatus)) {
119             return *this;
120         }
121         UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
122         UnicodeSet *newSet    = new UnicodeSet(*sourceSet);
123         if (newSet == NULL) {
124             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
125             break;
126         }
127         fSets->addElement(newSet, fDeferredStatus);
128         fSets8[i] = other.fSets8[i];
129     }
130 
131     return *this;
132 }
133 
134 
135 //--------------------------------------------------------------------------
136 //
137 //    init        Shared initialization for use by constructors.
138 //                Bring an uninitialized RegexPattern up to a default state.
139 //
140 //--------------------------------------------------------------------------
init()141 void RegexPattern::init() {
142     fFlags            = 0;
143     fCompiledPat      = 0;
144     fLiteralText.remove();
145     fSets             = NULL;
146     fSets8            = NULL;
147     fDeferredStatus   = U_ZERO_ERROR;
148     fMinMatchLen      = 0;
149     fFrameSize        = 0;
150     fDataSize         = 0;
151     fGroupMap         = NULL;
152     fMaxCaptureDigits = 1;
153     fStaticSets       = NULL;
154     fStaticSets8      = NULL;
155     fStartType        = START_NO_INFO;
156     fInitialStringIdx = 0;
157     fInitialStringLen = 0;
158     fInitialChars     = NULL;
159     fInitialChar      = 0;
160     fInitialChars8    = NULL;
161     fNeedsAltInput    = FALSE;
162 
163     fPattern          = NULL; // will be set later
164     fPatternString    = NULL; // may be set later
165     fCompiledPat      = new UVector64(fDeferredStatus);
166     fGroupMap         = new UVector32(fDeferredStatus);
167     fSets             = new UVector(fDeferredStatus);
168     fInitialChars     = new UnicodeSet;
169     fInitialChars8    = new Regex8BitSet;
170     if (U_FAILURE(fDeferredStatus)) {
171         return;
172     }
173     if (fCompiledPat == NULL  || fGroupMap == NULL || fSets == NULL ||
174         fInitialChars == NULL || fInitialChars8 == NULL) {
175         fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
176         return;
177     }
178 
179     // Slot zero of the vector of sets is reserved.  Fill it here.
180     fSets->addElement((int32_t)0, fDeferredStatus);
181 }
182 
183 
184 //--------------------------------------------------------------------------
185 //
186 //   zap            Delete everything owned by this RegexPattern.
187 //
188 //--------------------------------------------------------------------------
zap()189 void RegexPattern::zap() {
190     delete fCompiledPat;
191     fCompiledPat = NULL;
192     int i;
193     for (i=1; i<fSets->size(); i++) {
194         UnicodeSet *s;
195         s = (UnicodeSet *)fSets->elementAt(i);
196         if (s != NULL) {
197             delete s;
198         }
199     }
200     delete fSets;
201     fSets = NULL;
202     delete[] fSets8;
203     fSets8 = NULL;
204     delete fGroupMap;
205     fGroupMap = NULL;
206     delete fInitialChars;
207     fInitialChars = NULL;
208     delete fInitialChars8;
209     fInitialChars8 = NULL;
210     if (fPattern != NULL) {
211         utext_close(fPattern);
212         fPattern = NULL;
213     }
214     if (fPatternString != NULL) {
215         delete fPatternString;
216         fPatternString = NULL;
217     }
218 }
219 
220 
221 //--------------------------------------------------------------------------
222 //
223 //   Destructor
224 //
225 //--------------------------------------------------------------------------
~RegexPattern()226 RegexPattern::~RegexPattern() {
227     zap();
228 }
229 
230 
231 //--------------------------------------------------------------------------
232 //
233 //   Clone
234 //
235 //--------------------------------------------------------------------------
clone() const236 RegexPattern  *RegexPattern::clone() const {
237     RegexPattern  *copy = new RegexPattern(*this);
238     return copy;
239 }
240 
241 
242 //--------------------------------------------------------------------------
243 //
244 //   operator ==   (comparison)    Consider to patterns to be == if the
245 //                                 pattern strings and the flags are the same.
246 //                                 Note that pattern strings with the same
247 //                                 characters can still be considered different.
248 //
249 //--------------------------------------------------------------------------
operator ==(const RegexPattern & other) const250 UBool   RegexPattern::operator ==(const RegexPattern &other) const {
251     if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
252         if (this->fPatternString != NULL && other.fPatternString != NULL) {
253             return *(this->fPatternString) == *(other.fPatternString);
254         } else if (this->fPattern == NULL) {
255             if (other.fPattern == NULL) {
256                 return TRUE;
257             }
258         } else if (other.fPattern != NULL) {
259             UTEXT_SETNATIVEINDEX(this->fPattern, 0);
260             UTEXT_SETNATIVEINDEX(other.fPattern, 0);
261             return utext_equals(this->fPattern, other.fPattern);
262         }
263     }
264     return FALSE;
265 }
266 
267 //---------------------------------------------------------------------
268 //
269 //   compile
270 //
271 //---------------------------------------------------------------------
272 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UParseError & pe,UErrorCode & status)273 RegexPattern::compile(const UnicodeString &regex,
274                       uint32_t             flags,
275                       UParseError          &pe,
276                       UErrorCode           &status)
277 {
278     if (U_FAILURE(status)) {
279         return NULL;
280     }
281 
282     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
283     UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
284     UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
285 
286     if ((flags & ~allFlags) != 0) {
287         status = U_REGEX_INVALID_FLAG;
288         return NULL;
289     }
290 
291     if ((flags & (UREGEX_CANON_EQ | UREGEX_LITERAL)) != 0) {
292         status = U_REGEX_UNIMPLEMENTED;
293         return NULL;
294     }
295 
296     RegexPattern *This = new RegexPattern;
297     if (This == NULL) {
298         status = U_MEMORY_ALLOCATION_ERROR;
299         return NULL;
300     }
301     if (U_FAILURE(This->fDeferredStatus)) {
302         status = This->fDeferredStatus;
303         delete This;
304         return NULL;
305     }
306     This->fFlags = flags;
307 
308     RegexCompile     compiler(This, status);
309     compiler.compile(regex, pe, status);
310 
311     if (U_FAILURE(status)) {
312         delete This;
313         This = NULL;
314     }
315 
316     return This;
317 }
318 
319 
320 //
321 //   compile, UText mode
322 //
323 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UParseError & pe,UErrorCode & status)324 RegexPattern::compile(UText                *regex,
325                       uint32_t             flags,
326                       UParseError          &pe,
327                       UErrorCode           &status)
328 {
329     if (U_FAILURE(status)) {
330         return NULL;
331     }
332 
333     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
334                               UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
335                               UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
336 
337     if ((flags & ~allFlags) != 0) {
338         status = U_REGEX_INVALID_FLAG;
339         return NULL;
340     }
341 
342     if ((flags & (UREGEX_CANON_EQ | UREGEX_LITERAL)) != 0) {
343         status = U_REGEX_UNIMPLEMENTED;
344         return NULL;
345     }
346 
347     RegexPattern *This = new RegexPattern;
348     if (This == NULL) {
349         status = U_MEMORY_ALLOCATION_ERROR;
350         return NULL;
351     }
352     if (U_FAILURE(This->fDeferredStatus)) {
353         status = This->fDeferredStatus;
354         delete This;
355         return NULL;
356     }
357     This->fFlags = flags;
358 
359     RegexCompile     compiler(This, status);
360     compiler.compile(regex, pe, status);
361 
362     if (U_FAILURE(status)) {
363         delete This;
364         This = NULL;
365     }
366 
367     return This;
368 }
369 
370 //
371 //   compile with default flags.
372 //
373 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,UParseError & pe,UErrorCode & err)374 RegexPattern::compile(const UnicodeString &regex,
375                       UParseError         &pe,
376                       UErrorCode          &err)
377 {
378     return compile(regex, 0, pe, err);
379 }
380 
381 
382 //
383 //   compile with default flags, UText mode
384 //
385 RegexPattern * U_EXPORT2
compile(UText * regex,UParseError & pe,UErrorCode & err)386 RegexPattern::compile(UText               *regex,
387                       UParseError         &pe,
388                       UErrorCode          &err)
389 {
390     return compile(regex, 0, pe, err);
391 }
392 
393 
394 //
395 //   compile with no UParseErr parameter.
396 //
397 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UErrorCode & err)398 RegexPattern::compile(const UnicodeString &regex,
399                       uint32_t             flags,
400                       UErrorCode          &err)
401 {
402     UParseError pe;
403     return compile(regex, flags, pe, err);
404 }
405 
406 
407 //
408 //   compile with no UParseErr parameter, UText mode
409 //
410 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UErrorCode & err)411 RegexPattern::compile(UText                *regex,
412                       uint32_t             flags,
413                       UErrorCode           &err)
414 {
415     UParseError pe;
416     return compile(regex, flags, pe, err);
417 }
418 
419 
420 //---------------------------------------------------------------------
421 //
422 //   flags
423 //
424 //---------------------------------------------------------------------
flags() const425 uint32_t RegexPattern::flags() const {
426     return fFlags;
427 }
428 
429 
430 //---------------------------------------------------------------------
431 //
432 //   matcher(UnicodeString, err)
433 //
434 //---------------------------------------------------------------------
matcher(const UnicodeString & input,UErrorCode & status) const435 RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
436                                     UErrorCode          &status)  const {
437     RegexMatcher    *retMatcher = matcher(status);
438     if (retMatcher != NULL) {
439         retMatcher->fDeferredStatus = status;
440         retMatcher->reset(input);
441     }
442     return retMatcher;
443 }
444 
445 
446 //---------------------------------------------------------------------
447 //
448 //   matcher(status)
449 //
450 //---------------------------------------------------------------------
matcher(UErrorCode & status) const451 RegexMatcher *RegexPattern::matcher(UErrorCode &status)  const {
452     RegexMatcher    *retMatcher = NULL;
453 
454     if (U_FAILURE(status)) {
455         return NULL;
456     }
457     if (U_FAILURE(fDeferredStatus)) {
458         status = fDeferredStatus;
459         return NULL;
460     }
461 
462     retMatcher = new RegexMatcher(this);
463     if (retMatcher == NULL) {
464         status = U_MEMORY_ALLOCATION_ERROR;
465         return NULL;
466     }
467     return retMatcher;
468 }
469 
470 
471 
472 //---------------------------------------------------------------------
473 //
474 //   matches        Convenience function to test for a match, starting
475 //                  with a pattern string and a data string.
476 //
477 //---------------------------------------------------------------------
matches(const UnicodeString & regex,const UnicodeString & input,UParseError & pe,UErrorCode & status)478 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString   &regex,
479               const UnicodeString   &input,
480                     UParseError     &pe,
481                     UErrorCode      &status) {
482 
483     if (U_FAILURE(status)) {return FALSE;}
484 
485     UBool         retVal;
486     RegexPattern *pat     = NULL;
487     RegexMatcher *matcher = NULL;
488 
489     pat     = RegexPattern::compile(regex, 0, pe, status);
490     matcher = pat->matcher(input, status);
491     retVal  = matcher->matches(status);
492 
493     delete matcher;
494     delete pat;
495     return retVal;
496 }
497 
498 
499 //
500 //   matches, UText mode
501 //
matches(UText * regex,UText * input,UParseError & pe,UErrorCode & status)502 UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
503                     UText           *input,
504                     UParseError     &pe,
505                     UErrorCode      &status) {
506 
507     if (U_FAILURE(status)) {return FALSE;}
508 
509     UBool         retVal  = FALSE;
510     RegexPattern *pat     = NULL;
511     RegexMatcher *matcher = NULL;
512 
513     pat     = RegexPattern::compile(regex, 0, pe, status);
514     matcher = pat->matcher(status);
515     if (U_SUCCESS(status)) {
516         matcher->reset(input);
517         retVal  = matcher->matches(status);
518     }
519 
520     delete matcher;
521     delete pat;
522     return retVal;
523 }
524 
525 
526 
527 
528 
529 //---------------------------------------------------------------------
530 //
531 //   pattern
532 //
533 //---------------------------------------------------------------------
pattern() const534 UnicodeString RegexPattern::pattern() const {
535     if (fPatternString != NULL) {
536         return *fPatternString;
537     } else if (fPattern == NULL) {
538         return UnicodeString();
539     } else {
540         UErrorCode status = U_ZERO_ERROR;
541         int64_t nativeLen = utext_nativeLength(fPattern);
542         int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
543         UnicodeString result;
544 
545         status = U_ZERO_ERROR;
546         UChar *resultChars = result.getBuffer(len16);
547         utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
548         result.releaseBuffer(len16);
549 
550         return result;
551     }
552 }
553 
554 
555 
556 
557 //---------------------------------------------------------------------
558 //
559 //   patternText
560 //
561 //---------------------------------------------------------------------
patternText(UErrorCode & status) const562 UText *RegexPattern::patternText(UErrorCode      &status) const {
563     if (U_FAILURE(status)) {return NULL;}
564     status = U_ZERO_ERROR;
565 
566     if (fPattern != NULL) {
567         return fPattern;
568     } else {
569         RegexStaticSets::initGlobals(&status);
570         return RegexStaticSets::gStaticSets->fEmptyText;
571     }
572 }
573 
574 
575 
576 //---------------------------------------------------------------------
577 //
578 //   split
579 //
580 //---------------------------------------------------------------------
split(const UnicodeString & input,UnicodeString dest[],int32_t destCapacity,UErrorCode & status) const581 int32_t  RegexPattern::split(const UnicodeString &input,
582         UnicodeString    dest[],
583         int32_t          destCapacity,
584         UErrorCode      &status) const
585 {
586     if (U_FAILURE(status)) {
587         return 0;
588     };
589 
590     RegexMatcher  m(this);
591     int32_t r = 0;
592     // Check m's status to make sure all is ok.
593     if (U_SUCCESS(m.fDeferredStatus)) {
594     	r = m.split(input, dest, destCapacity, status);
595     }
596     return r;
597 }
598 
599 //
600 //   split, UText mode
601 //
split(UText * input,UText * dest[],int32_t destCapacity,UErrorCode & status) const602 int32_t  RegexPattern::split(UText *input,
603         UText           *dest[],
604         int32_t          destCapacity,
605         UErrorCode      &status) const
606 {
607     if (U_FAILURE(status)) {
608         return 0;
609     };
610 
611     RegexMatcher  m(this);
612     int32_t r = 0;
613     // Check m's status to make sure all is ok.
614     if (U_SUCCESS(m.fDeferredStatus)) {
615     	r = m.split(input, dest, destCapacity, status);
616     }
617     return r;
618 }
619 
620 
621 
622 //---------------------------------------------------------------------
623 //
624 //   dump    Output the compiled form of the pattern.
625 //           Debugging function only.
626 //
627 //---------------------------------------------------------------------
628 #if defined(REGEX_DEBUG)
dumpOp(int32_t index) const629 void   RegexPattern::dumpOp(int32_t index) const {
630     static const char * const opNames[] = {URX_OPCODE_NAMES};
631     int32_t op          = fCompiledPat->elementAti(index);
632     int32_t val         = URX_VAL(op);
633     int32_t type        = URX_TYPE(op);
634     int32_t pinnedType  = type;
635     if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) {
636         pinnedType = 0;
637     }
638 
639     REGEX_DUMP_DEBUG_PRINTF(("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]));
640     switch (type) {
641     case URX_NOP:
642     case URX_DOTANY:
643     case URX_DOTANY_ALL:
644     case URX_FAIL:
645     case URX_CARET:
646     case URX_DOLLAR:
647     case URX_BACKSLASH_G:
648     case URX_BACKSLASH_X:
649     case URX_END:
650     case URX_DOLLAR_M:
651     case URX_CARET_M:
652         // Types with no operand field of interest.
653         break;
654 
655     case URX_RESERVED_OP:
656     case URX_START_CAPTURE:
657     case URX_END_CAPTURE:
658     case URX_STATE_SAVE:
659     case URX_JMP:
660     case URX_JMP_SAV:
661     case URX_JMP_SAV_X:
662     case URX_BACKSLASH_B:
663     case URX_BACKSLASH_BU:
664     case URX_BACKSLASH_D:
665     case URX_BACKSLASH_Z:
666     case URX_STRING_LEN:
667     case URX_CTR_INIT:
668     case URX_CTR_INIT_NG:
669     case URX_CTR_LOOP:
670     case URX_CTR_LOOP_NG:
671     case URX_RELOC_OPRND:
672     case URX_STO_SP:
673     case URX_LD_SP:
674     case URX_BACKREF:
675     case URX_STO_INP_LOC:
676     case URX_JMPX:
677     case URX_LA_START:
678     case URX_LA_END:
679     case URX_BACKREF_I:
680     case URX_LB_START:
681     case URX_LB_CONT:
682     case URX_LB_END:
683     case URX_LBN_CONT:
684     case URX_LBN_END:
685     case URX_LOOP_C:
686     case URX_LOOP_DOT_I:
687         // types with an integer operand field.
688         REGEX_DUMP_DEBUG_PRINTF(("%d", val));
689         break;
690 
691     case URX_ONECHAR:
692     case URX_ONECHAR_I:
693         REGEX_DUMP_DEBUG_PRINTF(("%c", val<256?val:'?'));
694         break;
695 
696     case URX_STRING:
697     case URX_STRING_I:
698         {
699             int32_t lengthOp       = fCompiledPat->elementAti(index+1);
700             U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
701             int32_t length = URX_VAL(lengthOp);
702             int32_t i;
703             for (i=val; i<val+length; i++) {
704                 UChar c = fLiteralText[i];
705                 if (c < 32 || c >= 256) {c = '.';}
706                 REGEX_DUMP_DEBUG_PRINTF(("%c", c));
707             }
708         }
709         break;
710 
711     case URX_SETREF:
712     case URX_LOOP_SR_I:
713         {
714             UnicodeString s;
715             UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
716             set->toPattern(s, TRUE);
717             for (int32_t i=0; i<s.length(); i++) {
718                 REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
719             }
720         }
721         break;
722 
723     case URX_STATIC_SETREF:
724     case URX_STAT_SETREF_N:
725         {
726             UnicodeString s;
727             if (val & URX_NEG_SET) {
728                 REGEX_DUMP_DEBUG_PRINTF(("NOT "));
729                 val &= ~URX_NEG_SET;
730             }
731             UnicodeSet *set = fStaticSets[val];
732             set->toPattern(s, TRUE);
733             for (int32_t i=0; i<s.length(); i++) {
734                 REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
735             }
736         }
737         break;
738 
739 
740     default:
741         REGEX_DUMP_DEBUG_PRINTF(("??????"));
742         break;
743     }
744     REGEX_DUMP_DEBUG_PRINTF(("\n"));
745 }
746 #endif
747 
748 
#if defined(REGEX_DEBUG)
//
//   RegexPatternDump    Print a description of a compiled pattern: the
//                       original pattern text, the minimum match length,
//                       the start-of-match information, and then every
//                       compiled operation (via dumpOp).
//                       Debugging function only.
//
U_CAPI void  U_EXPORT2
RegexPatternDump(const RegexPattern *This) {
    int      index;
    int      i;

    REGEX_DUMP_DEBUG_PRINTF(("Original Pattern:  "));
    // A pattern object with no pattern text has a NULL fPattern; print
    // nothing for it rather than reading through the NULL UText.
    if (This->fPattern != NULL) {
        UChar32 c = utext_next32From(This->fPattern, 0);
        while (c != U_SENTINEL) {
            // Only 32..255 is printed as is, the same range dumpOp uses
            // for literal strings.
            if (c<32 || c>=256) {
                c = '.';
            }
            REGEX_DUMP_DEBUG_PRINTF(("%c", c));

            c = UTEXT_NEXT32(This->fPattern);
        }
    }
    REGEX_DUMP_DEBUG_PRINTF(("\n"));
    REGEX_DUMP_DEBUG_PRINTF(("   Min Match Length:  %d\n", This->fMinMatchLen));
    REGEX_DUMP_DEBUG_PRINTF(("   Match Start Type:  %s\n", START_OF_MATCH_STR(This->fStartType)));
    if (This->fStartType == START_STRING) {
        REGEX_DUMP_DEBUG_PRINTF(("    Initial match string: \""));
        for (i=This->fInitialStringIdx; i<This->fInitialStringIdx+This->fInitialStringLen; i++) {
            REGEX_DUMP_DEBUG_PRINTF(("%c", This->fLiteralText[i]));   // TODO:  non-printables, surrogates.
        }
        REGEX_DUMP_DEBUG_PRINTF(("\"\n"));

    } else if (This->fStartType == START_SET) {
        // Show at most the first 20 characters of the initial set.
        int32_t numSetChars = This->fInitialChars->size();
        if (numSetChars > 20) {
            numSetChars = 20;
        }
        REGEX_DUMP_DEBUG_PRINTF(("     Match First Chars : "));
        for (i=0; i<numSetChars; i++) {
            UChar32 setChar = This->fInitialChars->charAt(i);
            if (0x20<setChar && setChar <0x7e) {
                REGEX_DUMP_DEBUG_PRINTF(("%c ", setChar));
            } else {
                REGEX_DUMP_DEBUG_PRINTF(("%#x ", setChar));
            }
        }
        if (numSetChars < This->fInitialChars->size()) {
            REGEX_DUMP_DEBUG_PRINTF((" ..."));
        }
        REGEX_DUMP_DEBUG_PRINTF(("\n"));

    } else if (This->fStartType == START_CHAR) {
        REGEX_DUMP_DEBUG_PRINTF(("    First char of Match : "));
        if (0x20 < This->fInitialChar && This->fInitialChar<0x7e) {
            REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar));
        } else {
            REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar));
        }
    }

    REGEX_DUMP_DEBUG_PRINTF(("\nIndex   Binary     Type             Operand\n" \
           "-------------------------------------------\n"));
    for (index = 0; index<This->fCompiledPat->size(); index++) {
        This->dumpOp(index);
    }
    REGEX_DUMP_DEBUG_PRINTF(("\n\n"));
}
#endif
811 
812 
813 
814 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
815 
816 U_NAMESPACE_END
817 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
818