• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 //  file:  repattrn.cpp
3 //
4 /*
5 ***************************************************************************
6 *   Copyright (C) 2002-2012 International Business Machines Corporation   *
7 *   and others. All rights reserved.                                      *
8 ***************************************************************************
9 */
10 
11 #include "unicode/utypes.h"
12 
13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
14 
15 #include "unicode/regex.h"
16 #include "unicode/uclean.h"
17 #include "uassert.h"
18 #include "uvector.h"
19 #include "uvectr32.h"
20 #include "uvectr64.h"
21 #include "regexcmp.h"
22 #include "regeximp.h"
23 #include "regexst.h"
24 
25 U_NAMESPACE_BEGIN
26 
27 //--------------------------------------------------------------------------
28 //
29 //    RegexPattern    Default Constructor
30 //
31 //--------------------------------------------------------------------------
RegexPattern()32 RegexPattern::RegexPattern() {
33     // Init all of this instances data.
34     init();
35 }
36 
37 
38 //--------------------------------------------------------------------------
39 //
40 //   Copy Constructor        Note:  This is a rather inefficient implementation,
41 //                                  but it probably doesn't matter.
42 //
43 //--------------------------------------------------------------------------
RegexPattern(const RegexPattern & other)44 RegexPattern::RegexPattern(const RegexPattern &other) :  UObject(other) {
45     init();
46     *this = other;
47 }
48 
49 
50 
51 //--------------------------------------------------------------------------
52 //
53 //    Assignment Operator
54 //
55 //--------------------------------------------------------------------------
operator =(const RegexPattern & other)56 RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
57     if (this == &other) {
58         // Source and destination are the same.  Don't do anything.
59         return *this;
60     }
61 
62     // Clean out any previous contents of object being assigned to.
63     zap();
64 
65     // Give target object a default initialization
66     init();
67 
68     // Copy simple fields
69     if ( other.fPatternString == NULL ) {
70         fPatternString = NULL;
71         fPattern      = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
72     } else {
73         fPatternString = new UnicodeString(*(other.fPatternString));
74         UErrorCode status = U_ZERO_ERROR;
75         fPattern      = utext_openConstUnicodeString(NULL, fPatternString, &status);
76         if (U_FAILURE(status)) {
77             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
78             return *this;
79         }
80     }
81     fFlags            = other.fFlags;
82     fLiteralText      = other.fLiteralText;
83     fDeferredStatus   = other.fDeferredStatus;
84     fMinMatchLen      = other.fMinMatchLen;
85     fFrameSize        = other.fFrameSize;
86     fDataSize         = other.fDataSize;
87     fMaxCaptureDigits = other.fMaxCaptureDigits;
88     fStaticSets       = other.fStaticSets;
89     fStaticSets8      = other.fStaticSets8;
90 
91     fStartType        = other.fStartType;
92     fInitialStringIdx = other.fInitialStringIdx;
93     fInitialStringLen = other.fInitialStringLen;
94     *fInitialChars    = *other.fInitialChars;
95     fInitialChar      = other.fInitialChar;
96     *fInitialChars8   = *other.fInitialChars8;
97     fNeedsAltInput    = other.fNeedsAltInput;
98 
99     //  Copy the pattern.  It's just values, nothing deep to copy.
100     fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
101     fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
102 
103     //  Copy the Unicode Sets.
104     //    Could be made more efficient if the sets were reference counted and shared,
105     //    but I doubt that pattern copying will be particularly common.
106     //    Note:  init() already added an empty element zero to fSets
107     int32_t i;
108     int32_t  numSets = other.fSets->size();
109     fSets8 = new Regex8BitSet[numSets];
110     if (fSets8 == NULL) {
111     	fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
112     	return *this;
113     }
114     for (i=1; i<numSets; i++) {
115         if (U_FAILURE(fDeferredStatus)) {
116             return *this;
117         }
118         UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
119         UnicodeSet *newSet    = new UnicodeSet(*sourceSet);
120         if (newSet == NULL) {
121             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
122             break;
123         }
124         fSets->addElement(newSet, fDeferredStatus);
125         fSets8[i] = other.fSets8[i];
126     }
127 
128     return *this;
129 }
130 
131 
132 //--------------------------------------------------------------------------
133 //
134 //    init        Shared initialization for use by constructors.
135 //                Bring an uninitialized RegexPattern up to a default state.
136 //
137 //--------------------------------------------------------------------------
init()138 void RegexPattern::init() {
139     fFlags            = 0;
140     fCompiledPat      = 0;
141     fLiteralText.remove();
142     fSets             = NULL;
143     fSets8            = NULL;
144     fDeferredStatus   = U_ZERO_ERROR;
145     fMinMatchLen      = 0;
146     fFrameSize        = 0;
147     fDataSize         = 0;
148     fGroupMap         = NULL;
149     fMaxCaptureDigits = 1;
150     fStaticSets       = NULL;
151     fStaticSets8      = NULL;
152     fStartType        = START_NO_INFO;
153     fInitialStringIdx = 0;
154     fInitialStringLen = 0;
155     fInitialChars     = NULL;
156     fInitialChar      = 0;
157     fInitialChars8    = NULL;
158     fNeedsAltInput    = FALSE;
159 
160     fPattern          = NULL; // will be set later
161     fPatternString    = NULL; // may be set later
162     fCompiledPat      = new UVector64(fDeferredStatus);
163     fGroupMap         = new UVector32(fDeferredStatus);
164     fSets             = new UVector(fDeferredStatus);
165     fInitialChars     = new UnicodeSet;
166     fInitialChars8    = new Regex8BitSet;
167     if (U_FAILURE(fDeferredStatus)) {
168         return;
169     }
170     if (fCompiledPat == NULL  || fGroupMap == NULL || fSets == NULL ||
171         fInitialChars == NULL || fInitialChars8 == NULL) {
172         fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
173         return;
174     }
175 
176     // Slot zero of the vector of sets is reserved.  Fill it here.
177     fSets->addElement((int32_t)0, fDeferredStatus);
178 }
179 
180 
181 //--------------------------------------------------------------------------
182 //
183 //   zap            Delete everything owned by this RegexPattern.
184 //
185 //--------------------------------------------------------------------------
zap()186 void RegexPattern::zap() {
187     delete fCompiledPat;
188     fCompiledPat = NULL;
189     int i;
190     for (i=1; i<fSets->size(); i++) {
191         UnicodeSet *s;
192         s = (UnicodeSet *)fSets->elementAt(i);
193         if (s != NULL) {
194             delete s;
195         }
196     }
197     delete fSets;
198     fSets = NULL;
199     delete[] fSets8;
200     fSets8 = NULL;
201     delete fGroupMap;
202     fGroupMap = NULL;
203     delete fInitialChars;
204     fInitialChars = NULL;
205     delete fInitialChars8;
206     fInitialChars8 = NULL;
207     if (fPattern != NULL) {
208         utext_close(fPattern);
209         fPattern = NULL;
210     }
211     if (fPatternString != NULL) {
212         delete fPatternString;
213         fPatternString = NULL;
214     }
215 }
216 
217 
218 //--------------------------------------------------------------------------
219 //
220 //   Destructor
221 //
222 //--------------------------------------------------------------------------
~RegexPattern()223 RegexPattern::~RegexPattern() {
224     zap();
225 }
226 
227 
228 //--------------------------------------------------------------------------
229 //
230 //   Clone
231 //
232 //--------------------------------------------------------------------------
clone() const233 RegexPattern  *RegexPattern::clone() const {
234     RegexPattern  *copy = new RegexPattern(*this);
235     return copy;
236 }
237 
238 
239 //--------------------------------------------------------------------------
240 //
241 //   operator ==   (comparison)    Consider to patterns to be == if the
242 //                                 pattern strings and the flags are the same.
243 //                                 Note that pattern strings with the same
244 //                                 characters can still be considered different.
245 //
246 //--------------------------------------------------------------------------
operator ==(const RegexPattern & other) const247 UBool   RegexPattern::operator ==(const RegexPattern &other) const {
248     if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
249         if (this->fPatternString != NULL && other.fPatternString != NULL) {
250             return *(this->fPatternString) == *(other.fPatternString);
251         } else if (this->fPattern == NULL) {
252             if (other.fPattern == NULL) {
253                 return TRUE;
254             }
255         } else if (other.fPattern != NULL) {
256             UTEXT_SETNATIVEINDEX(this->fPattern, 0);
257             UTEXT_SETNATIVEINDEX(other.fPattern, 0);
258             return utext_equals(this->fPattern, other.fPattern);
259         }
260     }
261     return FALSE;
262 }
263 
264 //---------------------------------------------------------------------
265 //
266 //   compile
267 //
268 //---------------------------------------------------------------------
269 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UParseError & pe,UErrorCode & status)270 RegexPattern::compile(const UnicodeString &regex,
271                       uint32_t             flags,
272                       UParseError          &pe,
273                       UErrorCode           &status)
274 {
275     if (U_FAILURE(status)) {
276         return NULL;
277     }
278 
279     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
280     UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
281     UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
282 
283     if ((flags & ~allFlags) != 0) {
284         status = U_REGEX_INVALID_FLAG;
285         return NULL;
286     }
287 
288     if ((flags & UREGEX_CANON_EQ) != 0) {
289         status = U_REGEX_UNIMPLEMENTED;
290         return NULL;
291     }
292 
293     RegexPattern *This = new RegexPattern;
294     if (This == NULL) {
295         status = U_MEMORY_ALLOCATION_ERROR;
296         return NULL;
297     }
298     if (U_FAILURE(This->fDeferredStatus)) {
299         status = This->fDeferredStatus;
300         delete This;
301         return NULL;
302     }
303     This->fFlags = flags;
304 
305     RegexCompile     compiler(This, status);
306     compiler.compile(regex, pe, status);
307 
308     if (U_FAILURE(status)) {
309         delete This;
310         This = NULL;
311     }
312 
313     return This;
314 }
315 
316 
317 //
318 //   compile, UText mode
319 //
320 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UParseError & pe,UErrorCode & status)321 RegexPattern::compile(UText                *regex,
322                       uint32_t             flags,
323                       UParseError          &pe,
324                       UErrorCode           &status)
325 {
326     if (U_FAILURE(status)) {
327         return NULL;
328     }
329 
330     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
331                               UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
332                               UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
333 
334     if ((flags & ~allFlags) != 0) {
335         status = U_REGEX_INVALID_FLAG;
336         return NULL;
337     }
338 
339     if ((flags & UREGEX_CANON_EQ) != 0) {
340         status = U_REGEX_UNIMPLEMENTED;
341         return NULL;
342     }
343 
344     RegexPattern *This = new RegexPattern;
345     if (This == NULL) {
346         status = U_MEMORY_ALLOCATION_ERROR;
347         return NULL;
348     }
349     if (U_FAILURE(This->fDeferredStatus)) {
350         status = This->fDeferredStatus;
351         delete This;
352         return NULL;
353     }
354     This->fFlags = flags;
355 
356     RegexCompile     compiler(This, status);
357     compiler.compile(regex, pe, status);
358 
359     if (U_FAILURE(status)) {
360         delete This;
361         This = NULL;
362     }
363 
364     return This;
365 }
366 
367 //
368 //   compile with default flags.
369 //
370 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,UParseError & pe,UErrorCode & err)371 RegexPattern::compile(const UnicodeString &regex,
372                       UParseError         &pe,
373                       UErrorCode          &err)
374 {
375     return compile(regex, 0, pe, err);
376 }
377 
378 
379 //
380 //   compile with default flags, UText mode
381 //
382 RegexPattern * U_EXPORT2
compile(UText * regex,UParseError & pe,UErrorCode & err)383 RegexPattern::compile(UText               *regex,
384                       UParseError         &pe,
385                       UErrorCode          &err)
386 {
387     return compile(regex, 0, pe, err);
388 }
389 
390 
391 //
392 //   compile with no UParseErr parameter.
393 //
394 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UErrorCode & err)395 RegexPattern::compile(const UnicodeString &regex,
396                       uint32_t             flags,
397                       UErrorCode          &err)
398 {
399     UParseError pe;
400     return compile(regex, flags, pe, err);
401 }
402 
403 
404 //
405 //   compile with no UParseErr parameter, UText mode
406 //
407 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UErrorCode & err)408 RegexPattern::compile(UText                *regex,
409                       uint32_t             flags,
410                       UErrorCode           &err)
411 {
412     UParseError pe;
413     return compile(regex, flags, pe, err);
414 }
415 
416 
417 //---------------------------------------------------------------------
418 //
419 //   flags
420 //
421 //---------------------------------------------------------------------
flags() const422 uint32_t RegexPattern::flags() const {
423     return fFlags;
424 }
425 
426 
427 //---------------------------------------------------------------------
428 //
429 //   matcher(UnicodeString, err)
430 //
431 //---------------------------------------------------------------------
matcher(const UnicodeString & input,UErrorCode & status) const432 RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
433                                     UErrorCode          &status)  const {
434     RegexMatcher    *retMatcher = matcher(status);
435     if (retMatcher != NULL) {
436         retMatcher->fDeferredStatus = status;
437         retMatcher->reset(input);
438     }
439     return retMatcher;
440 }
441 
442 
443 //---------------------------------------------------------------------
444 //
445 //   matcher(status)
446 //
447 //---------------------------------------------------------------------
matcher(UErrorCode & status) const448 RegexMatcher *RegexPattern::matcher(UErrorCode &status)  const {
449     RegexMatcher    *retMatcher = NULL;
450 
451     if (U_FAILURE(status)) {
452         return NULL;
453     }
454     if (U_FAILURE(fDeferredStatus)) {
455         status = fDeferredStatus;
456         return NULL;
457     }
458 
459     retMatcher = new RegexMatcher(this);
460     if (retMatcher == NULL) {
461         status = U_MEMORY_ALLOCATION_ERROR;
462         return NULL;
463     }
464     return retMatcher;
465 }
466 
467 
468 
469 //---------------------------------------------------------------------
470 //
471 //   matches        Convenience function to test for a match, starting
472 //                  with a pattern string and a data string.
473 //
474 //---------------------------------------------------------------------
matches(const UnicodeString & regex,const UnicodeString & input,UParseError & pe,UErrorCode & status)475 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString   &regex,
476               const UnicodeString   &input,
477                     UParseError     &pe,
478                     UErrorCode      &status) {
479 
480     if (U_FAILURE(status)) {return FALSE;}
481 
482     UBool         retVal;
483     RegexPattern *pat     = NULL;
484     RegexMatcher *matcher = NULL;
485 
486     pat     = RegexPattern::compile(regex, 0, pe, status);
487     matcher = pat->matcher(input, status);
488     retVal  = matcher->matches(status);
489 
490     delete matcher;
491     delete pat;
492     return retVal;
493 }
494 
495 
496 //
497 //   matches, UText mode
498 //
matches(UText * regex,UText * input,UParseError & pe,UErrorCode & status)499 UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
500                     UText           *input,
501                     UParseError     &pe,
502                     UErrorCode      &status) {
503 
504     if (U_FAILURE(status)) {return FALSE;}
505 
506     UBool         retVal  = FALSE;
507     RegexPattern *pat     = NULL;
508     RegexMatcher *matcher = NULL;
509 
510     pat     = RegexPattern::compile(regex, 0, pe, status);
511     matcher = pat->matcher(status);
512     if (U_SUCCESS(status)) {
513         matcher->reset(input);
514         retVal  = matcher->matches(status);
515     }
516 
517     delete matcher;
518     delete pat;
519     return retVal;
520 }
521 
522 
523 
524 
525 
526 //---------------------------------------------------------------------
527 //
528 //   pattern
529 //
530 //---------------------------------------------------------------------
pattern() const531 UnicodeString RegexPattern::pattern() const {
532     if (fPatternString != NULL) {
533         return *fPatternString;
534     } else if (fPattern == NULL) {
535         return UnicodeString();
536     } else {
537         UErrorCode status = U_ZERO_ERROR;
538         int64_t nativeLen = utext_nativeLength(fPattern);
539         int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
540         UnicodeString result;
541 
542         status = U_ZERO_ERROR;
543         UChar *resultChars = result.getBuffer(len16);
544         utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
545         result.releaseBuffer(len16);
546 
547         return result;
548     }
549 }
550 
551 
552 
553 
554 //---------------------------------------------------------------------
555 //
556 //   patternText
557 //
558 //---------------------------------------------------------------------
patternText(UErrorCode & status) const559 UText *RegexPattern::patternText(UErrorCode      &status) const {
560     if (U_FAILURE(status)) {return NULL;}
561     status = U_ZERO_ERROR;
562 
563     if (fPattern != NULL) {
564         return fPattern;
565     } else {
566         RegexStaticSets::initGlobals(&status);
567         return RegexStaticSets::gStaticSets->fEmptyText;
568     }
569 }
570 
571 
572 
573 //---------------------------------------------------------------------
574 //
575 //   split
576 //
577 //---------------------------------------------------------------------
split(const UnicodeString & input,UnicodeString dest[],int32_t destCapacity,UErrorCode & status) const578 int32_t  RegexPattern::split(const UnicodeString &input,
579         UnicodeString    dest[],
580         int32_t          destCapacity,
581         UErrorCode      &status) const
582 {
583     if (U_FAILURE(status)) {
584         return 0;
585     };
586 
587     RegexMatcher  m(this);
588     int32_t r = 0;
589     // Check m's status to make sure all is ok.
590     if (U_SUCCESS(m.fDeferredStatus)) {
591     	r = m.split(input, dest, destCapacity, status);
592     }
593     return r;
594 }
595 
596 //
597 //   split, UText mode
598 //
split(UText * input,UText * dest[],int32_t destCapacity,UErrorCode & status) const599 int32_t  RegexPattern::split(UText *input,
600         UText           *dest[],
601         int32_t          destCapacity,
602         UErrorCode      &status) const
603 {
604     if (U_FAILURE(status)) {
605         return 0;
606     };
607 
608     RegexMatcher  m(this);
609     int32_t r = 0;
610     // Check m's status to make sure all is ok.
611     if (U_SUCCESS(m.fDeferredStatus)) {
612     	r = m.split(input, dest, destCapacity, status);
613     }
614     return r;
615 }
616 
617 
618 
//---------------------------------------------------------------------
//
//   dump    Output the compiled form of the pattern.
//           Debugging function only.
//
//   dumpOp  prints one line for the compiled operation at the given index:
//           the index, the raw op value in hex, the opcode name, and the
//           operand formatted according to the opcode type.
//
//---------------------------------------------------------------------
#if defined(REGEX_DEBUG)
void   RegexPattern::dumpOp(int32_t index) const {
    static const char * const opNames[] = {URX_OPCODE_NAMES};

    int32_t op   = fCompiledPat->elementAti(index);
    int32_t val  = URX_VAL(op);
    int32_t type = URX_TYPE(op);

    // An opcode type outside the name table is shown with the name in slot zero.
    int32_t nameIndex = 0;
    if ((uint32_t)type < sizeof(opNames)/sizeof(char *)) {
        nameIndex = type;
    }

    REGEX_DUMP_DEBUG_PRINTF(("%4d   %08x    %-15s  ", index, op, opNames[nameIndex]));
    switch (type) {
    case URX_NOP:
    case URX_DOTANY:
    case URX_DOTANY_ALL:
    case URX_FAIL:
    case URX_CARET:
    case URX_DOLLAR:
    case URX_BACKSLASH_G:
    case URX_BACKSLASH_X:
    case URX_END:
    case URX_DOLLAR_M:
    case URX_CARET_M:
        // Types with no operand field of interest.
        break;

    case URX_RESERVED_OP:
    case URX_START_CAPTURE:
    case URX_END_CAPTURE:
    case URX_STATE_SAVE:
    case URX_JMP:
    case URX_JMP_SAV:
    case URX_JMP_SAV_X:
    case URX_BACKSLASH_B:
    case URX_BACKSLASH_BU:
    case URX_BACKSLASH_D:
    case URX_BACKSLASH_Z:
    case URX_STRING_LEN:
    case URX_CTR_INIT:
    case URX_CTR_INIT_NG:
    case URX_CTR_LOOP:
    case URX_CTR_LOOP_NG:
    case URX_RELOC_OPRND:
    case URX_STO_SP:
    case URX_LD_SP:
    case URX_BACKREF:
    case URX_STO_INP_LOC:
    case URX_JMPX:
    case URX_LA_START:
    case URX_LA_END:
    case URX_BACKREF_I:
    case URX_LB_START:
    case URX_LB_CONT:
    case URX_LB_END:
    case URX_LBN_CONT:
    case URX_LBN_END:
    case URX_LOOP_C:
    case URX_LOOP_DOT_I:
        // types with an integer operand field.
        REGEX_DUMP_DEBUG_PRINTF(("%d", val));
        break;

    case URX_ONECHAR:
    case URX_ONECHAR_I:
        // Operand is a single character; values of 256 and up print as '?'.
        REGEX_DUMP_DEBUG_PRINTF(("%c", val<256?val:'?'));
        break;

    case URX_STRING:
    case URX_STRING_I:
        {
            // Operand is the start index into fLiteralText; the length comes
            // from the operation that follows this one.
            int32_t lengthOp = fCompiledPat->elementAti(index+1);
            U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
            int32_t length = URX_VAL(lengthOp);
            int32_t limit  = val + length;
            for (int32_t pos = val; pos < limit; pos++) {
                UChar c = fLiteralText[pos];
                if (c < 32 || c >= 256) {c = '.';}
                REGEX_DUMP_DEBUG_PRINTF(("%c", c));
            }
        }
        break;

    case URX_SETREF:
    case URX_LOOP_SR_I:
        {
            // Operand is an index into this pattern's own vector of sets.
            UnicodeString setPattern;
            UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
            set->toPattern(setPattern, TRUE);
            for (int32_t k=0; k<setPattern.length(); k++) {
                REGEX_DUMP_DEBUG_PRINTF(("%c", setPattern.charAt(k)));
            }
        }
        break;

    case URX_STATIC_SETREF:
    case URX_STAT_SETREF_N:
        {
            // Operand is an index into fStaticSets, with URX_NEG_SET
            // marking a negated set.
            UnicodeString setPattern;
            if (val & URX_NEG_SET) {
                REGEX_DUMP_DEBUG_PRINTF(("NOT "));
                val &= ~URX_NEG_SET;
            }
            UnicodeSet *set = fStaticSets[val];
            set->toPattern(setPattern, TRUE);
            for (int32_t k=0; k<setPattern.length(); k++) {
                REGEX_DUMP_DEBUG_PRINTF(("%c", setPattern.charAt(k)));
            }
        }
        break;


    default:
        REGEX_DUMP_DEBUG_PRINTF(("??????"));
        break;
    }
    REGEX_DUMP_DEBUG_PRINTF(("\n"));
}
#endif
744 
745 
#if defined(REGEX_DEBUG)
//
//   RegexPatternDump    Debugging function only.
//
//       Prints the original pattern text, the minimum match length, the
//       match start type with its associated string / set / character,
//       and then one line per compiled operation (via dumpOp).
//       Pattern characters below 32 or at 256 and above are shown as '.'.
//
U_CAPI void  U_EXPORT2
RegexPatternDump(const RegexPattern *This) {
    int      index;
    int      i;

    REGEX_DUMP_DEBUG_PRINTF(("Original Pattern:  "));
    // A pattern that was never given any text has a NULL fPattern.
    if (This->fPattern != NULL) {
        UChar32 c = utext_next32From(This->fPattern, 0);
        while (c != U_SENTINEL) {
            // Only values in 32..255 are passed to %c; same limit as dumpOp.
            if (c<32 || c>=256) {
                c = '.';
            }
            REGEX_DUMP_DEBUG_PRINTF(("%c", c));

            c = UTEXT_NEXT32(This->fPattern);
        }
    }
    REGEX_DUMP_DEBUG_PRINTF(("\n"));
    REGEX_DUMP_DEBUG_PRINTF(("   Min Match Length:  %d\n", This->fMinMatchLen));
    REGEX_DUMP_DEBUG_PRINTF(("   Match Start Type:  %s\n", START_OF_MATCH_STR(This->fStartType)));
    if (This->fStartType == START_STRING) {
        REGEX_DUMP_DEBUG_PRINTF(("    Initial match string: \""));
        for (i=This->fInitialStringIdx; i<This->fInitialStringIdx+This->fInitialStringLen; i++) {
            REGEX_DUMP_DEBUG_PRINTF(("%c", This->fLiteralText[i]));   // TODO:  non-printables, surrogates.
        }
        REGEX_DUMP_DEBUG_PRINTF(("\"\n"));

    } else if (This->fStartType == START_SET) {
        // Show at most the first 20 characters of the set.
        int32_t numSetChars = This->fInitialChars->size();
        if (numSetChars > 20) {
            numSetChars = 20;
        }
        REGEX_DUMP_DEBUG_PRINTF(("     Match First Chars : "));
        for (i=0; i<numSetChars; i++) {
            UChar32 setChar = This->fInitialChars->charAt(i);
            if (0x20<setChar && setChar <0x7e) {
                REGEX_DUMP_DEBUG_PRINTF(("%c ", setChar));
            } else {
                REGEX_DUMP_DEBUG_PRINTF(("%#x ", setChar));
            }
        }
        if (numSetChars < This->fInitialChars->size()) {
            REGEX_DUMP_DEBUG_PRINTF((" ..."));
        }
        REGEX_DUMP_DEBUG_PRINTF(("\n"));

    } else if (This->fStartType == START_CHAR) {
        REGEX_DUMP_DEBUG_PRINTF(("    First char of Match : "));
        if (0x20 < This->fInitialChar && This->fInitialChar<0x7e) {
            REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar));
        } else {
            REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar));
        }
    }

    REGEX_DUMP_DEBUG_PRINTF(("\nIndex   Binary     Type             Operand\n" \
           "-------------------------------------------\n"));
    for (index = 0; index<This->fCompiledPat->size(); index++) {
        This->dumpOp(index);
    }
    REGEX_DUMP_DEBUG_PRINTF(("\n\n"));
}
#endif
808 
809 
810 
811 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
812 
813 U_NAMESPACE_END
814 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
815