• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 //  file:  repattrn.cpp
3 //
4 /*
5 ***************************************************************************
6 *   Copyright (C) 2002-2013 International Business Machines Corporation   *
7 *   and others. All rights reserved.                                      *
8 ***************************************************************************
9 */
10 
11 #include "unicode/utypes.h"
12 
13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
14 
15 #include "unicode/regex.h"
16 #include "unicode/uclean.h"
17 #include "uassert.h"
18 #include "uvector.h"
19 #include "uvectr32.h"
20 #include "uvectr64.h"
21 #include "regexcmp.h"
22 #include "regeximp.h"
23 #include "regexst.h"
24 
25 U_NAMESPACE_BEGIN
26 
27 //--------------------------------------------------------------------------
28 //
29 //    RegexPattern    Default Constructor
30 //
31 //--------------------------------------------------------------------------
RegexPattern()32 RegexPattern::RegexPattern() {
33     // Init all of this instances data.
34     init();
35 }
36 
37 
38 //--------------------------------------------------------------------------
39 //
40 //   Copy Constructor        Note:  This is a rather inefficient implementation,
41 //                                  but it probably doesn't matter.
42 //
43 //--------------------------------------------------------------------------
// Copy-constructs by first establishing the default state and then reusing
// the assignment operator to duplicate `other`.
RegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) {
    // operator= begins with zap(), so the members must be initialized first.
    init();
    operator=(other);
}
48 
49 
50 
51 //--------------------------------------------------------------------------
52 //
53 //    Assignment Operator
54 //
55 //--------------------------------------------------------------------------
operator =(const RegexPattern & other)56 RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
57     if (this == &other) {
58         // Source and destination are the same.  Don't do anything.
59         return *this;
60     }
61 
62     // Clean out any previous contents of object being assigned to.
63     zap();
64 
65     // Give target object a default initialization
66     init();
67 
68     // Copy simple fields
69     if ( other.fPatternString == NULL ) {
70         fPatternString = NULL;
71         fPattern      = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
72     } else {
73         fPatternString = new UnicodeString(*(other.fPatternString));
74         UErrorCode status = U_ZERO_ERROR;
75         fPattern      = utext_openConstUnicodeString(NULL, fPatternString, &status);
76         if (U_FAILURE(status)) {
77             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
78             return *this;
79         }
80     }
81     fFlags            = other.fFlags;
82     fLiteralText      = other.fLiteralText;
83     fDeferredStatus   = other.fDeferredStatus;
84     fMinMatchLen      = other.fMinMatchLen;
85     fFrameSize        = other.fFrameSize;
86     fDataSize         = other.fDataSize;
87     fMaxCaptureDigits = other.fMaxCaptureDigits;
88     fStaticSets       = other.fStaticSets;
89     fStaticSets8      = other.fStaticSets8;
90 
91     fStartType        = other.fStartType;
92     fInitialStringIdx = other.fInitialStringIdx;
93     fInitialStringLen = other.fInitialStringLen;
94     *fInitialChars    = *other.fInitialChars;
95     fInitialChar      = other.fInitialChar;
96     *fInitialChars8   = *other.fInitialChars8;
97     fNeedsAltInput    = other.fNeedsAltInput;
98 
99     //  Copy the pattern.  It's just values, nothing deep to copy.
100     fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
101     fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
102 
103     //  Copy the Unicode Sets.
104     //    Could be made more efficient if the sets were reference counted and shared,
105     //    but I doubt that pattern copying will be particularly common.
106     //    Note:  init() already added an empty element zero to fSets
107     int32_t i;
108     int32_t  numSets = other.fSets->size();
109     fSets8 = new Regex8BitSet[numSets];
110     if (fSets8 == NULL) {
111     	fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
112     	return *this;
113     }
114     for (i=1; i<numSets; i++) {
115         if (U_FAILURE(fDeferredStatus)) {
116             return *this;
117         }
118         UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
119         UnicodeSet *newSet    = new UnicodeSet(*sourceSet);
120         if (newSet == NULL) {
121             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
122             break;
123         }
124         fSets->addElement(newSet, fDeferredStatus);
125         fSets8[i] = other.fSets8[i];
126     }
127 
128     return *this;
129 }
130 
131 
132 //--------------------------------------------------------------------------
133 //
134 //    init        Shared initialization for use by constructors.
135 //                Bring an uninitialized RegexPattern up to a default state.
136 //
137 //--------------------------------------------------------------------------
init()138 void RegexPattern::init() {
139     fFlags            = 0;
140     fCompiledPat      = 0;
141     fLiteralText.remove();
142     fSets             = NULL;
143     fSets8            = NULL;
144     fDeferredStatus   = U_ZERO_ERROR;
145     fMinMatchLen      = 0;
146     fFrameSize        = 0;
147     fDataSize         = 0;
148     fGroupMap         = NULL;
149     fMaxCaptureDigits = 1;
150     fStaticSets       = NULL;
151     fStaticSets8      = NULL;
152     fStartType        = START_NO_INFO;
153     fInitialStringIdx = 0;
154     fInitialStringLen = 0;
155     fInitialChars     = NULL;
156     fInitialChar      = 0;
157     fInitialChars8    = NULL;
158     fNeedsAltInput    = FALSE;
159 
160     fPattern          = NULL; // will be set later
161     fPatternString    = NULL; // may be set later
162     fCompiledPat      = new UVector64(fDeferredStatus);
163     fGroupMap         = new UVector32(fDeferredStatus);
164     fSets             = new UVector(fDeferredStatus);
165     fInitialChars     = new UnicodeSet;
166     fInitialChars8    = new Regex8BitSet;
167     if (U_FAILURE(fDeferredStatus)) {
168         return;
169     }
170     if (fCompiledPat == NULL  || fGroupMap == NULL || fSets == NULL ||
171         fInitialChars == NULL || fInitialChars8 == NULL) {
172         fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
173         return;
174     }
175 
176     // Slot zero of the vector of sets is reserved.  Fill it here.
177     fSets->addElement((int32_t)0, fDeferredStatus);
178 }
179 
180 
181 //--------------------------------------------------------------------------
182 //
183 //   zap            Delete everything owned by this RegexPattern.
184 //
185 //--------------------------------------------------------------------------
zap()186 void RegexPattern::zap() {
187     delete fCompiledPat;
188     fCompiledPat = NULL;
189     int i;
190     for (i=1; i<fSets->size(); i++) {
191         UnicodeSet *s;
192         s = (UnicodeSet *)fSets->elementAt(i);
193         if (s != NULL) {
194             delete s;
195         }
196     }
197     delete fSets;
198     fSets = NULL;
199     delete[] fSets8;
200     fSets8 = NULL;
201     delete fGroupMap;
202     fGroupMap = NULL;
203     delete fInitialChars;
204     fInitialChars = NULL;
205     delete fInitialChars8;
206     fInitialChars8 = NULL;
207     if (fPattern != NULL) {
208         utext_close(fPattern);
209         fPattern = NULL;
210     }
211     if (fPatternString != NULL) {
212         delete fPatternString;
213         fPatternString = NULL;
214     }
215 }
216 
217 
218 //--------------------------------------------------------------------------
219 //
220 //   Destructor
221 //
222 //--------------------------------------------------------------------------
~RegexPattern()223 RegexPattern::~RegexPattern() {
224     zap();
225 }
226 
227 
228 //--------------------------------------------------------------------------
229 //
230 //   Clone
231 //
232 //--------------------------------------------------------------------------
clone() const233 RegexPattern  *RegexPattern::clone() const {
234     RegexPattern  *copy = new RegexPattern(*this);
235     return copy;
236 }
237 
238 
239 //--------------------------------------------------------------------------
240 //
241 //   operator ==   (comparison)    Consider two patterns to be == if the
242 //                                 pattern strings and the flags are the same.
243 //                                 Note that pattern strings with the same
244 //                                 characters can still be considered different.
245 //
246 //--------------------------------------------------------------------------
operator ==(const RegexPattern & other) const247 UBool   RegexPattern::operator ==(const RegexPattern &other) const {
248     if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
249         if (this->fPatternString != NULL && other.fPatternString != NULL) {
250             return *(this->fPatternString) == *(other.fPatternString);
251         } else if (this->fPattern == NULL) {
252             if (other.fPattern == NULL) {
253                 return TRUE;
254             }
255         } else if (other.fPattern != NULL) {
256             UTEXT_SETNATIVEINDEX(this->fPattern, 0);
257             UTEXT_SETNATIVEINDEX(other.fPattern, 0);
258             return utext_equals(this->fPattern, other.fPattern);
259         }
260     }
261     return FALSE;
262 }
263 
264 //---------------------------------------------------------------------
265 //
266 //   compile
267 //
268 //---------------------------------------------------------------------
269 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UParseError & pe,UErrorCode & status)270 RegexPattern::compile(const UnicodeString &regex,
271                       uint32_t             flags,
272                       UParseError          &pe,
273                       UErrorCode           &status)
274 {
275     if (U_FAILURE(status)) {
276         return NULL;
277     }
278 
279     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
280     UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
281     UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
282 
283     if ((flags & ~allFlags) != 0) {
284         status = U_REGEX_INVALID_FLAG;
285         return NULL;
286     }
287 
288     if ((flags & UREGEX_CANON_EQ) != 0) {
289         status = U_REGEX_UNIMPLEMENTED;
290         return NULL;
291     }
292 
293     RegexPattern *This = new RegexPattern;
294     if (This == NULL) {
295         status = U_MEMORY_ALLOCATION_ERROR;
296         return NULL;
297     }
298     if (U_FAILURE(This->fDeferredStatus)) {
299         status = This->fDeferredStatus;
300         delete This;
301         return NULL;
302     }
303     This->fFlags = flags;
304 
305     RegexCompile     compiler(This, status);
306     compiler.compile(regex, pe, status);
307 
308     if (U_FAILURE(status)) {
309         delete This;
310         This = NULL;
311     }
312 
313     return This;
314 }
315 
316 
317 //
318 //   compile, UText mode
319 //
320 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UParseError & pe,UErrorCode & status)321 RegexPattern::compile(UText                *regex,
322                       uint32_t             flags,
323                       UParseError          &pe,
324                       UErrorCode           &status)
325 {
326     if (U_FAILURE(status)) {
327         return NULL;
328     }
329 
330     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
331                               UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
332                               UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
333 
334     if ((flags & ~allFlags) != 0) {
335         status = U_REGEX_INVALID_FLAG;
336         return NULL;
337     }
338 
339     if ((flags & UREGEX_CANON_EQ) != 0) {
340         status = U_REGEX_UNIMPLEMENTED;
341         return NULL;
342     }
343 
344     RegexPattern *This = new RegexPattern;
345     if (This == NULL) {
346         status = U_MEMORY_ALLOCATION_ERROR;
347         return NULL;
348     }
349     if (U_FAILURE(This->fDeferredStatus)) {
350         status = This->fDeferredStatus;
351         delete This;
352         return NULL;
353     }
354     This->fFlags = flags;
355 
356     RegexCompile     compiler(This, status);
357     compiler.compile(regex, pe, status);
358 
359     if (U_FAILURE(status)) {
360         delete This;
361         This = NULL;
362     }
363 
364     return This;
365 }
366 
367 //
368 //   compile with default flags.
369 //
370 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,UParseError & pe,UErrorCode & err)371 RegexPattern::compile(const UnicodeString &regex,
372                       UParseError         &pe,
373                       UErrorCode          &err)
374 {
375     return compile(regex, 0, pe, err);
376 }
377 
378 
379 //
380 //   compile with default flags, UText mode
381 //
382 RegexPattern * U_EXPORT2
compile(UText * regex,UParseError & pe,UErrorCode & err)383 RegexPattern::compile(UText               *regex,
384                       UParseError         &pe,
385                       UErrorCode          &err)
386 {
387     return compile(regex, 0, pe, err);
388 }
389 
390 
391 //
392 //   compile with no UParseErr parameter.
393 //
394 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UErrorCode & err)395 RegexPattern::compile(const UnicodeString &regex,
396                       uint32_t             flags,
397                       UErrorCode          &err)
398 {
399     UParseError pe;
400     return compile(regex, flags, pe, err);
401 }
402 
403 
404 //
405 //   compile with no UParseErr parameter, UText mode
406 //
407 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UErrorCode & err)408 RegexPattern::compile(UText                *regex,
409                       uint32_t             flags,
410                       UErrorCode           &err)
411 {
412     UParseError pe;
413     return compile(regex, flags, pe, err);
414 }
415 
416 
417 //---------------------------------------------------------------------
418 //
419 //   flags
420 //
421 //---------------------------------------------------------------------
flags() const422 uint32_t RegexPattern::flags() const {
423     return fFlags;
424 }
425 
426 
427 //---------------------------------------------------------------------
428 //
429 //   matcher(UnicodeString, err)
430 //
431 //---------------------------------------------------------------------
matcher(const UnicodeString & input,UErrorCode & status) const432 RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
433                                     UErrorCode          &status)  const {
434     RegexMatcher    *retMatcher = matcher(status);
435     if (retMatcher != NULL) {
436         retMatcher->fDeferredStatus = status;
437         retMatcher->reset(input);
438     }
439     return retMatcher;
440 }
441 
442 
443 //---------------------------------------------------------------------
444 //
445 //   matcher(status)
446 //
447 //---------------------------------------------------------------------
matcher(UErrorCode & status) const448 RegexMatcher *RegexPattern::matcher(UErrorCode &status)  const {
449     RegexMatcher    *retMatcher = NULL;
450 
451     if (U_FAILURE(status)) {
452         return NULL;
453     }
454     if (U_FAILURE(fDeferredStatus)) {
455         status = fDeferredStatus;
456         return NULL;
457     }
458 
459     retMatcher = new RegexMatcher(this);
460     if (retMatcher == NULL) {
461         status = U_MEMORY_ALLOCATION_ERROR;
462         return NULL;
463     }
464     return retMatcher;
465 }
466 
467 
468 
469 //---------------------------------------------------------------------
470 //
471 //   matches        Convenience function to test for a match, starting
472 //                  with a pattern string and a data string.
473 //
474 //---------------------------------------------------------------------
matches(const UnicodeString & regex,const UnicodeString & input,UParseError & pe,UErrorCode & status)475 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString   &regex,
476               const UnicodeString   &input,
477                     UParseError     &pe,
478                     UErrorCode      &status) {
479 
480     if (U_FAILURE(status)) {return FALSE;}
481 
482     UBool         retVal;
483     RegexPattern *pat     = NULL;
484     RegexMatcher *matcher = NULL;
485 
486     pat     = RegexPattern::compile(regex, 0, pe, status);
487     matcher = pat->matcher(input, status);
488     retVal  = matcher->matches(status);
489 
490     delete matcher;
491     delete pat;
492     return retVal;
493 }
494 
495 
496 //
497 //   matches, UText mode
498 //
matches(UText * regex,UText * input,UParseError & pe,UErrorCode & status)499 UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
500                     UText           *input,
501                     UParseError     &pe,
502                     UErrorCode      &status) {
503 
504     if (U_FAILURE(status)) {return FALSE;}
505 
506     UBool         retVal  = FALSE;
507     RegexPattern *pat     = NULL;
508     RegexMatcher *matcher = NULL;
509 
510     pat     = RegexPattern::compile(regex, 0, pe, status);
511     matcher = pat->matcher(status);
512     if (U_SUCCESS(status)) {
513         matcher->reset(input);
514         retVal  = matcher->matches(status);
515     }
516 
517     delete matcher;
518     delete pat;
519     return retVal;
520 }
521 
522 
523 
524 
525 
526 //---------------------------------------------------------------------
527 //
528 //   pattern
529 //
530 //---------------------------------------------------------------------
pattern() const531 UnicodeString RegexPattern::pattern() const {
532     if (fPatternString != NULL) {
533         return *fPatternString;
534     } else if (fPattern == NULL) {
535         return UnicodeString();
536     } else {
537         UErrorCode status = U_ZERO_ERROR;
538         int64_t nativeLen = utext_nativeLength(fPattern);
539         int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
540         UnicodeString result;
541 
542         status = U_ZERO_ERROR;
543         UChar *resultChars = result.getBuffer(len16);
544         utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
545         result.releaseBuffer(len16);
546 
547         return result;
548     }
549 }
550 
551 
552 
553 
554 //---------------------------------------------------------------------
555 //
556 //   patternText
557 //
558 //---------------------------------------------------------------------
patternText(UErrorCode & status) const559 UText *RegexPattern::patternText(UErrorCode      &status) const {
560     if (U_FAILURE(status)) {return NULL;}
561     status = U_ZERO_ERROR;
562 
563     if (fPattern != NULL) {
564         return fPattern;
565     } else {
566         RegexStaticSets::initGlobals(&status);
567         return RegexStaticSets::gStaticSets->fEmptyText;
568     }
569 }
570 
571 
572 
573 //---------------------------------------------------------------------
574 //
575 //   split
576 //
577 //---------------------------------------------------------------------
split(const UnicodeString & input,UnicodeString dest[],int32_t destCapacity,UErrorCode & status) const578 int32_t  RegexPattern::split(const UnicodeString &input,
579         UnicodeString    dest[],
580         int32_t          destCapacity,
581         UErrorCode      &status) const
582 {
583     if (U_FAILURE(status)) {
584         return 0;
585     };
586 
587     RegexMatcher  m(this);
588     int32_t r = 0;
589     // Check m's status to make sure all is ok.
590     if (U_SUCCESS(m.fDeferredStatus)) {
591     	r = m.split(input, dest, destCapacity, status);
592     }
593     return r;
594 }
595 
596 //
597 //   split, UText mode
598 //
split(UText * input,UText * dest[],int32_t destCapacity,UErrorCode & status) const599 int32_t  RegexPattern::split(UText *input,
600         UText           *dest[],
601         int32_t          destCapacity,
602         UErrorCode      &status) const
603 {
604     if (U_FAILURE(status)) {
605         return 0;
606     };
607 
608     RegexMatcher  m(this);
609     int32_t r = 0;
610     // Check m's status to make sure all is ok.
611     if (U_SUCCESS(m.fDeferredStatus)) {
612     	r = m.split(input, dest, destCapacity, status);
613     }
614     return r;
615 }
616 
617 
618 
619 //---------------------------------------------------------------------
620 //
621 //   dump    Output the compiled form of the pattern.
622 //           Debugging function only.
623 //
624 //---------------------------------------------------------------------
dumpOp(int32_t index) const625 void   RegexPattern::dumpOp(int32_t index) const {
626     (void)index;  // Suppress warnings in non-debug build.
627 #if defined(REGEX_DEBUG)
628     static const char * const opNames[] = {URX_OPCODE_NAMES};
629     int32_t op          = fCompiledPat->elementAti(index);
630     int32_t val         = URX_VAL(op);
631     int32_t type        = URX_TYPE(op);
632     int32_t pinnedType  = type;
633     if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) {
634         pinnedType = 0;
635     }
636 
637     printf("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]);
638     switch (type) {
639     case URX_NOP:
640     case URX_DOTANY:
641     case URX_DOTANY_ALL:
642     case URX_FAIL:
643     case URX_CARET:
644     case URX_DOLLAR:
645     case URX_BACKSLASH_G:
646     case URX_BACKSLASH_X:
647     case URX_END:
648     case URX_DOLLAR_M:
649     case URX_CARET_M:
650         // Types with no operand field of interest.
651         break;
652 
653     case URX_RESERVED_OP:
654     case URX_START_CAPTURE:
655     case URX_END_CAPTURE:
656     case URX_STATE_SAVE:
657     case URX_JMP:
658     case URX_JMP_SAV:
659     case URX_JMP_SAV_X:
660     case URX_BACKSLASH_B:
661     case URX_BACKSLASH_BU:
662     case URX_BACKSLASH_D:
663     case URX_BACKSLASH_Z:
664     case URX_STRING_LEN:
665     case URX_CTR_INIT:
666     case URX_CTR_INIT_NG:
667     case URX_CTR_LOOP:
668     case URX_CTR_LOOP_NG:
669     case URX_RELOC_OPRND:
670     case URX_STO_SP:
671     case URX_LD_SP:
672     case URX_BACKREF:
673     case URX_STO_INP_LOC:
674     case URX_JMPX:
675     case URX_LA_START:
676     case URX_LA_END:
677     case URX_BACKREF_I:
678     case URX_LB_START:
679     case URX_LB_CONT:
680     case URX_LB_END:
681     case URX_LBN_CONT:
682     case URX_LBN_END:
683     case URX_LOOP_C:
684     case URX_LOOP_DOT_I:
685         // types with an integer operand field.
686         printf("%d", val);
687         break;
688 
689     case URX_ONECHAR:
690     case URX_ONECHAR_I:
691         printf("%c", val<256?val:'?');
692         break;
693 
694     case URX_STRING:
695     case URX_STRING_I:
696         {
697             int32_t lengthOp       = fCompiledPat->elementAti(index+1);
698             U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
699             int32_t length = URX_VAL(lengthOp);
700             int32_t i;
701             for (i=val; i<val+length; i++) {
702                 UChar c = fLiteralText[i];
703                 if (c < 32 || c >= 256) {c = '.';}
704                 printf("%c", c);
705             }
706         }
707         break;
708 
709     case URX_SETREF:
710     case URX_LOOP_SR_I:
711         {
712             UnicodeString s;
713             UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
714             set->toPattern(s, TRUE);
715             for (int32_t i=0; i<s.length(); i++) {
716                 printf("%c", s.charAt(i));
717             }
718         }
719         break;
720 
721     case URX_STATIC_SETREF:
722     case URX_STAT_SETREF_N:
723         {
724             UnicodeString s;
725             if (val & URX_NEG_SET) {
726                 printf("NOT ");
727                 val &= ~URX_NEG_SET;
728             }
729             UnicodeSet *set = fStaticSets[val];
730             set->toPattern(s, TRUE);
731             for (int32_t i=0; i<s.length(); i++) {
732                 printf("%c", s.charAt(i));
733             }
734         }
735         break;
736 
737 
738     default:
739         printf("??????");
740         break;
741     }
742     printf("\n");
743 #endif
744 }
745 
746 
dumpPattern() const747 void RegexPattern::dumpPattern() const {
748 #if defined(REGEX_DEBUG)
749     int      index;
750     int      i;
751 
752     printf("Original Pattern:  ");
753     UChar32 c = utext_next32From(fPattern, 0);
754     while (c != U_SENTINEL) {
755         if (c<32 || c>256) {
756             c = '.';
757         }
758         printf("%c", c);
759 
760         c = UTEXT_NEXT32(fPattern);
761     }
762     printf("\n");
763     printf("   Min Match Length:  %d\n", fMinMatchLen);
764     printf("   Match Start Type:  %s\n", START_OF_MATCH_STR(fStartType));
765     if (fStartType == START_STRING) {
766         printf("    Initial match string: \"");
767         for (i=fInitialStringIdx; i<fInitialStringIdx+fInitialStringLen; i++) {
768             printf("%c", fLiteralText[i]);   // TODO:  non-printables, surrogates.
769         }
770         printf("\"\n");
771 
772     } else if (fStartType == START_SET) {
773         int32_t numSetChars = fInitialChars->size();
774         if (numSetChars > 20) {
775             numSetChars = 20;
776         }
777         printf("     Match First Chars : ");
778         for (i=0; i<numSetChars; i++) {
779             UChar32 c = fInitialChars->charAt(i);
780             if (0x20<c && c <0x7e) {
781                 printf("%c ", c);
782             } else {
783                 printf("%#x ", c);
784             }
785         }
786         if (numSetChars < fInitialChars->size()) {
787             printf(" ...");
788         }
789         printf("\n");
790 
791     } else if (fStartType == START_CHAR) {
792         printf("    First char of Match : ");
793         if (0x20 < fInitialChar && fInitialChar<0x7e) {
794                 printf("%c\n", fInitialChar);
795             } else {
796                 printf("%#x\n", fInitialChar);
797             }
798     }
799 
800     printf("\nIndex   Binary     Type             Operand\n" \
801            "-------------------------------------------\n");
802     for (index = 0; index<fCompiledPat->size(); index++) {
803         dumpOp(index);
804     }
805     printf("\n\n");
806 #endif
807 }
808 
809 
810 
811 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
812 
813 U_NAMESPACE_END
814 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
815