1 //
2 // file: repattrn.cpp
3 //
4 /*
5 ***************************************************************************
6 * Copyright (C) 2002-2012 International Business Machines Corporation *
7 * and others. All rights reserved. *
8 ***************************************************************************
9 */
10
11 #include "unicode/utypes.h"
12
13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
14
15 #include "unicode/regex.h"
16 #include "unicode/uclean.h"
17 #include "uassert.h"
18 #include "uvector.h"
19 #include "uvectr32.h"
20 #include "uvectr64.h"
21 #include "regexcmp.h"
22 #include "regeximp.h"
23 #include "regexst.h"
24
25 U_NAMESPACE_BEGIN
26
27 //--------------------------------------------------------------------------
28 //
29 // RegexPattern Default Constructor
30 //
31 //--------------------------------------------------------------------------
RegexPattern()32 RegexPattern::RegexPattern() {
33 // Init all of this instances data.
34 init();
35 }
36
37
38 //--------------------------------------------------------------------------
39 //
40 // Copy Constructor Note: This is a rather inefficient implementation,
41 // but it probably doesn't matter.
42 //
43 //--------------------------------------------------------------------------
// Copy constructor.
// Builds a default-initialized object first so that the assignment operator
// has valid members to release, then lets the assignment operator do the copy.
RegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) {
    init();
    operator=(other);
}
48
49
50
51 //--------------------------------------------------------------------------
52 //
53 // Assignment Operator
54 //
55 //--------------------------------------------------------------------------
// Assignment operator.
//
// Replaces the contents of this pattern with a copy of `other`: the pattern
// text, the simple scalar fields, the compiled pattern, the group map and a
// private copy of every UnicodeSet referenced by the pattern.
//
// Errors are not returned directly; they are left in fDeferredStatus.
// On any failure the function returns early with this object only partially
// copied and fDeferredStatus set to the failure code.
//
// Returns *this.
RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
    if (this == &other) {
        // Source and destination are the same object; nothing to copy.
        return *this;
    }

    // Release everything currently owned, then rebuild the empty default
    // state (fresh vectors and sets) that the copy is written into.
    zap();
    init();
    if (U_FAILURE(fDeferredStatus)) {
        // init() could not create its helper objects. Some of the member
        // pointers dereferenced below may be NULL, so stop here and keep
        // the error instead of overwriting it with other's status.
        return *this;
    }

    // Copy the pattern text, either as a UText clone or as an owned
    // UnicodeString wrapped in a UText.
    if (other.fPatternString == NULL) {
        fPatternString = NULL;
        if (other.fPattern != NULL) {
            // Arguments FALSE, TRUE request a shallow, read-only clone
            // (see the utext_clone API documentation).
            fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
            if (U_FAILURE(fDeferredStatus)) {
                // Keep the clone failure; it must not be replaced by
                // other.fDeferredStatus further down.
                return *this;
            }
        }
        // else: other has no pattern text at all; fPattern stays NULL from init().
    } else {
        fPatternString = new UnicodeString(*(other.fPatternString));
        if (fPatternString == NULL) {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
            return *this;
        }
        UErrorCode status = U_ZERO_ERROR;
        fPattern = utext_openConstUnicodeString(NULL, fPatternString, &status);
        if (U_FAILURE(status)) {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
            return *this;
        }
    }

    // Copy the simple fields.
    fFlags            = other.fFlags;
    fLiteralText      = other.fLiteralText;
    fDeferredStatus   = other.fDeferredStatus;
    fMinMatchLen      = other.fMinMatchLen;
    fFrameSize        = other.fFrameSize;
    fDataSize         = other.fDataSize;
    fMaxCaptureDigits = other.fMaxCaptureDigits;
    fStaticSets       = other.fStaticSets;
    fStaticSets8      = other.fStaticSets8;

    fStartType        = other.fStartType;
    fInitialStringIdx = other.fInitialStringIdx;
    fInitialStringLen = other.fInitialStringLen;
    *fInitialChars    = *other.fInitialChars;
    fInitialChar      = other.fInitialChar;
    *fInitialChars8   = *other.fInitialChars8;
    fNeedsAltInput    = other.fNeedsAltInput;

    // Copy the compiled pattern and group map. They hold plain values,
    // so there is nothing deep to copy.
    fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
    fGroupMap->assign(*other.fGroupMap, fDeferredStatus);

    // Copy the Unicode sets. Each set is duplicated rather than shared.
    // Slot zero of fSets was already filled in by init(), so copying
    // starts at index 1.
    const int32_t numSets = other.fSets->size();
    fSets8 = new Regex8BitSet[numSets];
    if (fSets8 == NULL) {
        fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
        return *this;
    }
    for (int32_t i = 1; i < numSets; i++) {
        if (U_FAILURE(fDeferredStatus)) {
            return *this;
        }
        UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
        UnicodeSet *newSet = new UnicodeSet(*sourceSet);
        if (newSet == NULL) {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
            break;
        }
        // NOTE(review): if addElement() fails, newSet is presumably not
        // stored in fSets and would leak; confirm against UVector.
        fSets->addElement(newSet, fDeferredStatus);
        fSets8[i] = other.fSets8[i];
    }

    return *this;
}
130
131
132 //--------------------------------------------------------------------------
133 //
134 // init Shared initialization for use by constructors.
135 // Bring an uninitialized RegexPattern up to a default state.
136 //
137 //--------------------------------------------------------------------------
init()138 void RegexPattern::init() {
139 fFlags = 0;
140 fCompiledPat = 0;
141 fLiteralText.remove();
142 fSets = NULL;
143 fSets8 = NULL;
144 fDeferredStatus = U_ZERO_ERROR;
145 fMinMatchLen = 0;
146 fFrameSize = 0;
147 fDataSize = 0;
148 fGroupMap = NULL;
149 fMaxCaptureDigits = 1;
150 fStaticSets = NULL;
151 fStaticSets8 = NULL;
152 fStartType = START_NO_INFO;
153 fInitialStringIdx = 0;
154 fInitialStringLen = 0;
155 fInitialChars = NULL;
156 fInitialChar = 0;
157 fInitialChars8 = NULL;
158 fNeedsAltInput = FALSE;
159
160 fPattern = NULL; // will be set later
161 fPatternString = NULL; // may be set later
162 fCompiledPat = new UVector64(fDeferredStatus);
163 fGroupMap = new UVector32(fDeferredStatus);
164 fSets = new UVector(fDeferredStatus);
165 fInitialChars = new UnicodeSet;
166 fInitialChars8 = new Regex8BitSet;
167 if (U_FAILURE(fDeferredStatus)) {
168 return;
169 }
170 if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL ||
171 fInitialChars == NULL || fInitialChars8 == NULL) {
172 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
173 return;
174 }
175
176 // Slot zero of the vector of sets is reserved. Fill it here.
177 fSets->addElement((int32_t)0, fDeferredStatus);
178 }
179
180
181 //--------------------------------------------------------------------------
182 //
183 // zap Delete everything owned by this RegexPattern.
184 //
185 //--------------------------------------------------------------------------
zap()186 void RegexPattern::zap() {
187 delete fCompiledPat;
188 fCompiledPat = NULL;
189 int i;
190 for (i=1; i<fSets->size(); i++) {
191 UnicodeSet *s;
192 s = (UnicodeSet *)fSets->elementAt(i);
193 if (s != NULL) {
194 delete s;
195 }
196 }
197 delete fSets;
198 fSets = NULL;
199 delete[] fSets8;
200 fSets8 = NULL;
201 delete fGroupMap;
202 fGroupMap = NULL;
203 delete fInitialChars;
204 fInitialChars = NULL;
205 delete fInitialChars8;
206 fInitialChars8 = NULL;
207 if (fPattern != NULL) {
208 utext_close(fPattern);
209 fPattern = NULL;
210 }
211 if (fPatternString != NULL) {
212 delete fPatternString;
213 fPatternString = NULL;
214 }
215 }
216
217
218 //--------------------------------------------------------------------------
219 //
220 // Destructor
221 //
222 //--------------------------------------------------------------------------
~RegexPattern()223 RegexPattern::~RegexPattern() {
224 zap();
225 }
226
227
228 //--------------------------------------------------------------------------
229 //
230 // Clone
231 //
232 //--------------------------------------------------------------------------
clone() const233 RegexPattern *RegexPattern::clone() const {
234 RegexPattern *copy = new RegexPattern(*this);
235 return copy;
236 }
237
238
//--------------------------------------------------------------------------
//
//   operator ==   (comparison)    Consider two patterns to be == if the
//                                 pattern strings and the flags are the same.
//                                 Note that pattern strings with the same
//                                 characters can still be considered different.
//
//--------------------------------------------------------------------------
operator ==(const RegexPattern & other) const247 UBool RegexPattern::operator ==(const RegexPattern &other) const {
248 if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
249 if (this->fPatternString != NULL && other.fPatternString != NULL) {
250 return *(this->fPatternString) == *(other.fPatternString);
251 } else if (this->fPattern == NULL) {
252 if (other.fPattern == NULL) {
253 return TRUE;
254 }
255 } else if (other.fPattern != NULL) {
256 UTEXT_SETNATIVEINDEX(this->fPattern, 0);
257 UTEXT_SETNATIVEINDEX(other.fPattern, 0);
258 return utext_equals(this->fPattern, other.fPattern);
259 }
260 }
261 return FALSE;
262 }
263
264 //---------------------------------------------------------------------
265 //
266 // compile
267 //
268 //---------------------------------------------------------------------
269 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UParseError & pe,UErrorCode & status)270 RegexPattern::compile(const UnicodeString ®ex,
271 uint32_t flags,
272 UParseError &pe,
273 UErrorCode &status)
274 {
275 if (U_FAILURE(status)) {
276 return NULL;
277 }
278
279 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
280 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
281 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
282
283 if ((flags & ~allFlags) != 0) {
284 status = U_REGEX_INVALID_FLAG;
285 return NULL;
286 }
287
288 if ((flags & UREGEX_CANON_EQ) != 0) {
289 status = U_REGEX_UNIMPLEMENTED;
290 return NULL;
291 }
292
293 RegexPattern *This = new RegexPattern;
294 if (This == NULL) {
295 status = U_MEMORY_ALLOCATION_ERROR;
296 return NULL;
297 }
298 if (U_FAILURE(This->fDeferredStatus)) {
299 status = This->fDeferredStatus;
300 delete This;
301 return NULL;
302 }
303 This->fFlags = flags;
304
305 RegexCompile compiler(This, status);
306 compiler.compile(regex, pe, status);
307
308 if (U_FAILURE(status)) {
309 delete This;
310 This = NULL;
311 }
312
313 return This;
314 }
315
316
317 //
318 // compile, UText mode
319 //
320 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UParseError & pe,UErrorCode & status)321 RegexPattern::compile(UText *regex,
322 uint32_t flags,
323 UParseError &pe,
324 UErrorCode &status)
325 {
326 if (U_FAILURE(status)) {
327 return NULL;
328 }
329
330 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
331 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
332 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
333
334 if ((flags & ~allFlags) != 0) {
335 status = U_REGEX_INVALID_FLAG;
336 return NULL;
337 }
338
339 if ((flags & UREGEX_CANON_EQ) != 0) {
340 status = U_REGEX_UNIMPLEMENTED;
341 return NULL;
342 }
343
344 RegexPattern *This = new RegexPattern;
345 if (This == NULL) {
346 status = U_MEMORY_ALLOCATION_ERROR;
347 return NULL;
348 }
349 if (U_FAILURE(This->fDeferredStatus)) {
350 status = This->fDeferredStatus;
351 delete This;
352 return NULL;
353 }
354 This->fFlags = flags;
355
356 RegexCompile compiler(This, status);
357 compiler.compile(regex, pe, status);
358
359 if (U_FAILURE(status)) {
360 delete This;
361 This = NULL;
362 }
363
364 return This;
365 }
366
367 //
368 // compile with default flags.
369 //
370 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,UParseError & pe,UErrorCode & err)371 RegexPattern::compile(const UnicodeString ®ex,
372 UParseError &pe,
373 UErrorCode &err)
374 {
375 return compile(regex, 0, pe, err);
376 }
377
378
379 //
380 // compile with default flags, UText mode
381 //
382 RegexPattern * U_EXPORT2
compile(UText * regex,UParseError & pe,UErrorCode & err)383 RegexPattern::compile(UText *regex,
384 UParseError &pe,
385 UErrorCode &err)
386 {
387 return compile(regex, 0, pe, err);
388 }
389
390
391 //
392 // compile with no UParseErr parameter.
393 //
394 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UErrorCode & err)395 RegexPattern::compile(const UnicodeString ®ex,
396 uint32_t flags,
397 UErrorCode &err)
398 {
399 UParseError pe;
400 return compile(regex, flags, pe, err);
401 }
402
403
404 //
405 // compile with no UParseErr parameter, UText mode
406 //
407 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UErrorCode & err)408 RegexPattern::compile(UText *regex,
409 uint32_t flags,
410 UErrorCode &err)
411 {
412 UParseError pe;
413 return compile(regex, flags, pe, err);
414 }
415
416
417 //---------------------------------------------------------------------
418 //
419 // flags
420 //
421 //---------------------------------------------------------------------
flags() const422 uint32_t RegexPattern::flags() const {
423 return fFlags;
424 }
425
426
427 //---------------------------------------------------------------------
428 //
429 // matcher(UnicodeString, err)
430 //
431 //---------------------------------------------------------------------
matcher(const UnicodeString & input,UErrorCode & status) const432 RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
433 UErrorCode &status) const {
434 RegexMatcher *retMatcher = matcher(status);
435 if (retMatcher != NULL) {
436 retMatcher->fDeferredStatus = status;
437 retMatcher->reset(input);
438 }
439 return retMatcher;
440 }
441
442
443 //---------------------------------------------------------------------
444 //
445 // matcher(status)
446 //
447 //---------------------------------------------------------------------
matcher(UErrorCode & status) const448 RegexMatcher *RegexPattern::matcher(UErrorCode &status) const {
449 RegexMatcher *retMatcher = NULL;
450
451 if (U_FAILURE(status)) {
452 return NULL;
453 }
454 if (U_FAILURE(fDeferredStatus)) {
455 status = fDeferredStatus;
456 return NULL;
457 }
458
459 retMatcher = new RegexMatcher(this);
460 if (retMatcher == NULL) {
461 status = U_MEMORY_ALLOCATION_ERROR;
462 return NULL;
463 }
464 return retMatcher;
465 }
466
467
468
469 //---------------------------------------------------------------------
470 //
471 // matches Convenience function to test for a match, starting
472 // with a pattern string and a data string.
473 //
474 //---------------------------------------------------------------------
matches(const UnicodeString & regex,const UnicodeString & input,UParseError & pe,UErrorCode & status)475 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString ®ex,
476 const UnicodeString &input,
477 UParseError &pe,
478 UErrorCode &status) {
479
480 if (U_FAILURE(status)) {return FALSE;}
481
482 UBool retVal;
483 RegexPattern *pat = NULL;
484 RegexMatcher *matcher = NULL;
485
486 pat = RegexPattern::compile(regex, 0, pe, status);
487 matcher = pat->matcher(input, status);
488 retVal = matcher->matches(status);
489
490 delete matcher;
491 delete pat;
492 return retVal;
493 }
494
495
496 //
497 // matches, UText mode
498 //
matches(UText * regex,UText * input,UParseError & pe,UErrorCode & status)499 UBool U_EXPORT2 RegexPattern::matches(UText *regex,
500 UText *input,
501 UParseError &pe,
502 UErrorCode &status) {
503
504 if (U_FAILURE(status)) {return FALSE;}
505
506 UBool retVal = FALSE;
507 RegexPattern *pat = NULL;
508 RegexMatcher *matcher = NULL;
509
510 pat = RegexPattern::compile(regex, 0, pe, status);
511 matcher = pat->matcher(status);
512 if (U_SUCCESS(status)) {
513 matcher->reset(input);
514 retVal = matcher->matches(status);
515 }
516
517 delete matcher;
518 delete pat;
519 return retVal;
520 }
521
522
523
524
525
526 //---------------------------------------------------------------------
527 //
528 // pattern
529 //
530 //---------------------------------------------------------------------
pattern() const531 UnicodeString RegexPattern::pattern() const {
532 if (fPatternString != NULL) {
533 return *fPatternString;
534 } else if (fPattern == NULL) {
535 return UnicodeString();
536 } else {
537 UErrorCode status = U_ZERO_ERROR;
538 int64_t nativeLen = utext_nativeLength(fPattern);
539 int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
540 UnicodeString result;
541
542 status = U_ZERO_ERROR;
543 UChar *resultChars = result.getBuffer(len16);
544 utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
545 result.releaseBuffer(len16);
546
547 return result;
548 }
549 }
550
551
552
553
554 //---------------------------------------------------------------------
555 //
556 // patternText
557 //
558 //---------------------------------------------------------------------
patternText(UErrorCode & status) const559 UText *RegexPattern::patternText(UErrorCode &status) const {
560 if (U_FAILURE(status)) {return NULL;}
561 status = U_ZERO_ERROR;
562
563 if (fPattern != NULL) {
564 return fPattern;
565 } else {
566 RegexStaticSets::initGlobals(&status);
567 return RegexStaticSets::gStaticSets->fEmptyText;
568 }
569 }
570
571
572
573 //---------------------------------------------------------------------
574 //
575 // split
576 //
577 //---------------------------------------------------------------------
split(const UnicodeString & input,UnicodeString dest[],int32_t destCapacity,UErrorCode & status) const578 int32_t RegexPattern::split(const UnicodeString &input,
579 UnicodeString dest[],
580 int32_t destCapacity,
581 UErrorCode &status) const
582 {
583 if (U_FAILURE(status)) {
584 return 0;
585 };
586
587 RegexMatcher m(this);
588 int32_t r = 0;
589 // Check m's status to make sure all is ok.
590 if (U_SUCCESS(m.fDeferredStatus)) {
591 r = m.split(input, dest, destCapacity, status);
592 }
593 return r;
594 }
595
596 //
597 // split, UText mode
598 //
split(UText * input,UText * dest[],int32_t destCapacity,UErrorCode & status) const599 int32_t RegexPattern::split(UText *input,
600 UText *dest[],
601 int32_t destCapacity,
602 UErrorCode &status) const
603 {
604 if (U_FAILURE(status)) {
605 return 0;
606 };
607
608 RegexMatcher m(this);
609 int32_t r = 0;
610 // Check m's status to make sure all is ok.
611 if (U_SUCCESS(m.fDeferredStatus)) {
612 r = m.split(input, dest, destCapacity, status);
613 }
614 return r;
615 }
616
617
618
619 //---------------------------------------------------------------------
620 //
621 // dump Output the compiled form of the pattern.
622 // Debugging function only.
623 //
624 //---------------------------------------------------------------------
625 #if defined(REGEX_DEBUG)
dumpOp(int32_t index) const626 void RegexPattern::dumpOp(int32_t index) const {
627 static const char * const opNames[] = {URX_OPCODE_NAMES};
628 int32_t op = fCompiledPat->elementAti(index);
629 int32_t val = URX_VAL(op);
630 int32_t type = URX_TYPE(op);
631 int32_t pinnedType = type;
632 if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) {
633 pinnedType = 0;
634 }
635
636 REGEX_DUMP_DEBUG_PRINTF(("%4d %08x %-15s ", index, op, opNames[pinnedType]));
637 switch (type) {
638 case URX_NOP:
639 case URX_DOTANY:
640 case URX_DOTANY_ALL:
641 case URX_FAIL:
642 case URX_CARET:
643 case URX_DOLLAR:
644 case URX_BACKSLASH_G:
645 case URX_BACKSLASH_X:
646 case URX_END:
647 case URX_DOLLAR_M:
648 case URX_CARET_M:
649 // Types with no operand field of interest.
650 break;
651
652 case URX_RESERVED_OP:
653 case URX_START_CAPTURE:
654 case URX_END_CAPTURE:
655 case URX_STATE_SAVE:
656 case URX_JMP:
657 case URX_JMP_SAV:
658 case URX_JMP_SAV_X:
659 case URX_BACKSLASH_B:
660 case URX_BACKSLASH_BU:
661 case URX_BACKSLASH_D:
662 case URX_BACKSLASH_Z:
663 case URX_STRING_LEN:
664 case URX_CTR_INIT:
665 case URX_CTR_INIT_NG:
666 case URX_CTR_LOOP:
667 case URX_CTR_LOOP_NG:
668 case URX_RELOC_OPRND:
669 case URX_STO_SP:
670 case URX_LD_SP:
671 case URX_BACKREF:
672 case URX_STO_INP_LOC:
673 case URX_JMPX:
674 case URX_LA_START:
675 case URX_LA_END:
676 case URX_BACKREF_I:
677 case URX_LB_START:
678 case URX_LB_CONT:
679 case URX_LB_END:
680 case URX_LBN_CONT:
681 case URX_LBN_END:
682 case URX_LOOP_C:
683 case URX_LOOP_DOT_I:
684 // types with an integer operand field.
685 REGEX_DUMP_DEBUG_PRINTF(("%d", val));
686 break;
687
688 case URX_ONECHAR:
689 case URX_ONECHAR_I:
690 REGEX_DUMP_DEBUG_PRINTF(("%c", val<256?val:'?'));
691 break;
692
693 case URX_STRING:
694 case URX_STRING_I:
695 {
696 int32_t lengthOp = fCompiledPat->elementAti(index+1);
697 U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
698 int32_t length = URX_VAL(lengthOp);
699 int32_t i;
700 for (i=val; i<val+length; i++) {
701 UChar c = fLiteralText[i];
702 if (c < 32 || c >= 256) {c = '.';}
703 REGEX_DUMP_DEBUG_PRINTF(("%c", c));
704 }
705 }
706 break;
707
708 case URX_SETREF:
709 case URX_LOOP_SR_I:
710 {
711 UnicodeString s;
712 UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
713 set->toPattern(s, TRUE);
714 for (int32_t i=0; i<s.length(); i++) {
715 REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
716 }
717 }
718 break;
719
720 case URX_STATIC_SETREF:
721 case URX_STAT_SETREF_N:
722 {
723 UnicodeString s;
724 if (val & URX_NEG_SET) {
725 REGEX_DUMP_DEBUG_PRINTF(("NOT "));
726 val &= ~URX_NEG_SET;
727 }
728 UnicodeSet *set = fStaticSets[val];
729 set->toPattern(s, TRUE);
730 for (int32_t i=0; i<s.length(); i++) {
731 REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
732 }
733 }
734 break;
735
736
737 default:
738 REGEX_DUMP_DEBUG_PRINTF(("??????"));
739 break;
740 }
741 REGEX_DUMP_DEBUG_PRINTF(("\n"));
742 }
743 #endif
744
745
746 #if defined(REGEX_DEBUG)
747 U_CAPI void U_EXPORT2
RegexPatternDump(const RegexPattern * This)748 RegexPatternDump(const RegexPattern *This) {
749 int index;
750 int i;
751
752 REGEX_DUMP_DEBUG_PRINTF(("Original Pattern: "));
753 UChar32 c = utext_next32From(This->fPattern, 0);
754 while (c != U_SENTINEL) {
755 if (c<32 || c>256) {
756 c = '.';
757 }
758 REGEX_DUMP_DEBUG_PRINTF(("%c", c));
759
760 c = UTEXT_NEXT32(This->fPattern);
761 }
762 REGEX_DUMP_DEBUG_PRINTF(("\n"));
763 REGEX_DUMP_DEBUG_PRINTF((" Min Match Length: %d\n", This->fMinMatchLen));
764 REGEX_DUMP_DEBUG_PRINTF((" Match Start Type: %s\n", START_OF_MATCH_STR(This->fStartType)));
765 if (This->fStartType == START_STRING) {
766 REGEX_DUMP_DEBUG_PRINTF((" Initial match string: \""));
767 for (i=This->fInitialStringIdx; i<This->fInitialStringIdx+This->fInitialStringLen; i++) {
768 REGEX_DUMP_DEBUG_PRINTF(("%c", This->fLiteralText[i])); // TODO: non-printables, surrogates.
769 }
770 REGEX_DUMP_DEBUG_PRINTF(("\"\n"));
771
772 } else if (This->fStartType == START_SET) {
773 int32_t numSetChars = This->fInitialChars->size();
774 if (numSetChars > 20) {
775 numSetChars = 20;
776 }
777 REGEX_DUMP_DEBUG_PRINTF((" Match First Chars : "));
778 for (i=0; i<numSetChars; i++) {
779 UChar32 c = This->fInitialChars->charAt(i);
780 if (0x20<c && c <0x7e) {
781 REGEX_DUMP_DEBUG_PRINTF(("%c ", c));
782 } else {
783 REGEX_DUMP_DEBUG_PRINTF(("%#x ", c));
784 }
785 }
786 if (numSetChars < This->fInitialChars->size()) {
787 REGEX_DUMP_DEBUG_PRINTF((" ..."));
788 }
789 REGEX_DUMP_DEBUG_PRINTF(("\n"));
790
791 } else if (This->fStartType == START_CHAR) {
792 REGEX_DUMP_DEBUG_PRINTF((" First char of Match : "));
793 if (0x20 < This->fInitialChar && This->fInitialChar<0x7e) {
794 REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar));
795 } else {
796 REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar));
797 }
798 }
799
800 REGEX_DUMP_DEBUG_PRINTF(("\nIndex Binary Type Operand\n" \
801 "-------------------------------------------\n"));
802 for (index = 0; index<This->fCompiledPat->size(); index++) {
803 This->dumpOp(index);
804 }
805 REGEX_DUMP_DEBUG_PRINTF(("\n\n"));
806 }
807 #endif
808
809
810
811 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
812
813 U_NAMESPACE_END
814 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
815