1 //
2 // file: repattrn.cpp
3 //
4 /*
5 ***************************************************************************
6 * Copyright (C) 2002-2011 International Business Machines Corporation *
7 * and others. All rights reserved. *
8 ***************************************************************************
9 */
10
11 #include "unicode/utypes.h"
12
13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
14
15 #include "unicode/regex.h"
16 #include "unicode/uclean.h"
17 #include "uassert.h"
18 #include "uvector.h"
19 #include "uvectr32.h"
20 #include "uvectr64.h"
21 #include "regexcmp.h"
22 #include "regeximp.h"
23 #include "regexst.h"
24
25 U_NAMESPACE_BEGIN
26
27 //--------------------------------------------------------------------------
28 //
29 // RegexPattern Default Constructor
30 //
31 //--------------------------------------------------------------------------
RegexPattern()32 RegexPattern::RegexPattern() {
33 UErrorCode status = U_ZERO_ERROR;
34 u_init(&status);
35
36 // Init all of this instances data.
37 init();
38 }
39
40
41 //--------------------------------------------------------------------------
42 //
43 // Copy Constructor Note: This is a rather inefficient implementation,
44 // but it probably doesn't matter.
45 //
46 //--------------------------------------------------------------------------
// Copy-constructs by default-initializing this object and then delegating to
// the assignment operator.
RegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) {
    // The assignment operator begins by calling zap(), which deletes the
    // members, so they must hold valid values before it runs.
    init();
    operator=(other);
}
51
52
53
54 //--------------------------------------------------------------------------
55 //
56 // Assignment Operator
57 //
58 //--------------------------------------------------------------------------
// Makes this pattern a copy of `other`: pattern text, flags, compiled pattern,
// group map and Unicode sets.  Anything previously owned by this object is
// released first.  Failures are recorded in fDeferredStatus; the function
// always returns *this.
RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
    if (this == &other) {
        // Source and destination are the same.  Don't do anything.
        return *this;
    }

    // Clean out any previous contents of object being assigned to.
    zap();

    // Give target object a default initialization
    init();
    if (U_FAILURE(fDeferredStatus)) {
        // init() could not set up its containers, so some of the pointers
        // dereferenced below may be NULL.  Keep the error and stop.
        return *this;
    }

    // Copy the pattern text.
    if (other.fPatternString == NULL) {
        fPatternString = NULL;
        // A default-constructed source has no UText at all; there is
        // nothing to clone in that case.
        if (other.fPattern != NULL) {
            fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
        }
    } else {
        fPatternString = new UnicodeString(*(other.fPatternString));
        if (fPatternString == NULL) {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
        } else {
            UErrorCode status = U_ZERO_ERROR;
            fPattern = utext_openConstUnicodeString(NULL, fPatternString, &status);
            if (U_FAILURE(status)) {
                fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
            }
        }
    }
    if (U_FAILURE(fDeferredStatus)) {
        // Return now so that the failure recorded above is not overwritten
        // by the copy of other.fDeferredStatus below.
        return *this;
    }

    // Copy simple fields
    fFlags            = other.fFlags;
    fLiteralText      = other.fLiteralText;
    fDeferredStatus   = other.fDeferredStatus;
    fMinMatchLen      = other.fMinMatchLen;
    fFrameSize        = other.fFrameSize;
    fDataSize         = other.fDataSize;
    fMaxCaptureDigits = other.fMaxCaptureDigits;
    fStaticSets       = other.fStaticSets;
    fStaticSets8      = other.fStaticSets8;

    fStartType        = other.fStartType;
    fInitialStringIdx = other.fInitialStringIdx;
    fInitialStringLen = other.fInitialStringLen;
    fInitialChar      = other.fInitialChar;
    fNeedsAltInput    = other.fNeedsAltInput;

    if (U_FAILURE(fDeferredStatus)) {
        // The source is itself in an error state; its owned containers may
        // never have been allocated, so do not touch them.
        return *this;
    }

    *fInitialChars    = *other.fInitialChars;
    *fInitialChars8   = *other.fInitialChars8;

    //  Copy the pattern.  It's just values, nothing deep to copy.
    fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
    fGroupMap->assign(*other.fGroupMap, fDeferredStatus);

    //  Copy the Unicode Sets.
    //    Could be made more efficient if the sets were reference counted and shared,
    //    but I doubt that pattern copying will be particularly common.
    //    Note:  init() already added an empty element zero to fSets
    int32_t numSets = other.fSets->size();
    fSets8 = new Regex8BitSet[numSets];
    if (fSets8 == NULL) {
        fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
        return *this;
    }
    for (int32_t i = 1; i < numSets; i++) {
        if (U_FAILURE(fDeferredStatus)) {
            return *this;
        }
        UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
        UnicodeSet *newSet = new UnicodeSet(*sourceSet);
        if (newSet == NULL) {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
            break;
        }
        fSets->addElement(newSet, fDeferredStatus);
        fSets8[i] = other.fSets8[i];
    }

    return *this;
}
133
134
135 //--------------------------------------------------------------------------
136 //
137 // init Shared initialization for use by constructors.
138 // Bring an uninitialized RegexPattern up to a default state.
139 //
140 //--------------------------------------------------------------------------
init()141 void RegexPattern::init() {
142 fFlags = 0;
143 fCompiledPat = 0;
144 fLiteralText.remove();
145 fSets = NULL;
146 fSets8 = NULL;
147 fDeferredStatus = U_ZERO_ERROR;
148 fMinMatchLen = 0;
149 fFrameSize = 0;
150 fDataSize = 0;
151 fGroupMap = NULL;
152 fMaxCaptureDigits = 1;
153 fStaticSets = NULL;
154 fStaticSets8 = NULL;
155 fStartType = START_NO_INFO;
156 fInitialStringIdx = 0;
157 fInitialStringLen = 0;
158 fInitialChars = NULL;
159 fInitialChar = 0;
160 fInitialChars8 = NULL;
161 fNeedsAltInput = FALSE;
162
163 fPattern = NULL; // will be set later
164 fPatternString = NULL; // may be set later
165 fCompiledPat = new UVector64(fDeferredStatus);
166 fGroupMap = new UVector32(fDeferredStatus);
167 fSets = new UVector(fDeferredStatus);
168 fInitialChars = new UnicodeSet;
169 fInitialChars8 = new Regex8BitSet;
170 if (U_FAILURE(fDeferredStatus)) {
171 return;
172 }
173 if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL ||
174 fInitialChars == NULL || fInitialChars8 == NULL) {
175 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
176 return;
177 }
178
179 // Slot zero of the vector of sets is reserved. Fill it here.
180 fSets->addElement((int32_t)0, fDeferredStatus);
181 }
182
183
184 //--------------------------------------------------------------------------
185 //
186 // zap Delete everything owned by this RegexPattern.
187 //
188 //--------------------------------------------------------------------------
zap()189 void RegexPattern::zap() {
190 delete fCompiledPat;
191 fCompiledPat = NULL;
192 int i;
193 for (i=1; i<fSets->size(); i++) {
194 UnicodeSet *s;
195 s = (UnicodeSet *)fSets->elementAt(i);
196 if (s != NULL) {
197 delete s;
198 }
199 }
200 delete fSets;
201 fSets = NULL;
202 delete[] fSets8;
203 fSets8 = NULL;
204 delete fGroupMap;
205 fGroupMap = NULL;
206 delete fInitialChars;
207 fInitialChars = NULL;
208 delete fInitialChars8;
209 fInitialChars8 = NULL;
210 if (fPattern != NULL) {
211 utext_close(fPattern);
212 fPattern = NULL;
213 }
214 if (fPatternString != NULL) {
215 delete fPatternString;
216 fPatternString = NULL;
217 }
218 }
219
220
221 //--------------------------------------------------------------------------
222 //
223 // Destructor
224 //
225 //--------------------------------------------------------------------------
~RegexPattern()226 RegexPattern::~RegexPattern() {
227 zap();
228 }
229
230
231 //--------------------------------------------------------------------------
232 //
233 // Clone
234 //
235 //--------------------------------------------------------------------------
clone() const236 RegexPattern *RegexPattern::clone() const {
237 RegexPattern *copy = new RegexPattern(*this);
238 return copy;
239 }
240
241
242 //--------------------------------------------------------------------------
243 //
//    operator ==   (comparison)    Consider two patterns to be == if the
245 // pattern strings and the flags are the same.
246 // Note that pattern strings with the same
247 // characters can still be considered different.
248 //
249 //--------------------------------------------------------------------------
operator ==(const RegexPattern & other) const250 UBool RegexPattern::operator ==(const RegexPattern &other) const {
251 if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
252 if (this->fPatternString != NULL && other.fPatternString != NULL) {
253 return *(this->fPatternString) == *(other.fPatternString);
254 } else if (this->fPattern == NULL) {
255 if (other.fPattern == NULL) {
256 return TRUE;
257 }
258 } else if (other.fPattern != NULL) {
259 UTEXT_SETNATIVEINDEX(this->fPattern, 0);
260 UTEXT_SETNATIVEINDEX(other.fPattern, 0);
261 return utext_equals(this->fPattern, other.fPattern);
262 }
263 }
264 return FALSE;
265 }
266
267 //---------------------------------------------------------------------
268 //
269 // compile
270 //
271 //---------------------------------------------------------------------
272 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UParseError & pe,UErrorCode & status)273 RegexPattern::compile(const UnicodeString ®ex,
274 uint32_t flags,
275 UParseError &pe,
276 UErrorCode &status)
277 {
278 if (U_FAILURE(status)) {
279 return NULL;
280 }
281
282 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
283 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
284 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
285
286 if ((flags & ~allFlags) != 0) {
287 status = U_REGEX_INVALID_FLAG;
288 return NULL;
289 }
290
291 if ((flags & (UREGEX_CANON_EQ | UREGEX_LITERAL)) != 0) {
292 status = U_REGEX_UNIMPLEMENTED;
293 return NULL;
294 }
295
296 RegexPattern *This = new RegexPattern;
297 if (This == NULL) {
298 status = U_MEMORY_ALLOCATION_ERROR;
299 return NULL;
300 }
301 if (U_FAILURE(This->fDeferredStatus)) {
302 status = This->fDeferredStatus;
303 delete This;
304 return NULL;
305 }
306 This->fFlags = flags;
307
308 RegexCompile compiler(This, status);
309 compiler.compile(regex, pe, status);
310
311 if (U_FAILURE(status)) {
312 delete This;
313 This = NULL;
314 }
315
316 return This;
317 }
318
319
320 //
321 // compile, UText mode
322 //
323 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UParseError & pe,UErrorCode & status)324 RegexPattern::compile(UText *regex,
325 uint32_t flags,
326 UParseError &pe,
327 UErrorCode &status)
328 {
329 if (U_FAILURE(status)) {
330 return NULL;
331 }
332
333 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
334 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
335 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
336
337 if ((flags & ~allFlags) != 0) {
338 status = U_REGEX_INVALID_FLAG;
339 return NULL;
340 }
341
342 if ((flags & (UREGEX_CANON_EQ | UREGEX_LITERAL)) != 0) {
343 status = U_REGEX_UNIMPLEMENTED;
344 return NULL;
345 }
346
347 RegexPattern *This = new RegexPattern;
348 if (This == NULL) {
349 status = U_MEMORY_ALLOCATION_ERROR;
350 return NULL;
351 }
352 if (U_FAILURE(This->fDeferredStatus)) {
353 status = This->fDeferredStatus;
354 delete This;
355 return NULL;
356 }
357 This->fFlags = flags;
358
359 RegexCompile compiler(This, status);
360 compiler.compile(regex, pe, status);
361
362 if (U_FAILURE(status)) {
363 delete This;
364 This = NULL;
365 }
366
367 return This;
368 }
369
370 //
371 // compile with default flags.
372 //
373 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,UParseError & pe,UErrorCode & err)374 RegexPattern::compile(const UnicodeString ®ex,
375 UParseError &pe,
376 UErrorCode &err)
377 {
378 return compile(regex, 0, pe, err);
379 }
380
381
382 //
383 // compile with default flags, UText mode
384 //
385 RegexPattern * U_EXPORT2
compile(UText * regex,UParseError & pe,UErrorCode & err)386 RegexPattern::compile(UText *regex,
387 UParseError &pe,
388 UErrorCode &err)
389 {
390 return compile(regex, 0, pe, err);
391 }
392
393
394 //
395 // compile with no UParseErr parameter.
396 //
397 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UErrorCode & err)398 RegexPattern::compile(const UnicodeString ®ex,
399 uint32_t flags,
400 UErrorCode &err)
401 {
402 UParseError pe;
403 return compile(regex, flags, pe, err);
404 }
405
406
407 //
408 // compile with no UParseErr parameter, UText mode
409 //
410 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UErrorCode & err)411 RegexPattern::compile(UText *regex,
412 uint32_t flags,
413 UErrorCode &err)
414 {
415 UParseError pe;
416 return compile(regex, flags, pe, err);
417 }
418
419
420 //---------------------------------------------------------------------
421 //
422 // flags
423 //
424 //---------------------------------------------------------------------
flags() const425 uint32_t RegexPattern::flags() const {
426 return fFlags;
427 }
428
429
430 //---------------------------------------------------------------------
431 //
432 // matcher(UnicodeString, err)
433 //
434 //---------------------------------------------------------------------
matcher(const UnicodeString & input,UErrorCode & status) const435 RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
436 UErrorCode &status) const {
437 RegexMatcher *retMatcher = matcher(status);
438 if (retMatcher != NULL) {
439 retMatcher->fDeferredStatus = status;
440 retMatcher->reset(input);
441 }
442 return retMatcher;
443 }
444
445
446 //---------------------------------------------------------------------
447 //
448 // matcher(status)
449 //
450 //---------------------------------------------------------------------
matcher(UErrorCode & status) const451 RegexMatcher *RegexPattern::matcher(UErrorCode &status) const {
452 RegexMatcher *retMatcher = NULL;
453
454 if (U_FAILURE(status)) {
455 return NULL;
456 }
457 if (U_FAILURE(fDeferredStatus)) {
458 status = fDeferredStatus;
459 return NULL;
460 }
461
462 retMatcher = new RegexMatcher(this);
463 if (retMatcher == NULL) {
464 status = U_MEMORY_ALLOCATION_ERROR;
465 return NULL;
466 }
467 return retMatcher;
468 }
469
470
471
472 //---------------------------------------------------------------------
473 //
474 // matches Convenience function to test for a match, starting
475 // with a pattern string and a data string.
476 //
477 //---------------------------------------------------------------------
matches(const UnicodeString & regex,const UnicodeString & input,UParseError & pe,UErrorCode & status)478 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString ®ex,
479 const UnicodeString &input,
480 UParseError &pe,
481 UErrorCode &status) {
482
483 if (U_FAILURE(status)) {return FALSE;}
484
485 UBool retVal;
486 RegexPattern *pat = NULL;
487 RegexMatcher *matcher = NULL;
488
489 pat = RegexPattern::compile(regex, 0, pe, status);
490 matcher = pat->matcher(input, status);
491 retVal = matcher->matches(status);
492
493 delete matcher;
494 delete pat;
495 return retVal;
496 }
497
498
499 //
500 // matches, UText mode
501 //
matches(UText * regex,UText * input,UParseError & pe,UErrorCode & status)502 UBool U_EXPORT2 RegexPattern::matches(UText *regex,
503 UText *input,
504 UParseError &pe,
505 UErrorCode &status) {
506
507 if (U_FAILURE(status)) {return FALSE;}
508
509 UBool retVal = FALSE;
510 RegexPattern *pat = NULL;
511 RegexMatcher *matcher = NULL;
512
513 pat = RegexPattern::compile(regex, 0, pe, status);
514 matcher = pat->matcher(status);
515 if (U_SUCCESS(status)) {
516 matcher->reset(input);
517 retVal = matcher->matches(status);
518 }
519
520 delete matcher;
521 delete pat;
522 return retVal;
523 }
524
525
526
527
528
529 //---------------------------------------------------------------------
530 //
531 // pattern
532 //
533 //---------------------------------------------------------------------
pattern() const534 UnicodeString RegexPattern::pattern() const {
535 if (fPatternString != NULL) {
536 return *fPatternString;
537 } else if (fPattern == NULL) {
538 return UnicodeString();
539 } else {
540 UErrorCode status = U_ZERO_ERROR;
541 int64_t nativeLen = utext_nativeLength(fPattern);
542 int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
543 UnicodeString result;
544
545 status = U_ZERO_ERROR;
546 UChar *resultChars = result.getBuffer(len16);
547 utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
548 result.releaseBuffer(len16);
549
550 return result;
551 }
552 }
553
554
555
556
557 //---------------------------------------------------------------------
558 //
559 // patternText
560 //
561 //---------------------------------------------------------------------
patternText(UErrorCode & status) const562 UText *RegexPattern::patternText(UErrorCode &status) const {
563 if (U_FAILURE(status)) {return NULL;}
564 status = U_ZERO_ERROR;
565
566 if (fPattern != NULL) {
567 return fPattern;
568 } else {
569 RegexStaticSets::initGlobals(&status);
570 return RegexStaticSets::gStaticSets->fEmptyText;
571 }
572 }
573
574
575
576 //---------------------------------------------------------------------
577 //
578 // split
579 //
580 //---------------------------------------------------------------------
split(const UnicodeString & input,UnicodeString dest[],int32_t destCapacity,UErrorCode & status) const581 int32_t RegexPattern::split(const UnicodeString &input,
582 UnicodeString dest[],
583 int32_t destCapacity,
584 UErrorCode &status) const
585 {
586 if (U_FAILURE(status)) {
587 return 0;
588 };
589
590 RegexMatcher m(this);
591 int32_t r = 0;
592 // Check m's status to make sure all is ok.
593 if (U_SUCCESS(m.fDeferredStatus)) {
594 r = m.split(input, dest, destCapacity, status);
595 }
596 return r;
597 }
598
599 //
600 // split, UText mode
601 //
split(UText * input,UText * dest[],int32_t destCapacity,UErrorCode & status) const602 int32_t RegexPattern::split(UText *input,
603 UText *dest[],
604 int32_t destCapacity,
605 UErrorCode &status) const
606 {
607 if (U_FAILURE(status)) {
608 return 0;
609 };
610
611 RegexMatcher m(this);
612 int32_t r = 0;
613 // Check m's status to make sure all is ok.
614 if (U_SUCCESS(m.fDeferredStatus)) {
615 r = m.split(input, dest, destCapacity, status);
616 }
617 return r;
618 }
619
620
621
622 //---------------------------------------------------------------------
623 //
624 // dump Output the compiled form of the pattern.
625 // Debugging function only.
626 //
627 //---------------------------------------------------------------------
628 #if defined(REGEX_DEBUG)
dumpOp(int32_t index) const629 void RegexPattern::dumpOp(int32_t index) const {
630 static const char * const opNames[] = {URX_OPCODE_NAMES};
631 int32_t op = fCompiledPat->elementAti(index);
632 int32_t val = URX_VAL(op);
633 int32_t type = URX_TYPE(op);
634 int32_t pinnedType = type;
635 if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) {
636 pinnedType = 0;
637 }
638
639 REGEX_DUMP_DEBUG_PRINTF(("%4d %08x %-15s ", index, op, opNames[pinnedType]));
640 switch (type) {
641 case URX_NOP:
642 case URX_DOTANY:
643 case URX_DOTANY_ALL:
644 case URX_FAIL:
645 case URX_CARET:
646 case URX_DOLLAR:
647 case URX_BACKSLASH_G:
648 case URX_BACKSLASH_X:
649 case URX_END:
650 case URX_DOLLAR_M:
651 case URX_CARET_M:
652 // Types with no operand field of interest.
653 break;
654
655 case URX_RESERVED_OP:
656 case URX_START_CAPTURE:
657 case URX_END_CAPTURE:
658 case URX_STATE_SAVE:
659 case URX_JMP:
660 case URX_JMP_SAV:
661 case URX_JMP_SAV_X:
662 case URX_BACKSLASH_B:
663 case URX_BACKSLASH_BU:
664 case URX_BACKSLASH_D:
665 case URX_BACKSLASH_Z:
666 case URX_STRING_LEN:
667 case URX_CTR_INIT:
668 case URX_CTR_INIT_NG:
669 case URX_CTR_LOOP:
670 case URX_CTR_LOOP_NG:
671 case URX_RELOC_OPRND:
672 case URX_STO_SP:
673 case URX_LD_SP:
674 case URX_BACKREF:
675 case URX_STO_INP_LOC:
676 case URX_JMPX:
677 case URX_LA_START:
678 case URX_LA_END:
679 case URX_BACKREF_I:
680 case URX_LB_START:
681 case URX_LB_CONT:
682 case URX_LB_END:
683 case URX_LBN_CONT:
684 case URX_LBN_END:
685 case URX_LOOP_C:
686 case URX_LOOP_DOT_I:
687 // types with an integer operand field.
688 REGEX_DUMP_DEBUG_PRINTF(("%d", val));
689 break;
690
691 case URX_ONECHAR:
692 case URX_ONECHAR_I:
693 REGEX_DUMP_DEBUG_PRINTF(("%c", val<256?val:'?'));
694 break;
695
696 case URX_STRING:
697 case URX_STRING_I:
698 {
699 int32_t lengthOp = fCompiledPat->elementAti(index+1);
700 U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
701 int32_t length = URX_VAL(lengthOp);
702 int32_t i;
703 for (i=val; i<val+length; i++) {
704 UChar c = fLiteralText[i];
705 if (c < 32 || c >= 256) {c = '.';}
706 REGEX_DUMP_DEBUG_PRINTF(("%c", c));
707 }
708 }
709 break;
710
711 case URX_SETREF:
712 case URX_LOOP_SR_I:
713 {
714 UnicodeString s;
715 UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
716 set->toPattern(s, TRUE);
717 for (int32_t i=0; i<s.length(); i++) {
718 REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
719 }
720 }
721 break;
722
723 case URX_STATIC_SETREF:
724 case URX_STAT_SETREF_N:
725 {
726 UnicodeString s;
727 if (val & URX_NEG_SET) {
728 REGEX_DUMP_DEBUG_PRINTF(("NOT "));
729 val &= ~URX_NEG_SET;
730 }
731 UnicodeSet *set = fStaticSets[val];
732 set->toPattern(s, TRUE);
733 for (int32_t i=0; i<s.length(); i++) {
734 REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
735 }
736 }
737 break;
738
739
740 default:
741 REGEX_DUMP_DEBUG_PRINTF(("??????"));
742 break;
743 }
744 REGEX_DUMP_DEBUG_PRINTF(("\n"));
745 }
746 #endif
747
748
749 #if defined(REGEX_DEBUG)
750 U_CAPI void U_EXPORT2
RegexPatternDump(const RegexPattern * This)751 RegexPatternDump(const RegexPattern *This) {
752 int index;
753 int i;
754
755 REGEX_DUMP_DEBUG_PRINTF(("Original Pattern: "));
756 UChar32 c = utext_next32From(This->fPattern, 0);
757 while (c != U_SENTINEL) {
758 if (c<32 || c>256) {
759 c = '.';
760 }
761 REGEX_DUMP_DEBUG_PRINTF(("%c", c));
762
763 c = UTEXT_NEXT32(This->fPattern);
764 }
765 REGEX_DUMP_DEBUG_PRINTF(("\n"));
766 REGEX_DUMP_DEBUG_PRINTF((" Min Match Length: %d\n", This->fMinMatchLen));
767 REGEX_DUMP_DEBUG_PRINTF((" Match Start Type: %s\n", START_OF_MATCH_STR(This->fStartType)));
768 if (This->fStartType == START_STRING) {
769 REGEX_DUMP_DEBUG_PRINTF((" Initial match string: \""));
770 for (i=This->fInitialStringIdx; i<This->fInitialStringIdx+This->fInitialStringLen; i++) {
771 REGEX_DUMP_DEBUG_PRINTF(("%c", This->fLiteralText[i])); // TODO: non-printables, surrogates.
772 }
773 REGEX_DUMP_DEBUG_PRINTF(("\"\n"));
774
775 } else if (This->fStartType == START_SET) {
776 int32_t numSetChars = This->fInitialChars->size();
777 if (numSetChars > 20) {
778 numSetChars = 20;
779 }
780 REGEX_DUMP_DEBUG_PRINTF((" Match First Chars : "));
781 for (i=0; i<numSetChars; i++) {
782 UChar32 c = This->fInitialChars->charAt(i);
783 if (0x20<c && c <0x7e) {
784 REGEX_DUMP_DEBUG_PRINTF(("%c ", c));
785 } else {
786 REGEX_DUMP_DEBUG_PRINTF(("%#x ", c));
787 }
788 }
789 if (numSetChars < This->fInitialChars->size()) {
790 REGEX_DUMP_DEBUG_PRINTF((" ..."));
791 }
792 REGEX_DUMP_DEBUG_PRINTF(("\n"));
793
794 } else if (This->fStartType == START_CHAR) {
795 REGEX_DUMP_DEBUG_PRINTF((" First char of Match : "));
796 if (0x20 < This->fInitialChar && This->fInitialChar<0x7e) {
797 REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar));
798 } else {
799 REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar));
800 }
801 }
802
803 REGEX_DUMP_DEBUG_PRINTF(("\nIndex Binary Type Operand\n" \
804 "-------------------------------------------\n"));
805 for (index = 0; index<This->fCompiledPat->size(); index++) {
806 This->dumpOp(index);
807 }
808 REGEX_DUMP_DEBUG_PRINTF(("\n\n"));
809 }
810 #endif
811
812
813
// Class-ID boilerplate for RegexPattern.
// NOTE(review): presumably expands to the getStaticClassID() /
// getDynamicClassID() definitions -- confirm against the macro's definition.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
815
816 U_NAMESPACE_END
817 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
818