1 //
2 // file: repattrn.cpp
3 //
4 /*
5 ***************************************************************************
6 * Copyright (C) 2002-2010 International Business Machines Corporation *
7 * and others. All rights reserved. *
8 ***************************************************************************
9 */
10
11 #include "unicode/utypes.h"
12
13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
14
15 #include "unicode/regex.h"
16 #include "unicode/uclean.h"
17 #include "uassert.h"
18 #include "uvector.h"
19 #include "uvectr32.h"
20 #include "uvectr64.h"
21 #include "regexcmp.h"
22 #include "regeximp.h"
23 #include "regexst.h"
24
25 U_NAMESPACE_BEGIN
26
27 //--------------------------------------------------------------------------
28 //
29 // RegexPattern Default Constructor
30 //
31 //--------------------------------------------------------------------------
RegexPattern()32 RegexPattern::RegexPattern() {
33 UErrorCode status = U_ZERO_ERROR;
34 u_init(&status);
35
36 // Init all of this instances data.
37 init();
38 }
39
40
41 //--------------------------------------------------------------------------
42 //
43 // Copy Constructor Note: This is a rather inefficient implementation,
44 // but it probably doesn't matter.
45 //
46 //--------------------------------------------------------------------------
// Copy construction is expressed in terms of assignment: the new object is
// first brought to a default state so that the assignment operator (which
// begins by releasing the target's contents) has valid members to work on.
RegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) {
    init();
    this->operator=(other);
}
51
52
53
54 //--------------------------------------------------------------------------
55 //
56 // Assignment Operator
57 //
58 //--------------------------------------------------------------------------
// Replaces the contents of this pattern with a deep copy of `other`.
//
// The target is first emptied with zap() and re-initialized with init().
// Any failure while copying is recorded in fDeferredStatus and the copy is
// abandoned at that point; the function always returns *this.
RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
    if (this == &other) {
        // Source and destination are the same.  Don't do anything.
        return *this;
    }

    // Clean out any previous contents of object being assigned to.
    zap();

    // Give target object a default initialization
    init();
    if (U_FAILURE(fDeferredStatus)) {
        // init() could not set up the containers that are written to below.
        return *this;
    }

    // Copy the pattern text.
    if (other.fPatternString == NULL) {
        fPatternString = NULL;
        // A default-constructed source has no pattern text at all; there is
        // nothing to clone in that case.
        if (other.fPattern != NULL) {
            // Use a private status so that a clone failure is not lost when
            // other.fDeferredStatus is copied further down.
            UErrorCode cloneStatus = U_ZERO_ERROR;
            fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &cloneStatus);
            if (U_FAILURE(cloneStatus)) {
                fDeferredStatus = cloneStatus;
                return *this;
            }
        }
    } else {
        fPatternString = new UnicodeString(*(other.fPatternString));
        if (fPatternString == NULL) {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
            return *this;
        }
        UErrorCode openStatus = U_ZERO_ERROR;
        fPattern = utext_openConstUnicodeString(NULL, fPatternString, &openStatus);
        if (U_FAILURE(openStatus)) {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
            return *this;
        }
    }

    // Copy simple fields
    fFlags            = other.fFlags;
    fLiteralText      = other.fLiteralText;
    fDeferredStatus   = other.fDeferredStatus;
    fMinMatchLen      = other.fMinMatchLen;
    fFrameSize        = other.fFrameSize;
    fDataSize         = other.fDataSize;
    fMaxCaptureDigits = other.fMaxCaptureDigits;
    fStaticSets       = other.fStaticSets;
    fStaticSets8      = other.fStaticSets8;

    fStartType        = other.fStartType;
    fInitialStringIdx = other.fInitialStringIdx;
    fInitialStringLen = other.fInitialStringLen;
    fInitialChar      = other.fInitialChar;
    fNeedsAltInput    = other.fNeedsAltInput;

    // The source's owned containers are NULL if its own initialization
    // failed; in that case there is nothing further that can be copied.
    if (other.fInitialChars == NULL || other.fInitialChars8 == NULL ||
        other.fCompiledPat == NULL || other.fGroupMap == NULL || other.fSets == NULL) {
        if (U_SUCCESS(fDeferredStatus)) {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
        }
        return *this;
    }
    *fInitialChars  = *other.fInitialChars;
    *fInitialChars8 = *other.fInitialChars8;

    //  Copy the pattern.  It's just values, nothing deep to copy.
    fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
    fGroupMap->assign(*other.fGroupMap, fDeferredStatus);

    //  Copy the Unicode Sets.
    //    Could be made more efficient if the sets were reference counted and shared,
    //    but I doubt that pattern copying will be particularly common.
    //    Note:  init() already added an empty element zero to fSets
    const int32_t numSets = other.fSets->size();
    fSets8 = new Regex8BitSet[numSets];
    if (fSets8 == NULL) {
        fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
        return *this;
    }
    for (int32_t i = 1; i < numSets; i++) {
        if (U_FAILURE(fDeferredStatus)) {
            return *this;
        }
        UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
        UnicodeSet *newSet    = new UnicodeSet(*sourceSet);
        if (newSet == NULL) {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
            break;
        }
        fSets->addElement(newSet, fDeferredStatus);
        if (U_FAILURE(fDeferredStatus)) {
            // The vector did not take the element, so it is still ours to free.
            delete newSet;
            break;
        }
        if (other.fSets8 != NULL) {
            fSets8[i] = other.fSets8[i];
        }
    }

    return *this;
}
133
134
135 //--------------------------------------------------------------------------
136 //
137 // init Shared initialization for use by constructors.
138 // Bring an uninitialized RegexPattern up to a default state.
139 //
140 //--------------------------------------------------------------------------
init()141 void RegexPattern::init() {
142 fFlags = 0;
143 fCompiledPat = 0;
144 fLiteralText.remove();
145 fSets = NULL;
146 fSets8 = NULL;
147 fDeferredStatus = U_ZERO_ERROR;
148 fMinMatchLen = 0;
149 fFrameSize = 0;
150 fDataSize = 0;
151 fGroupMap = NULL;
152 fMaxCaptureDigits = 1;
153 fStaticSets = NULL;
154 fStaticSets8 = NULL;
155 fStartType = START_NO_INFO;
156 fInitialStringIdx = 0;
157 fInitialStringLen = 0;
158 fInitialChars = NULL;
159 fInitialChar = 0;
160 fInitialChars8 = NULL;
161 fNeedsAltInput = FALSE;
162
163 fPattern = NULL; // will be set later
164 fPatternString = NULL; // may be set later
165 fCompiledPat = new UVector64(fDeferredStatus);
166 fGroupMap = new UVector32(fDeferredStatus);
167 fSets = new UVector(fDeferredStatus);
168 fInitialChars = new UnicodeSet;
169 fInitialChars8 = new Regex8BitSet;
170 if (U_FAILURE(fDeferredStatus)) {
171 return;
172 }
173 if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL ||
174 fInitialChars == NULL || fInitialChars8 == NULL) {
175 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
176 return;
177 }
178
179 // Slot zero of the vector of sets is reserved. Fill it here.
180 fSets->addElement((int32_t)0, fDeferredStatus);
181 }
182
183
184 //--------------------------------------------------------------------------
185 //
186 // zap Delete everything owned by this RegexPattern.
187 //
188 //--------------------------------------------------------------------------
zap()189 void RegexPattern::zap() {
190 delete fCompiledPat;
191 fCompiledPat = NULL;
192 int i;
193 for (i=1; i<fSets->size(); i++) {
194 UnicodeSet *s;
195 s = (UnicodeSet *)fSets->elementAt(i);
196 if (s != NULL) {
197 delete s;
198 }
199 }
200 delete fSets;
201 fSets = NULL;
202 delete[] fSets8;
203 fSets8 = NULL;
204 delete fGroupMap;
205 fGroupMap = NULL;
206 delete fInitialChars;
207 fInitialChars = NULL;
208 delete fInitialChars8;
209 fInitialChars8 = NULL;
210 if (fPattern != NULL) {
211 utext_close(fPattern);
212 fPattern = NULL;
213 }
214 if (fPatternString != NULL) {
215 delete fPatternString;
216 fPatternString = NULL;
217 }
218 }
219
220
221 //--------------------------------------------------------------------------
222 //
223 // Destructor
224 //
225 //--------------------------------------------------------------------------
~RegexPattern()226 RegexPattern::~RegexPattern() {
227 zap();
228 }
229
230
231 //--------------------------------------------------------------------------
232 //
233 // Clone
234 //
235 //--------------------------------------------------------------------------
clone() const236 RegexPattern *RegexPattern::clone() const {
237 RegexPattern *copy = new RegexPattern(*this);
238 return copy;
239 }
240
241
242 //--------------------------------------------------------------------------
243 //
244 // operator == (comparison) Consider two patterns to be == if the
245 // pattern strings and the flags are the same.
246 // Note that pattern strings with the same
247 // characters can still be considered different.
248 //
249 //--------------------------------------------------------------------------
operator ==(const RegexPattern & other) const250 UBool RegexPattern::operator ==(const RegexPattern &other) const {
251 if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
252 if (this->fPatternString != NULL && other.fPatternString != NULL) {
253 return *(this->fPatternString) == *(other.fPatternString);
254 } else if (this->fPattern == NULL) {
255 if (other.fPattern == NULL) {
256 return TRUE;
257 }
258 } else if (other.fPattern != NULL) {
259 UTEXT_SETNATIVEINDEX(this->fPattern, 0);
260 UTEXT_SETNATIVEINDEX(other.fPattern, 0);
261 return utext_equals(this->fPattern, other.fPattern);
262 }
263 }
264 return FALSE;
265 }
266
267 //---------------------------------------------------------------------
268 //
269 // compile
270 //
271 //---------------------------------------------------------------------
272 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UParseError & pe,UErrorCode & status)273 RegexPattern::compile(const UnicodeString ®ex,
274 uint32_t flags,
275 UParseError &pe,
276 UErrorCode &status)
277 {
278 if (U_FAILURE(status)) {
279 return NULL;
280 }
281
282 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
283 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
284 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
285
286 if ((flags & ~allFlags) != 0) {
287 status = U_REGEX_INVALID_FLAG;
288 return NULL;
289 }
290
291 if ((flags & (UREGEX_CANON_EQ | UREGEX_LITERAL)) != 0) {
292 status = U_REGEX_UNIMPLEMENTED;
293 return NULL;
294 }
295
296 RegexPattern *This = new RegexPattern;
297 if (This == NULL) {
298 status = U_MEMORY_ALLOCATION_ERROR;
299 return NULL;
300 }
301 if (U_FAILURE(This->fDeferredStatus)) {
302 status = This->fDeferredStatus;
303 delete This;
304 return NULL;
305 }
306 This->fFlags = flags;
307
308 RegexCompile compiler(This, status);
309 compiler.compile(regex, pe, status);
310
311 if (U_FAILURE(status)) {
312 delete This;
313 This = NULL;
314 }
315
316 return This;
317 }
318
319
320 //
321 // compile, UText mode
322 //
323 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UParseError & pe,UErrorCode & status)324 RegexPattern::compile(UText *regex,
325 uint32_t flags,
326 UParseError &pe,
327 UErrorCode &status)
328 {
329 if (U_FAILURE(status)) {
330 return NULL;
331 }
332
333 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
334 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
335 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
336
337 if ((flags & ~allFlags) != 0) {
338 status = U_REGEX_INVALID_FLAG;
339 return NULL;
340 }
341
342 if ((flags & (UREGEX_CANON_EQ | UREGEX_LITERAL)) != 0) {
343 status = U_REGEX_UNIMPLEMENTED;
344 return NULL;
345 }
346
347 RegexPattern *This = new RegexPattern;
348 if (This == NULL) {
349 status = U_MEMORY_ALLOCATION_ERROR;
350 return NULL;
351 }
352 if (U_FAILURE(This->fDeferredStatus)) {
353 status = This->fDeferredStatus;
354 delete This;
355 return NULL;
356 }
357 This->fFlags = flags;
358
359 RegexCompile compiler(This, status);
360 compiler.compile(regex, pe, status);
361
362 if (U_FAILURE(status)) {
363 delete This;
364 This = NULL;
365 }
366
367 return This;
368 }
369
370 //
371 // compile with default flags.
372 //
373 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,UParseError & pe,UErrorCode & err)374 RegexPattern::compile(const UnicodeString ®ex,
375 UParseError &pe,
376 UErrorCode &err)
377 {
378 return compile(regex, 0, pe, err);
379 }
380
381
382 //
383 // compile with default flags, UText mode
384 //
385 RegexPattern * U_EXPORT2
compile(UText * regex,UParseError & pe,UErrorCode & err)386 RegexPattern::compile(UText *regex,
387 UParseError &pe,
388 UErrorCode &err)
389 {
390 return compile(regex, 0, pe, err);
391 }
392
393
394 //
395 // compile with no UParseErr parameter.
396 //
397 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UErrorCode & err)398 RegexPattern::compile(const UnicodeString ®ex,
399 uint32_t flags,
400 UErrorCode &err)
401 {
402 UParseError pe;
403 return compile(regex, flags, pe, err);
404 }
405
406
407 //
408 // compile with no UParseErr parameter, UText mode
409 //
410 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UErrorCode & err)411 RegexPattern::compile(UText *regex,
412 uint32_t flags,
413 UErrorCode &err)
414 {
415 UParseError pe;
416 return compile(regex, flags, pe, err);
417 }
418
419
420 //---------------------------------------------------------------------
421 //
422 // flags
423 //
424 //---------------------------------------------------------------------
flags() const425 uint32_t RegexPattern::flags() const {
426 return fFlags;
427 }
428
429
430 //---------------------------------------------------------------------
431 //
432 // matcher(UnicodeString, err)
433 //
434 //---------------------------------------------------------------------
matcher(const UnicodeString & input,UErrorCode & status) const435 RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
436 UErrorCode &status) const {
437 RegexMatcher *retMatcher = matcher(status);
438 if (retMatcher != NULL) {
439 retMatcher->fDeferredStatus = status;
440 retMatcher->reset(input);
441 }
442 return retMatcher;
443 }
444
445 //
446 // matcher, UText mode
447 //
matcher(UText * input,PatternIsUTextFlag,UErrorCode & status) const448 RegexMatcher *RegexPattern::matcher(UText *input,
449 PatternIsUTextFlag /*flag*/,
450 UErrorCode &status) const {
451 RegexMatcher *retMatcher = matcher(status);
452 if (retMatcher != NULL) {
453 retMatcher->fDeferredStatus = status;
454 retMatcher->reset(input);
455 }
456 return retMatcher;
457 }
458
#if 0
// Disabled overload: it only reports U_UNSUPPORTED_ERROR (when status was
// not already a failure) and returns NULL.
RegexMatcher *RegexPattern::matcher(const UChar * /*input*/,
                                    UErrorCode   &status) const
{
    /* This should never get called. The API with UnicodeString should be called instead. */
    if (!U_FAILURE(status)) {
        status = U_UNSUPPORTED_ERROR;
    }
    return NULL;
}
#endif
470
471 //---------------------------------------------------------------------
472 //
473 // matcher(status)
474 //
475 //---------------------------------------------------------------------
matcher(UErrorCode & status) const476 RegexMatcher *RegexPattern::matcher(UErrorCode &status) const {
477 RegexMatcher *retMatcher = NULL;
478
479 if (U_FAILURE(status)) {
480 return NULL;
481 }
482 if (U_FAILURE(fDeferredStatus)) {
483 status = fDeferredStatus;
484 return NULL;
485 }
486
487 retMatcher = new RegexMatcher(this);
488 if (retMatcher == NULL) {
489 status = U_MEMORY_ALLOCATION_ERROR;
490 return NULL;
491 }
492 return retMatcher;
493 }
494
495
496
497 //---------------------------------------------------------------------
498 //
499 // matches Convenience function to test for a match, starting
500 // with a pattern string and a data string.
501 //
502 //---------------------------------------------------------------------
matches(const UnicodeString & regex,const UnicodeString & input,UParseError & pe,UErrorCode & status)503 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString ®ex,
504 const UnicodeString &input,
505 UParseError &pe,
506 UErrorCode &status) {
507
508 if (U_FAILURE(status)) {return FALSE;}
509
510 UBool retVal;
511 RegexPattern *pat = NULL;
512 RegexMatcher *matcher = NULL;
513
514 pat = RegexPattern::compile(regex, 0, pe, status);
515 matcher = pat->matcher(input, status);
516 retVal = matcher->matches(status);
517
518 delete matcher;
519 delete pat;
520 return retVal;
521 }
522
523
524 //
525 // matches, UText mode
526 //
matches(UText * regex,UText * input,UParseError & pe,UErrorCode & status)527 UBool U_EXPORT2 RegexPattern::matches(UText *regex,
528 UText *input,
529 UParseError &pe,
530 UErrorCode &status) {
531
532 if (U_FAILURE(status)) {return FALSE;}
533
534 UBool retVal;
535 RegexPattern *pat = NULL;
536 RegexMatcher *matcher = NULL;
537
538 pat = RegexPattern::compile(regex, 0, pe, status);
539 matcher = pat->matcher(input, PATTERN_IS_UTEXT, status);
540 retVal = matcher->matches(status);
541
542 delete matcher;
543 delete pat;
544 return retVal;
545 }
546
547
548
549
550
551 //---------------------------------------------------------------------
552 //
553 // pattern
554 //
555 //---------------------------------------------------------------------
pattern() const556 UnicodeString RegexPattern::pattern() const {
557 if (fPatternString != NULL) {
558 return *fPatternString;
559 } else if (fPattern == NULL) {
560 return UnicodeString();
561 } else {
562 UErrorCode status = U_ZERO_ERROR;
563 int64_t nativeLen = utext_nativeLength(fPattern);
564 int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
565 UnicodeString result;
566
567 status = U_ZERO_ERROR;
568 UChar *resultChars = result.getBuffer(len16);
569 utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
570 result.releaseBuffer(len16);
571
572 return result;
573 }
574 }
575
576
577
578
579 //---------------------------------------------------------------------
580 //
581 // patternText
582 //
583 //---------------------------------------------------------------------
patternText(UErrorCode & status) const584 UText *RegexPattern::patternText(UErrorCode &status) const {
585 if (U_FAILURE(status)) {return NULL;}
586 status = U_ZERO_ERROR;
587
588 if (fPattern != NULL) {
589 return fPattern;
590 } else {
591 RegexStaticSets::initGlobals(&status);
592 return RegexStaticSets::gStaticSets->fEmptyText;
593 }
594 }
595
596
597
598 //---------------------------------------------------------------------
599 //
600 // split
601 //
602 //---------------------------------------------------------------------
split(const UnicodeString & input,UnicodeString dest[],int32_t destCapacity,UErrorCode & status) const603 int32_t RegexPattern::split(const UnicodeString &input,
604 UnicodeString dest[],
605 int32_t destCapacity,
606 UErrorCode &status) const
607 {
608 if (U_FAILURE(status)) {
609 return 0;
610 };
611
612 RegexMatcher m(this);
613 int32_t r = 0;
614 // Check m's status to make sure all is ok.
615 if (U_SUCCESS(m.fDeferredStatus)) {
616 r = m.split(input, dest, destCapacity, status);
617 }
618 return r;
619 }
620
621 //
622 // split, UText mode
623 //
split(UText * input,UText * dest[],int32_t destCapacity,UErrorCode & status) const624 int32_t RegexPattern::split(UText *input,
625 UText *dest[],
626 int32_t destCapacity,
627 UErrorCode &status) const
628 {
629 if (U_FAILURE(status)) {
630 return 0;
631 };
632
633 RegexMatcher m(this);
634 int32_t r = 0;
635 // Check m's status to make sure all is ok.
636 if (U_SUCCESS(m.fDeferredStatus)) {
637 r = m.split(input, dest, destCapacity, status);
638 }
639 return r;
640 }
641
642
643
644 //---------------------------------------------------------------------
645 //
646 // dump Output the compiled form of the pattern.
647 // Debugging function only.
648 //
649 //---------------------------------------------------------------------
650 #if defined(REGEX_DEBUG)
dumpOp(int32_t index) const651 void RegexPattern::dumpOp(int32_t index) const {
652 static const char * const opNames[] = {URX_OPCODE_NAMES};
653 int32_t op = fCompiledPat->elementAti(index);
654 int32_t val = URX_VAL(op);
655 int32_t type = URX_TYPE(op);
656 int32_t pinnedType = type;
657 if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) {
658 pinnedType = 0;
659 }
660
661 REGEX_DUMP_DEBUG_PRINTF(("%4d %08x %-15s ", index, op, opNames[pinnedType]));
662 switch (type) {
663 case URX_NOP:
664 case URX_DOTANY:
665 case URX_DOTANY_ALL:
666 case URX_FAIL:
667 case URX_CARET:
668 case URX_DOLLAR:
669 case URX_BACKSLASH_G:
670 case URX_BACKSLASH_X:
671 case URX_END:
672 case URX_DOLLAR_M:
673 case URX_CARET_M:
674 // Types with no operand field of interest.
675 break;
676
677 case URX_RESERVED_OP:
678 case URX_START_CAPTURE:
679 case URX_END_CAPTURE:
680 case URX_STATE_SAVE:
681 case URX_JMP:
682 case URX_JMP_SAV:
683 case URX_JMP_SAV_X:
684 case URX_BACKSLASH_B:
685 case URX_BACKSLASH_BU:
686 case URX_BACKSLASH_D:
687 case URX_BACKSLASH_Z:
688 case URX_STRING_LEN:
689 case URX_CTR_INIT:
690 case URX_CTR_INIT_NG:
691 case URX_CTR_LOOP:
692 case URX_CTR_LOOP_NG:
693 case URX_RELOC_OPRND:
694 case URX_STO_SP:
695 case URX_LD_SP:
696 case URX_BACKREF:
697 case URX_STO_INP_LOC:
698 case URX_JMPX:
699 case URX_LA_START:
700 case URX_LA_END:
701 case URX_BACKREF_I:
702 case URX_LB_START:
703 case URX_LB_CONT:
704 case URX_LB_END:
705 case URX_LBN_CONT:
706 case URX_LBN_END:
707 case URX_LOOP_C:
708 case URX_LOOP_DOT_I:
709 // types with an integer operand field.
710 REGEX_DUMP_DEBUG_PRINTF(("%d", val));
711 break;
712
713 case URX_ONECHAR:
714 case URX_ONECHAR_I:
715 REGEX_DUMP_DEBUG_PRINTF(("%c", val<256?val:'?'));
716 break;
717
718 case URX_STRING:
719 case URX_STRING_I:
720 {
721 int32_t lengthOp = fCompiledPat->elementAti(index+1);
722 U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
723 int32_t length = URX_VAL(lengthOp);
724 int32_t i;
725 for (i=val; i<val+length; i++) {
726 UChar c = fLiteralText[i];
727 if (c < 32 || c >= 256) {c = '.';}
728 REGEX_DUMP_DEBUG_PRINTF(("%c", c));
729 }
730 }
731 break;
732
733 case URX_SETREF:
734 case URX_LOOP_SR_I:
735 {
736 UnicodeString s;
737 UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
738 set->toPattern(s, TRUE);
739 for (int32_t i=0; i<s.length(); i++) {
740 REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
741 }
742 }
743 break;
744
745 case URX_STATIC_SETREF:
746 case URX_STAT_SETREF_N:
747 {
748 UnicodeString s;
749 if (val & URX_NEG_SET) {
750 REGEX_DUMP_DEBUG_PRINTF(("NOT "));
751 val &= ~URX_NEG_SET;
752 }
753 UnicodeSet *set = fStaticSets[val];
754 set->toPattern(s, TRUE);
755 for (int32_t i=0; i<s.length(); i++) {
756 REGEX_DUMP_DEBUG_PRINTF(("%c", s.charAt(i)));
757 }
758 }
759 break;
760
761
762 default:
763 REGEX_DUMP_DEBUG_PRINTF(("??????"));
764 break;
765 }
766 REGEX_DUMP_DEBUG_PRINTF(("\n"));
767 }
768 #endif
769
770
771 #if defined(REGEX_DEBUG)
772 U_CAPI void U_EXPORT2
RegexPatternDump(const RegexPattern * This)773 RegexPatternDump(const RegexPattern *This) {
774 int index;
775 int i;
776
777 REGEX_DUMP_DEBUG_PRINTF(("Original Pattern: "));
778 UChar32 c = utext_next32From(This->fPattern, 0);
779 while (c != U_SENTINEL) {
780 if (c<32 || c>256) {
781 c = '.';
782 }
783 REGEX_DUMP_DEBUG_PRINTF(("%c", c));
784
785 c = UTEXT_NEXT32(This->fPattern);
786 }
787 REGEX_DUMP_DEBUG_PRINTF(("\n"));
788 REGEX_DUMP_DEBUG_PRINTF((" Min Match Length: %d\n", This->fMinMatchLen));
789 REGEX_DUMP_DEBUG_PRINTF((" Match Start Type: %s\n", START_OF_MATCH_STR(This->fStartType)));
790 if (This->fStartType == START_STRING) {
791 REGEX_DUMP_DEBUG_PRINTF((" Initial match string: \""));
792 for (i=This->fInitialStringIdx; i<This->fInitialStringIdx+This->fInitialStringLen; i++) {
793 REGEX_DUMP_DEBUG_PRINTF(("%c", This->fLiteralText[i])); // TODO: non-printables, surrogates.
794 }
795 REGEX_DUMP_DEBUG_PRINTF(("\"\n"));
796
797 } else if (This->fStartType == START_SET) {
798 int32_t numSetChars = This->fInitialChars->size();
799 if (numSetChars > 20) {
800 numSetChars = 20;
801 }
802 REGEX_DUMP_DEBUG_PRINTF((" Match First Chars : "));
803 for (i=0; i<numSetChars; i++) {
804 UChar32 c = This->fInitialChars->charAt(i);
805 if (0x20<c && c <0x7e) {
806 REGEX_DUMP_DEBUG_PRINTF(("%c ", c));
807 } else {
808 REGEX_DUMP_DEBUG_PRINTF(("%#x ", c));
809 }
810 }
811 if (numSetChars < This->fInitialChars->size()) {
812 REGEX_DUMP_DEBUG_PRINTF((" ..."));
813 }
814 REGEX_DUMP_DEBUG_PRINTF(("\n"));
815
816 } else if (This->fStartType == START_CHAR) {
817 REGEX_DUMP_DEBUG_PRINTF((" First char of Match : "));
818 if (0x20 < This->fInitialChar && This->fInitialChar<0x7e) {
819 REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar));
820 } else {
821 REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar));
822 }
823 }
824
825 REGEX_DUMP_DEBUG_PRINTF(("\nIndex Binary Type Operand\n" \
826 "-------------------------------------------\n"));
827 for (index = 0; index<This->fCompiledPat->size(); index++) {
828 This->dumpOp(index);
829 }
830 REGEX_DUMP_DEBUG_PRINTF(("\n\n"));
831 }
832 #endif
833
834
835
836 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
837
838 U_NAMESPACE_END
839 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
840