1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 //
4 // file: repattrn.cpp
5 //
6 /*
7 ***************************************************************************
8 * Copyright (C) 2002-2016 International Business Machines Corporation
9 * and others. All rights reserved.
10 ***************************************************************************
11 */
12
13 #include "unicode/utypes.h"
14
15 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
16
17 #include "unicode/regex.h"
18 #include "unicode/uclean.h"
19 #include "cmemory.h"
20 #include "cstr.h"
21 #include "uassert.h"
22 #include "uhash.h"
23 #include "uvector.h"
24 #include "uvectr32.h"
25 #include "uvectr64.h"
26 #include "regexcmp.h"
27 #include "regeximp.h"
28 #include "regexst.h"
29
30 U_NAMESPACE_BEGIN
31
32 //--------------------------------------------------------------------------
33 //
34 // RegexPattern Default Constructor
35 //
36 //--------------------------------------------------------------------------
RegexPattern()37 RegexPattern::RegexPattern() {
38 // Init all of this instances data.
39 init();
40 }
41
42
43 //--------------------------------------------------------------------------
44 //
45 // Copy Constructor Note: This is a rather inefficient implementation,
46 // but it probably doesn't matter.
47 //
48 //--------------------------------------------------------------------------
// Copy constructor.
// Brings the new object to its default state and then reuses the assignment
// operator to duplicate the contents of other.
RegexPattern::RegexPattern(const RegexPattern &other)
    : UObject(other)
{
    init();
    operator=(other);
}
53
54
55
56 //--------------------------------------------------------------------------
57 //
58 // Assignment Operator
59 //
60 //--------------------------------------------------------------------------
// Assignment operator.
//
// Discards everything this pattern currently owns, re-initializes it, and then
// deep-copies other: the pattern text, the scalar match-start data, the
// compiled code, the group map, the UnicodeSets and the named capture map.
//
// Errors are never reported directly; they are left in fDeferredStatus and the
// (possibly partially copied) object is returned.
RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
    if (this == &other) {
        // Source and destination are the same.  Don't do anything.
        return *this;
    }

    // Clean out any previous contents of the object being assigned to,
    // then give it a default initialization.
    zap();
    init();
    if (U_FAILURE(fDeferredStatus)) {
        // init() could not set up its containers.  Keep that error rather than
        // overwriting it with other's status: the members dereferenced further
        // down (fInitialChars, fCompiledPat, ...) may be NULL.
        return *this;
    }

    // A pattern that is itself in an error state has nothing worth copying.
    fDeferredStatus = other.fDeferredStatus;
    if (U_FAILURE(fDeferredStatus)) {
        return *this;
    }

    // Pattern text.  It is held either as a UnicodeString (with fPattern being a
    // UText opened over our own copy of that string) or as a UText only.
    if (other.fPatternString != NULL) {
        fPatternString = new UnicodeString(*(other.fPatternString));
        if (fPatternString == NULL) {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
        } else {
            fPattern = utext_openConstUnicodeString(NULL, fPatternString, &fDeferredStatus);
        }
    } else if (other.fPattern != NULL) {
        fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
    }
    // Otherwise other has no pattern text at all (for example, it was default
    // constructed).  fPattern and fPatternString keep the NULL set by init();
    // a NULL source must not be handed to utext_clone().
    if (U_FAILURE(fDeferredStatus)) {
        return *this;
    }

    // Copy simple fields
    fFlags            = other.fFlags;
    fLiteralText      = other.fLiteralText;
    fMinMatchLen      = other.fMinMatchLen;
    fFrameSize        = other.fFrameSize;
    fDataSize         = other.fDataSize;
    fStaticSets       = other.fStaticSets;
    fStaticSets8      = other.fStaticSets8;

    fStartType        = other.fStartType;
    fInitialStringIdx = other.fInitialStringIdx;
    fInitialStringLen = other.fInitialStringLen;
    *fInitialChars    = *other.fInitialChars;
    fInitialChar      = other.fInitialChar;
    *fInitialChars8   = *other.fInitialChars8;
    fNeedsAltInput    = other.fNeedsAltInput;

    //  Copy the pattern.  It's just values, nothing deep to copy.
    fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
    fGroupMap->assign(*other.fGroupMap, fDeferredStatus);

    //  Copy the Unicode Sets.
    //    Could be made more efficient if the sets were reference counted and shared,
    //    but I doubt that pattern copying will be particularly common.
    //    Note:  init() already added an empty element zero to fSets
    int32_t i;
    int32_t numSets = other.fSets->size();
    fSets8 = new Regex8BitSet[numSets];
    if (fSets8 == NULL) {
        fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
        return *this;
    }
    for (i=1; i<numSets; i++) {
        if (U_FAILURE(fDeferredStatus)) {
            return *this;
        }
        UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
        UnicodeSet *newSet    = new UnicodeSet(*sourceSet);
        if (newSet == NULL) {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
            break;
        }
        fSets->addElement(newSet, fDeferredStatus);
        fSets8[i] = other.fSets8[i];
    }

    // Copy the named capture group hash map.  Each key is duplicated because
    // the key strings are deleted by the map that holds them (see init()).
    int32_t hashPos = UHASH_FIRST;
    while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) {
        if (U_FAILURE(fDeferredStatus)) {
            break;
        }
        const UnicodeString *name = (const UnicodeString *)hashEl->key.pointer;
        UnicodeString *key = new UnicodeString(*name);
        int32_t val = hashEl->value.integer;
        if (key == NULL) {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
        } else {
            uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus);
        }
    }
    return *this;
}
157
158
159 //--------------------------------------------------------------------------
160 //
161 // init Shared initialization for use by constructors.
162 // Bring an uninitialized RegexPattern up to a default state.
163 //
164 //--------------------------------------------------------------------------
init()165 void RegexPattern::init() {
166 fFlags = 0;
167 fCompiledPat = 0;
168 fLiteralText.remove();
169 fSets = NULL;
170 fSets8 = NULL;
171 fDeferredStatus = U_ZERO_ERROR;
172 fMinMatchLen = 0;
173 fFrameSize = 0;
174 fDataSize = 0;
175 fGroupMap = NULL;
176 fStaticSets = NULL;
177 fStaticSets8 = NULL;
178 fStartType = START_NO_INFO;
179 fInitialStringIdx = 0;
180 fInitialStringLen = 0;
181 fInitialChars = NULL;
182 fInitialChar = 0;
183 fInitialChars8 = NULL;
184 fNeedsAltInput = FALSE;
185 fNamedCaptureMap = NULL;
186
187 fPattern = NULL; // will be set later
188 fPatternString = NULL; // may be set later
189 fCompiledPat = new UVector64(fDeferredStatus);
190 fGroupMap = new UVector32(fDeferredStatus);
191 fSets = new UVector(fDeferredStatus);
192 fInitialChars = new UnicodeSet;
193 fInitialChars8 = new Regex8BitSet;
194 fNamedCaptureMap = uhash_open(uhash_hashUnicodeString, // Key hash function
195 uhash_compareUnicodeString, // Key comparator function
196 uhash_compareLong, // Value comparator function
197 &fDeferredStatus);
198 if (U_FAILURE(fDeferredStatus)) {
199 return;
200 }
201 if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL ||
202 fInitialChars == NULL || fInitialChars8 == NULL || fNamedCaptureMap == NULL) {
203 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
204 return;
205 }
206
207 // Slot zero of the vector of sets is reserved. Fill it here.
208 fSets->addElement((int32_t)0, fDeferredStatus);
209
210 // fNamedCaptureMap owns its key strings, type (UnicodeString *)
211 uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject);
212 }
213
214
215 //--------------------------------------------------------------------------
216 //
217 // zap Delete everything owned by this RegexPattern.
218 //
219 //--------------------------------------------------------------------------
zap()220 void RegexPattern::zap() {
221 delete fCompiledPat;
222 fCompiledPat = NULL;
223 int i;
224 for (i=1; i<fSets->size(); i++) {
225 UnicodeSet *s;
226 s = (UnicodeSet *)fSets->elementAt(i);
227 if (s != NULL) {
228 delete s;
229 }
230 }
231 delete fSets;
232 fSets = NULL;
233 delete[] fSets8;
234 fSets8 = NULL;
235 delete fGroupMap;
236 fGroupMap = NULL;
237 delete fInitialChars;
238 fInitialChars = NULL;
239 delete fInitialChars8;
240 fInitialChars8 = NULL;
241 if (fPattern != NULL) {
242 utext_close(fPattern);
243 fPattern = NULL;
244 }
245 if (fPatternString != NULL) {
246 delete fPatternString;
247 fPatternString = NULL;
248 }
249 uhash_close(fNamedCaptureMap);
250 fNamedCaptureMap = NULL;
251 }
252
253
254 //--------------------------------------------------------------------------
255 //
256 // Destructor
257 //
258 //--------------------------------------------------------------------------
~RegexPattern()259 RegexPattern::~RegexPattern() {
260 zap();
261 }
262
263
264 //--------------------------------------------------------------------------
265 //
266 // Clone
267 //
268 //--------------------------------------------------------------------------
clone() const269 RegexPattern *RegexPattern::clone() const {
270 RegexPattern *copy = new RegexPattern(*this);
271 return copy;
272 }
273
274
275 //--------------------------------------------------------------------------
276 //
//    operator ==   (comparison)    Consider two patterns to be == if the
278 // pattern strings and the flags are the same.
279 // Note that pattern strings with the same
280 // characters can still be considered different.
281 //
282 //--------------------------------------------------------------------------
operator ==(const RegexPattern & other) const283 UBool RegexPattern::operator ==(const RegexPattern &other) const {
284 if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
285 if (this->fPatternString != NULL && other.fPatternString != NULL) {
286 return *(this->fPatternString) == *(other.fPatternString);
287 } else if (this->fPattern == NULL) {
288 if (other.fPattern == NULL) {
289 return TRUE;
290 }
291 } else if (other.fPattern != NULL) {
292 UTEXT_SETNATIVEINDEX(this->fPattern, 0);
293 UTEXT_SETNATIVEINDEX(other.fPattern, 0);
294 return utext_equals(this->fPattern, other.fPattern);
295 }
296 }
297 return FALSE;
298 }
299
300 //---------------------------------------------------------------------
301 //
302 // compile
303 //
304 //---------------------------------------------------------------------
305 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UParseError & pe,UErrorCode & status)306 RegexPattern::compile(const UnicodeString ®ex,
307 uint32_t flags,
308 UParseError &pe,
309 UErrorCode &status)
310 {
311 if (U_FAILURE(status)) {
312 return NULL;
313 }
314
315 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
316 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
317 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
318
319 if ((flags & ~allFlags) != 0) {
320 status = U_REGEX_INVALID_FLAG;
321 return NULL;
322 }
323
324 if ((flags & UREGEX_CANON_EQ) != 0) {
325 status = U_REGEX_UNIMPLEMENTED;
326 return NULL;
327 }
328
329 RegexPattern *This = new RegexPattern;
330 if (This == NULL) {
331 status = U_MEMORY_ALLOCATION_ERROR;
332 return NULL;
333 }
334 if (U_FAILURE(This->fDeferredStatus)) {
335 status = This->fDeferredStatus;
336 delete This;
337 return NULL;
338 }
339 This->fFlags = flags;
340
341 RegexCompile compiler(This, status);
342 compiler.compile(regex, pe, status);
343
344 if (U_FAILURE(status)) {
345 delete This;
346 This = NULL;
347 }
348
349 return This;
350 }
351
352
353 //
354 // compile, UText mode
355 //
356 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UParseError & pe,UErrorCode & status)357 RegexPattern::compile(UText *regex,
358 uint32_t flags,
359 UParseError &pe,
360 UErrorCode &status)
361 {
362 if (U_FAILURE(status)) {
363 return NULL;
364 }
365
366 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
367 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
368 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
369
370 if ((flags & ~allFlags) != 0) {
371 status = U_REGEX_INVALID_FLAG;
372 return NULL;
373 }
374
375 if ((flags & UREGEX_CANON_EQ) != 0) {
376 status = U_REGEX_UNIMPLEMENTED;
377 return NULL;
378 }
379
380 RegexPattern *This = new RegexPattern;
381 if (This == NULL) {
382 status = U_MEMORY_ALLOCATION_ERROR;
383 return NULL;
384 }
385 if (U_FAILURE(This->fDeferredStatus)) {
386 status = This->fDeferredStatus;
387 delete This;
388 return NULL;
389 }
390 This->fFlags = flags;
391
392 RegexCompile compiler(This, status);
393 compiler.compile(regex, pe, status);
394
395 if (U_FAILURE(status)) {
396 delete This;
397 This = NULL;
398 }
399
400 return This;
401 }
402
403 //
404 // compile with default flags.
405 //
406 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,UParseError & pe,UErrorCode & err)407 RegexPattern::compile(const UnicodeString ®ex,
408 UParseError &pe,
409 UErrorCode &err)
410 {
411 return compile(regex, 0, pe, err);
412 }
413
414
415 //
416 // compile with default flags, UText mode
417 //
418 RegexPattern * U_EXPORT2
compile(UText * regex,UParseError & pe,UErrorCode & err)419 RegexPattern::compile(UText *regex,
420 UParseError &pe,
421 UErrorCode &err)
422 {
423 return compile(regex, 0, pe, err);
424 }
425
426
427 //
428 // compile with no UParseErr parameter.
429 //
430 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UErrorCode & err)431 RegexPattern::compile(const UnicodeString ®ex,
432 uint32_t flags,
433 UErrorCode &err)
434 {
435 UParseError pe;
436 return compile(regex, flags, pe, err);
437 }
438
439
440 //
441 // compile with no UParseErr parameter, UText mode
442 //
443 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UErrorCode & err)444 RegexPattern::compile(UText *regex,
445 uint32_t flags,
446 UErrorCode &err)
447 {
448 UParseError pe;
449 return compile(regex, flags, pe, err);
450 }
451
452
453 //---------------------------------------------------------------------
454 //
455 // flags
456 //
457 //---------------------------------------------------------------------
flags() const458 uint32_t RegexPattern::flags() const {
459 return fFlags;
460 }
461
462
463 //---------------------------------------------------------------------
464 //
465 // matcher(UnicodeString, err)
466 //
467 //---------------------------------------------------------------------
matcher(const UnicodeString & input,UErrorCode & status) const468 RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
469 UErrorCode &status) const {
470 RegexMatcher *retMatcher = matcher(status);
471 if (retMatcher != NULL) {
472 retMatcher->fDeferredStatus = status;
473 retMatcher->reset(input);
474 }
475 return retMatcher;
476 }
477
478
479 //---------------------------------------------------------------------
480 //
481 // matcher(status)
482 //
483 //---------------------------------------------------------------------
matcher(UErrorCode & status) const484 RegexMatcher *RegexPattern::matcher(UErrorCode &status) const {
485 RegexMatcher *retMatcher = NULL;
486
487 if (U_FAILURE(status)) {
488 return NULL;
489 }
490 if (U_FAILURE(fDeferredStatus)) {
491 status = fDeferredStatus;
492 return NULL;
493 }
494
495 retMatcher = new RegexMatcher(this);
496 if (retMatcher == NULL) {
497 status = U_MEMORY_ALLOCATION_ERROR;
498 return NULL;
499 }
500 return retMatcher;
501 }
502
503
504
505 //---------------------------------------------------------------------
506 //
507 // matches Convenience function to test for a match, starting
508 // with a pattern string and a data string.
509 //
510 //---------------------------------------------------------------------
matches(const UnicodeString & regex,const UnicodeString & input,UParseError & pe,UErrorCode & status)511 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString ®ex,
512 const UnicodeString &input,
513 UParseError &pe,
514 UErrorCode &status) {
515
516 if (U_FAILURE(status)) {return FALSE;}
517
518 UBool retVal;
519 RegexPattern *pat = NULL;
520 RegexMatcher *matcher = NULL;
521
522 pat = RegexPattern::compile(regex, 0, pe, status);
523 matcher = pat->matcher(input, status);
524 retVal = matcher->matches(status);
525
526 delete matcher;
527 delete pat;
528 return retVal;
529 }
530
531
532 //
533 // matches, UText mode
534 //
matches(UText * regex,UText * input,UParseError & pe,UErrorCode & status)535 UBool U_EXPORT2 RegexPattern::matches(UText *regex,
536 UText *input,
537 UParseError &pe,
538 UErrorCode &status) {
539
540 if (U_FAILURE(status)) {return FALSE;}
541
542 UBool retVal = FALSE;
543 RegexPattern *pat = NULL;
544 RegexMatcher *matcher = NULL;
545
546 pat = RegexPattern::compile(regex, 0, pe, status);
547 matcher = pat->matcher(status);
548 if (U_SUCCESS(status)) {
549 matcher->reset(input);
550 retVal = matcher->matches(status);
551 }
552
553 delete matcher;
554 delete pat;
555 return retVal;
556 }
557
558
559
560
561
562 //---------------------------------------------------------------------
563 //
564 // pattern
565 //
566 //---------------------------------------------------------------------
pattern() const567 UnicodeString RegexPattern::pattern() const {
568 if (fPatternString != NULL) {
569 return *fPatternString;
570 } else if (fPattern == NULL) {
571 return UnicodeString();
572 } else {
573 UErrorCode status = U_ZERO_ERROR;
574 int64_t nativeLen = utext_nativeLength(fPattern);
575 int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
576 UnicodeString result;
577
578 status = U_ZERO_ERROR;
579 UChar *resultChars = result.getBuffer(len16);
580 utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
581 result.releaseBuffer(len16);
582
583 return result;
584 }
585 }
586
587
588
589
590 //---------------------------------------------------------------------
591 //
592 // patternText
593 //
594 //---------------------------------------------------------------------
patternText(UErrorCode & status) const595 UText *RegexPattern::patternText(UErrorCode &status) const {
596 if (U_FAILURE(status)) {return NULL;}
597 status = U_ZERO_ERROR;
598
599 if (fPattern != NULL) {
600 return fPattern;
601 } else {
602 RegexStaticSets::initGlobals(&status);
603 return RegexStaticSets::gStaticSets->fEmptyText;
604 }
605 }
606
607
608 //--------------------------------------------------------------------------------
609 //
610 // groupNumberFromName()
611 //
612 //--------------------------------------------------------------------------------
groupNumberFromName(const UnicodeString & groupName,UErrorCode & status) const613 int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const {
614 if (U_FAILURE(status)) {
615 return 0;
616 }
617
618 // No need to explicitly check for syntactically valid names.
619 // Invalid ones will never be in the map, and the lookup will fail.
620
621 int32_t number = uhash_geti(fNamedCaptureMap, &groupName);
622 if (number == 0) {
623 status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
624 }
625 return number;
626 }
627
groupNumberFromName(const char * groupName,int32_t nameLength,UErrorCode & status) const628 int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const {
629 if (U_FAILURE(status)) {
630 return 0;
631 }
632 UnicodeString name(groupName, nameLength, US_INV);
633 return groupNumberFromName(name, status);
634 }
635
636
637 //---------------------------------------------------------------------
638 //
639 // split
640 //
641 //---------------------------------------------------------------------
split(const UnicodeString & input,UnicodeString dest[],int32_t destCapacity,UErrorCode & status) const642 int32_t RegexPattern::split(const UnicodeString &input,
643 UnicodeString dest[],
644 int32_t destCapacity,
645 UErrorCode &status) const
646 {
647 if (U_FAILURE(status)) {
648 return 0;
649 };
650
651 RegexMatcher m(this);
652 int32_t r = 0;
653 // Check m's status to make sure all is ok.
654 if (U_SUCCESS(m.fDeferredStatus)) {
655 r = m.split(input, dest, destCapacity, status);
656 }
657 return r;
658 }
659
660 //
661 // split, UText mode
662 //
split(UText * input,UText * dest[],int32_t destCapacity,UErrorCode & status) const663 int32_t RegexPattern::split(UText *input,
664 UText *dest[],
665 int32_t destCapacity,
666 UErrorCode &status) const
667 {
668 if (U_FAILURE(status)) {
669 return 0;
670 };
671
672 RegexMatcher m(this);
673 int32_t r = 0;
674 // Check m's status to make sure all is ok.
675 if (U_SUCCESS(m.fDeferredStatus)) {
676 r = m.split(input, dest, destCapacity, status);
677 }
678 return r;
679 }
680
681
682 //---------------------------------------------------------------------
683 //
684 // dump Output the compiled form of the pattern.
685 // Debugging function only.
686 //
687 //---------------------------------------------------------------------
dumpOp(int32_t index) const688 void RegexPattern::dumpOp(int32_t index) const {
689 (void)index; // Suppress warnings in non-debug build.
690 #if defined(REGEX_DEBUG)
691 static const char * const opNames[] = {URX_OPCODE_NAMES};
692 int32_t op = fCompiledPat->elementAti(index);
693 int32_t val = URX_VAL(op);
694 int32_t type = URX_TYPE(op);
695 int32_t pinnedType = type;
696 if ((uint32_t)pinnedType >= UPRV_LENGTHOF(opNames)) {
697 pinnedType = 0;
698 }
699
700 printf("%4d %08x %-15s ", index, op, opNames[pinnedType]);
701 switch (type) {
702 case URX_NOP:
703 case URX_DOTANY:
704 case URX_DOTANY_ALL:
705 case URX_FAIL:
706 case URX_CARET:
707 case URX_DOLLAR:
708 case URX_BACKSLASH_G:
709 case URX_BACKSLASH_X:
710 case URX_END:
711 case URX_DOLLAR_M:
712 case URX_CARET_M:
713 // Types with no operand field of interest.
714 break;
715
716 case URX_RESERVED_OP:
717 case URX_START_CAPTURE:
718 case URX_END_CAPTURE:
719 case URX_STATE_SAVE:
720 case URX_JMP:
721 case URX_JMP_SAV:
722 case URX_JMP_SAV_X:
723 case URX_BACKSLASH_B:
724 case URX_BACKSLASH_BU:
725 case URX_BACKSLASH_D:
726 case URX_BACKSLASH_Z:
727 case URX_STRING_LEN:
728 case URX_CTR_INIT:
729 case URX_CTR_INIT_NG:
730 case URX_CTR_LOOP:
731 case URX_CTR_LOOP_NG:
732 case URX_RELOC_OPRND:
733 case URX_STO_SP:
734 case URX_LD_SP:
735 case URX_BACKREF:
736 case URX_STO_INP_LOC:
737 case URX_JMPX:
738 case URX_LA_START:
739 case URX_LA_END:
740 case URX_BACKREF_I:
741 case URX_LB_START:
742 case URX_LB_CONT:
743 case URX_LB_END:
744 case URX_LBN_CONT:
745 case URX_LBN_END:
746 case URX_LOOP_C:
747 case URX_LOOP_DOT_I:
748 case URX_BACKSLASH_H:
749 case URX_BACKSLASH_R:
750 case URX_BACKSLASH_V:
751 // types with an integer operand field.
752 printf("%d", val);
753 break;
754
755 case URX_ONECHAR:
756 case URX_ONECHAR_I:
757 if (val < 0x20) {
758 printf("%#x", val);
759 } else {
760 printf("'%s'", CStr(UnicodeString(val))());
761 }
762 break;
763
764 case URX_STRING:
765 case URX_STRING_I:
766 {
767 int32_t lengthOp = fCompiledPat->elementAti(index+1);
768 U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
769 int32_t length = URX_VAL(lengthOp);
770 UnicodeString str(fLiteralText, val, length);
771 printf("%s", CStr(str)());
772 }
773 break;
774
775 case URX_SETREF:
776 case URX_LOOP_SR_I:
777 {
778 UnicodeString s;
779 UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
780 set->toPattern(s, TRUE);
781 printf("%s", CStr(s)());
782 }
783 break;
784
785 case URX_STATIC_SETREF:
786 case URX_STAT_SETREF_N:
787 {
788 UnicodeString s;
789 if (val & URX_NEG_SET) {
790 printf("NOT ");
791 val &= ~URX_NEG_SET;
792 }
793 UnicodeSet *set = fStaticSets[val];
794 set->toPattern(s, TRUE);
795 printf("%s", CStr(s)());
796 }
797 break;
798
799
800 default:
801 printf("??????");
802 break;
803 }
804 printf("\n");
805 #endif
806 }
807
808
dumpPattern() const809 void RegexPattern::dumpPattern() const {
810 #if defined(REGEX_DEBUG)
811 int index;
812
813 UnicodeString patStr;
814 for (UChar32 c = utext_next32From(fPattern, 0); c != U_SENTINEL; c = utext_next32(fPattern)) {
815 patStr.append(c);
816 }
817 printf("Original Pattern: \"%s\"\n", CStr(patStr)());
818 printf(" Min Match Length: %d\n", fMinMatchLen);
819 printf(" Match Start Type: %s\n", START_OF_MATCH_STR(fStartType));
820 if (fStartType == START_STRING) {
821 UnicodeString initialString(fLiteralText,fInitialStringIdx, fInitialStringLen);
822 printf(" Initial match string: \"%s\"\n", CStr(initialString)());
823 } else if (fStartType == START_SET) {
824 UnicodeString s;
825 fInitialChars->toPattern(s, TRUE);
826 printf(" Match First Chars: %s\n", CStr(s)());
827
828 } else if (fStartType == START_CHAR) {
829 printf(" First char of Match: ");
830 if (fInitialChar > 0x20) {
831 printf("'%s'\n", CStr(UnicodeString(fInitialChar))());
832 } else {
833 printf("%#x\n", fInitialChar);
834 }
835 }
836
837 printf("Named Capture Groups:\n");
838 if (uhash_count(fNamedCaptureMap) == 0) {
839 printf(" None\n");
840 } else {
841 int32_t pos = UHASH_FIRST;
842 const UHashElement *el = NULL;
843 while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) {
844 const UnicodeString *name = (const UnicodeString *)el->key.pointer;
845 int32_t number = el->value.integer;
846 printf(" %d\t%s\n", number, CStr(*name)());
847 }
848 }
849
850 printf("\nIndex Binary Type Operand\n" \
851 "-------------------------------------------\n");
852 for (index = 0; index<fCompiledPat->size(); index++) {
853 dumpOp(index);
854 }
855 printf("\n\n");
856 #endif
857 }
858
859
860
861 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
862
863 U_NAMESPACE_END
864 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
865