1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 //
4 // file: repattrn.cpp
5 //
6 /*
7 ***************************************************************************
8 * Copyright (C) 2002-2016 International Business Machines Corporation
9 * and others. All rights reserved.
10 ***************************************************************************
11 */
12
13 #include "unicode/utypes.h"
14
15 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
16
17 #include "unicode/regex.h"
18 #include "unicode/uclean.h"
19 #include "cmemory.h"
20 #include "cstr.h"
21 #include "uassert.h"
22 #include "uhash.h"
23 #include "uvector.h"
24 #include "uvectr32.h"
25 #include "uvectr64.h"
26 #include "regexcmp.h"
27 #include "regeximp.h"
28 #include "regexst.h"
29
30 U_NAMESPACE_BEGIN
31
32 //--------------------------------------------------------------------------
33 //
34 // RegexPattern Default Constructor
35 //
36 //--------------------------------------------------------------------------
RegexPattern()37 RegexPattern::RegexPattern() {
38 // Init all of this instances data.
39 init();
40 }
41
42
43 //--------------------------------------------------------------------------
44 //
45 // Copy Constructor Note: This is a rather inefficient implementation,
46 // but it probably doesn't matter.
47 //
48 //--------------------------------------------------------------------------
// Copy constructor, implemented in terms of the assignment operator.
RegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) {
    // The assignment operator begins by releasing the current members, so
    // they must hold valid default values before it runs.
    init();
    operator=(other);
}
53
54
55
56 //--------------------------------------------------------------------------
57 //
58 // Assignment Operator
59 //
60 //--------------------------------------------------------------------------
// Copy assignment.
//
// Releases whatever this object currently holds, re-initializes it, and then
// deep-copies the pattern text, compiled code, sets and named-capture map of
// `other`. Errors are not returned; they are recorded in fDeferredStatus and
// copying stops at the first one, leaving this object in an error state.
//
// Returns a reference to this object.
RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
    if (this == &other) {
        // Source and destination are the same. Don't do anything.
        return *this;
    }

    // Clean out any previous contents, then give the target a default
    // initialization.
    zap();
    init();
    if (U_FAILURE(fDeferredStatus)) {
        // init() could not create the containers that are dereferenced
        // below. Keep its error rather than overwriting it with the status
        // of `other`.
        return *this;
    }

    // A source carrying a deferred error has nothing usable to copy.
    fDeferredStatus = other.fDeferredStatus;
    if (U_FAILURE(fDeferredStatus)) {
        return *this;
    }

    // Pattern text: either an owned UnicodeString wrapped in a UText, or a
    // clone of the source's UText.
    if (other.fPatternString != nullptr) {
        fPatternString = new UnicodeString(*(other.fPatternString));
        if (fPatternString == nullptr) {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
        } else {
            fPattern = utext_openConstUnicodeString(nullptr, fPatternString, &fDeferredStatus);
        }
    } else if (other.fPattern != nullptr) {
        fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
    }
    // Otherwise the source has no pattern text at all (for example, it was
    // default constructed): fPattern and fPatternString stay NULL, as set
    // by init(), instead of handing a NULL source to utext_clone().
    if (U_FAILURE(fDeferredStatus)) {
        return *this;
    }

    // Copy simple fields.
    fFlags            = other.fFlags;
    fLiteralText      = other.fLiteralText;
    fMinMatchLen      = other.fMinMatchLen;
    fFrameSize        = other.fFrameSize;
    fDataSize         = other.fDataSize;
    fStaticSets       = other.fStaticSets;
    fStaticSets8      = other.fStaticSets8;

    fStartType        = other.fStartType;
    fInitialStringIdx = other.fInitialStringIdx;
    fInitialStringLen = other.fInitialStringLen;
    *fInitialChars    = *other.fInitialChars;
    fInitialChar      = other.fInitialChar;
    *fInitialChars8   = *other.fInitialChars8;
    fNeedsAltInput    = other.fNeedsAltInput;

    // Copy the compiled pattern and group map. They are just values,
    // nothing deep to copy.
    fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
    fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
    if (U_FAILURE(fDeferredStatus)) {
        return *this;
    }

    // Copy the Unicode Sets.
    //   Could be made more efficient if the sets were reference counted and
    //   shared, but pattern copying is not expected to be common.
    //   Note: init() already added an empty element zero to fSets.
    const int32_t numSets = other.fSets->size();
    fSets8 = new Regex8BitSet[numSets];
    if (fSets8 == nullptr) {
        fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
        return *this;
    }
    for (int32_t i = 1; i < numSets; i++) {
        const UnicodeSet *sourceSet = static_cast<const UnicodeSet *>(other.fSets->elementAt(i));
        UnicodeSet *newSet = new UnicodeSet(*sourceSet);
        if (newSet == nullptr) {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
            return *this;
        }
        fSets->addElement(newSet, fDeferredStatus);
        if (U_FAILURE(fDeferredStatus)) {
            // zap() frees only the sets that are stored in fSets. Assumes a
            // failed addElement() did not store the pointer (confirm against
            // UVector), so the new set has to be released here.
            delete newSet;
            return *this;
        }
        fSets8[i] = other.fSets8[i];
    }

    // Copy the named capture group hash map. The map owns its key strings,
    // so every key is duplicated.
    if (other.fNamedCaptureMap != nullptr && initNamedCaptureMap()) {
        int32_t hashPos = UHASH_FIRST;
        while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) {
            if (U_FAILURE(fDeferredStatus)) {
                break;
            }
            const UnicodeString *name = static_cast<const UnicodeString *>(hashEl->key.pointer);
            UnicodeString *key = new UnicodeString(*name);
            const int32_t val = hashEl->value.integer;
            if (key == nullptr) {
                fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
            } else {
                uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus);
            }
        }
    }
    return *this;
}
159
160
161 //--------------------------------------------------------------------------
162 //
163 // init Shared initialization for use by constructors.
164 // Bring an uninitialized RegexPattern up to a default state.
165 //
166 //--------------------------------------------------------------------------
init()167 void RegexPattern::init() {
168 fFlags = 0;
169 fCompiledPat = 0;
170 fLiteralText.remove();
171 fSets = NULL;
172 fSets8 = NULL;
173 fDeferredStatus = U_ZERO_ERROR;
174 fMinMatchLen = 0;
175 fFrameSize = 0;
176 fDataSize = 0;
177 fGroupMap = NULL;
178 fStaticSets = NULL;
179 fStaticSets8 = NULL;
180 fStartType = START_NO_INFO;
181 fInitialStringIdx = 0;
182 fInitialStringLen = 0;
183 fInitialChars = NULL;
184 fInitialChar = 0;
185 fInitialChars8 = NULL;
186 fNeedsAltInput = FALSE;
187 fNamedCaptureMap = NULL;
188
189 fPattern = NULL; // will be set later
190 fPatternString = NULL; // may be set later
191 fCompiledPat = new UVector64(fDeferredStatus);
192 fGroupMap = new UVector32(fDeferredStatus);
193 fSets = new UVector(fDeferredStatus);
194 fInitialChars = new UnicodeSet;
195 fInitialChars8 = new Regex8BitSet;
196 if (U_FAILURE(fDeferredStatus)) {
197 return;
198 }
199 if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL ||
200 fInitialChars == NULL || fInitialChars8 == NULL) {
201 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
202 return;
203 }
204
205 // Slot zero of the vector of sets is reserved. Fill it here.
206 fSets->addElement((int32_t)0, fDeferredStatus);
207 }
208
209
initNamedCaptureMap()210 bool RegexPattern::initNamedCaptureMap() {
211 if (fNamedCaptureMap) {
212 return true;
213 }
214 fNamedCaptureMap = uhash_openSize(uhash_hashUnicodeString, // Key hash function
215 uhash_compareUnicodeString, // Key comparator function
216 uhash_compareLong, // Value comparator function
217 7, // Initial table capacity
218 &fDeferredStatus);
219 if (U_FAILURE(fDeferredStatus)) {
220 return false;
221 }
222
223 // fNamedCaptureMap owns its key strings, type (UnicodeString *)
224 uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject);
225 return true;
226 }
227
228 //--------------------------------------------------------------------------
229 //
230 // zap Delete everything owned by this RegexPattern.
231 //
232 //--------------------------------------------------------------------------
zap()233 void RegexPattern::zap() {
234 delete fCompiledPat;
235 fCompiledPat = NULL;
236 int i;
237 for (i=1; i<fSets->size(); i++) {
238 UnicodeSet *s;
239 s = (UnicodeSet *)fSets->elementAt(i);
240 if (s != NULL) {
241 delete s;
242 }
243 }
244 delete fSets;
245 fSets = NULL;
246 delete[] fSets8;
247 fSets8 = NULL;
248 delete fGroupMap;
249 fGroupMap = NULL;
250 delete fInitialChars;
251 fInitialChars = NULL;
252 delete fInitialChars8;
253 fInitialChars8 = NULL;
254 if (fPattern != NULL) {
255 utext_close(fPattern);
256 fPattern = NULL;
257 }
258 if (fPatternString != NULL) {
259 delete fPatternString;
260 fPatternString = NULL;
261 }
262 if (fNamedCaptureMap != NULL) {
263 uhash_close(fNamedCaptureMap);
264 fNamedCaptureMap = NULL;
265 }
266 }
267
268
269 //--------------------------------------------------------------------------
270 //
271 // Destructor
272 //
273 //--------------------------------------------------------------------------
~RegexPattern()274 RegexPattern::~RegexPattern() {
275 zap();
276 }
277
278
279 //--------------------------------------------------------------------------
280 //
281 // Clone
282 //
283 //--------------------------------------------------------------------------
clone() const284 RegexPattern *RegexPattern::clone() const {
285 RegexPattern *copy = new RegexPattern(*this);
286 return copy;
287 }
288
289
//--------------------------------------------------------------------------
//
//    operator ==   (comparison)    Consider two patterns to be == if the
//                                  pattern strings and the flags are the same.
//                                  Note that pattern strings with the same
//                                  characters can still be considered different.
//
//--------------------------------------------------------------------------
operator ==(const RegexPattern & other) const298 UBool RegexPattern::operator ==(const RegexPattern &other) const {
299 if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
300 if (this->fPatternString != NULL && other.fPatternString != NULL) {
301 return *(this->fPatternString) == *(other.fPatternString);
302 } else if (this->fPattern == NULL) {
303 if (other.fPattern == NULL) {
304 return TRUE;
305 }
306 } else if (other.fPattern != NULL) {
307 UTEXT_SETNATIVEINDEX(this->fPattern, 0);
308 UTEXT_SETNATIVEINDEX(other.fPattern, 0);
309 return utext_equals(this->fPattern, other.fPattern);
310 }
311 }
312 return FALSE;
313 }
314
315 //---------------------------------------------------------------------
316 //
317 // compile
318 //
319 //---------------------------------------------------------------------
320 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UParseError & pe,UErrorCode & status)321 RegexPattern::compile(const UnicodeString ®ex,
322 uint32_t flags,
323 UParseError &pe,
324 UErrorCode &status)
325 {
326 if (U_FAILURE(status)) {
327 return NULL;
328 }
329
330 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
331 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
332 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
333
334 if ((flags & ~allFlags) != 0) {
335 status = U_REGEX_INVALID_FLAG;
336 return NULL;
337 }
338
339 if ((flags & UREGEX_CANON_EQ) != 0) {
340 status = U_REGEX_UNIMPLEMENTED;
341 return NULL;
342 }
343
344 RegexPattern *This = new RegexPattern;
345 if (This == NULL) {
346 status = U_MEMORY_ALLOCATION_ERROR;
347 return NULL;
348 }
349 if (U_FAILURE(This->fDeferredStatus)) {
350 status = This->fDeferredStatus;
351 delete This;
352 return NULL;
353 }
354 This->fFlags = flags;
355
356 RegexCompile compiler(This, status);
357 compiler.compile(regex, pe, status);
358
359 if (U_FAILURE(status)) {
360 delete This;
361 This = NULL;
362 }
363
364 return This;
365 }
366
367
368 //
369 // compile, UText mode
370 //
371 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UParseError & pe,UErrorCode & status)372 RegexPattern::compile(UText *regex,
373 uint32_t flags,
374 UParseError &pe,
375 UErrorCode &status)
376 {
377 if (U_FAILURE(status)) {
378 return NULL;
379 }
380
381 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
382 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
383 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
384
385 if ((flags & ~allFlags) != 0) {
386 status = U_REGEX_INVALID_FLAG;
387 return NULL;
388 }
389
390 if ((flags & UREGEX_CANON_EQ) != 0) {
391 status = U_REGEX_UNIMPLEMENTED;
392 return NULL;
393 }
394
395 RegexPattern *This = new RegexPattern;
396 if (This == NULL) {
397 status = U_MEMORY_ALLOCATION_ERROR;
398 return NULL;
399 }
400 if (U_FAILURE(This->fDeferredStatus)) {
401 status = This->fDeferredStatus;
402 delete This;
403 return NULL;
404 }
405 This->fFlags = flags;
406
407 RegexCompile compiler(This, status);
408 compiler.compile(regex, pe, status);
409
410 if (U_FAILURE(status)) {
411 delete This;
412 This = NULL;
413 }
414
415 return This;
416 }
417
418 //
419 // compile with default flags.
420 //
421 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,UParseError & pe,UErrorCode & err)422 RegexPattern::compile(const UnicodeString ®ex,
423 UParseError &pe,
424 UErrorCode &err)
425 {
426 return compile(regex, 0, pe, err);
427 }
428
429
430 //
431 // compile with default flags, UText mode
432 //
433 RegexPattern * U_EXPORT2
compile(UText * regex,UParseError & pe,UErrorCode & err)434 RegexPattern::compile(UText *regex,
435 UParseError &pe,
436 UErrorCode &err)
437 {
438 return compile(regex, 0, pe, err);
439 }
440
441
442 //
443 // compile with no UParseErr parameter.
444 //
445 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UErrorCode & err)446 RegexPattern::compile(const UnicodeString ®ex,
447 uint32_t flags,
448 UErrorCode &err)
449 {
450 UParseError pe;
451 return compile(regex, flags, pe, err);
452 }
453
454
455 //
456 // compile with no UParseErr parameter, UText mode
457 //
458 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UErrorCode & err)459 RegexPattern::compile(UText *regex,
460 uint32_t flags,
461 UErrorCode &err)
462 {
463 UParseError pe;
464 return compile(regex, flags, pe, err);
465 }
466
467
468 //---------------------------------------------------------------------
469 //
470 // flags
471 //
472 //---------------------------------------------------------------------
flags() const473 uint32_t RegexPattern::flags() const {
474 return fFlags;
475 }
476
477
478 //---------------------------------------------------------------------
479 //
480 // matcher(UnicodeString, err)
481 //
482 //---------------------------------------------------------------------
matcher(const UnicodeString & input,UErrorCode & status) const483 RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
484 UErrorCode &status) const {
485 RegexMatcher *retMatcher = matcher(status);
486 if (retMatcher != NULL) {
487 retMatcher->fDeferredStatus = status;
488 retMatcher->reset(input);
489 }
490 return retMatcher;
491 }
492
493
494 //---------------------------------------------------------------------
495 //
496 // matcher(status)
497 //
498 //---------------------------------------------------------------------
matcher(UErrorCode & status) const499 RegexMatcher *RegexPattern::matcher(UErrorCode &status) const {
500 RegexMatcher *retMatcher = NULL;
501
502 if (U_FAILURE(status)) {
503 return NULL;
504 }
505 if (U_FAILURE(fDeferredStatus)) {
506 status = fDeferredStatus;
507 return NULL;
508 }
509
510 retMatcher = new RegexMatcher(this);
511 if (retMatcher == NULL) {
512 status = U_MEMORY_ALLOCATION_ERROR;
513 return NULL;
514 }
515 return retMatcher;
516 }
517
518
519
520 //---------------------------------------------------------------------
521 //
522 // matches Convenience function to test for a match, starting
523 // with a pattern string and a data string.
524 //
525 //---------------------------------------------------------------------
matches(const UnicodeString & regex,const UnicodeString & input,UParseError & pe,UErrorCode & status)526 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString ®ex,
527 const UnicodeString &input,
528 UParseError &pe,
529 UErrorCode &status) {
530
531 if (U_FAILURE(status)) {return FALSE;}
532
533 UBool retVal;
534 RegexPattern *pat = NULL;
535 RegexMatcher *matcher = NULL;
536
537 pat = RegexPattern::compile(regex, 0, pe, status);
538 matcher = pat->matcher(input, status);
539 retVal = matcher->matches(status);
540
541 delete matcher;
542 delete pat;
543 return retVal;
544 }
545
546
547 //
548 // matches, UText mode
549 //
matches(UText * regex,UText * input,UParseError & pe,UErrorCode & status)550 UBool U_EXPORT2 RegexPattern::matches(UText *regex,
551 UText *input,
552 UParseError &pe,
553 UErrorCode &status) {
554
555 if (U_FAILURE(status)) {return FALSE;}
556
557 UBool retVal = FALSE;
558 RegexPattern *pat = NULL;
559 RegexMatcher *matcher = NULL;
560
561 pat = RegexPattern::compile(regex, 0, pe, status);
562 matcher = pat->matcher(status);
563 if (U_SUCCESS(status)) {
564 matcher->reset(input);
565 retVal = matcher->matches(status);
566 }
567
568 delete matcher;
569 delete pat;
570 return retVal;
571 }
572
573
574
575
576
577 //---------------------------------------------------------------------
578 //
579 // pattern
580 //
581 //---------------------------------------------------------------------
pattern() const582 UnicodeString RegexPattern::pattern() const {
583 if (fPatternString != NULL) {
584 return *fPatternString;
585 } else if (fPattern == NULL) {
586 return UnicodeString();
587 } else {
588 UErrorCode status = U_ZERO_ERROR;
589 int64_t nativeLen = utext_nativeLength(fPattern);
590 int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
591 UnicodeString result;
592
593 status = U_ZERO_ERROR;
594 UChar *resultChars = result.getBuffer(len16);
595 utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
596 result.releaseBuffer(len16);
597
598 return result;
599 }
600 }
601
602
603
604
605 //---------------------------------------------------------------------
606 //
607 // patternText
608 //
609 //---------------------------------------------------------------------
patternText(UErrorCode & status) const610 UText *RegexPattern::patternText(UErrorCode &status) const {
611 if (U_FAILURE(status)) {return NULL;}
612 status = U_ZERO_ERROR;
613
614 if (fPattern != NULL) {
615 return fPattern;
616 } else {
617 RegexStaticSets::initGlobals(&status);
618 return RegexStaticSets::gStaticSets->fEmptyText;
619 }
620 }
621
622
623 //--------------------------------------------------------------------------------
624 //
625 // groupNumberFromName()
626 //
627 //--------------------------------------------------------------------------------
groupNumberFromName(const UnicodeString & groupName,UErrorCode & status) const628 int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const {
629 if (U_FAILURE(status)) {
630 return 0;
631 }
632
633 // No need to explicitly check for syntactically valid names.
634 // Invalid ones will never be in the map, and the lookup will fail.
635
636 int32_t number = fNamedCaptureMap ? uhash_geti(fNamedCaptureMap, &groupName) : 0;
637 if (number == 0) {
638 status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
639 }
640 return number;
641 }
642
groupNumberFromName(const char * groupName,int32_t nameLength,UErrorCode & status) const643 int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const {
644 if (U_FAILURE(status)) {
645 return 0;
646 }
647 UnicodeString name(groupName, nameLength, US_INV);
648 return groupNumberFromName(name, status);
649 }
650
651
652 //---------------------------------------------------------------------
653 //
654 // split
655 //
656 //---------------------------------------------------------------------
split(const UnicodeString & input,UnicodeString dest[],int32_t destCapacity,UErrorCode & status) const657 int32_t RegexPattern::split(const UnicodeString &input,
658 UnicodeString dest[],
659 int32_t destCapacity,
660 UErrorCode &status) const
661 {
662 if (U_FAILURE(status)) {
663 return 0;
664 }
665
666 RegexMatcher m(this);
667 int32_t r = 0;
668 // Check m's status to make sure all is ok.
669 if (U_SUCCESS(m.fDeferredStatus)) {
670 r = m.split(input, dest, destCapacity, status);
671 }
672 return r;
673 }
674
675 //
676 // split, UText mode
677 //
split(UText * input,UText * dest[],int32_t destCapacity,UErrorCode & status) const678 int32_t RegexPattern::split(UText *input,
679 UText *dest[],
680 int32_t destCapacity,
681 UErrorCode &status) const
682 {
683 if (U_FAILURE(status)) {
684 return 0;
685 }
686
687 RegexMatcher m(this);
688 int32_t r = 0;
689 // Check m's status to make sure all is ok.
690 if (U_SUCCESS(m.fDeferredStatus)) {
691 r = m.split(input, dest, destCapacity, status);
692 }
693 return r;
694 }
695
696
697 //---------------------------------------------------------------------
698 //
699 // dump Output the compiled form of the pattern.
700 // Debugging function only.
701 //
702 //---------------------------------------------------------------------
dumpOp(int32_t index) const703 void RegexPattern::dumpOp(int32_t index) const {
704 (void)index; // Suppress warnings in non-debug build.
705 #if defined(REGEX_DEBUG)
706 static const char * const opNames[] = {URX_OPCODE_NAMES};
707 int32_t op = fCompiledPat->elementAti(index);
708 int32_t val = URX_VAL(op);
709 int32_t type = URX_TYPE(op);
710 int32_t pinnedType = type;
711 if ((uint32_t)pinnedType >= UPRV_LENGTHOF(opNames)) {
712 pinnedType = 0;
713 }
714
715 printf("%4d %08x %-15s ", index, op, opNames[pinnedType]);
716 switch (type) {
717 case URX_NOP:
718 case URX_DOTANY:
719 case URX_DOTANY_ALL:
720 case URX_FAIL:
721 case URX_CARET:
722 case URX_DOLLAR:
723 case URX_BACKSLASH_G:
724 case URX_BACKSLASH_X:
725 case URX_END:
726 case URX_DOLLAR_M:
727 case URX_CARET_M:
728 // Types with no operand field of interest.
729 break;
730
731 case URX_RESERVED_OP:
732 case URX_START_CAPTURE:
733 case URX_END_CAPTURE:
734 case URX_STATE_SAVE:
735 case URX_JMP:
736 case URX_JMP_SAV:
737 case URX_JMP_SAV_X:
738 case URX_BACKSLASH_B:
739 case URX_BACKSLASH_BU:
740 case URX_BACKSLASH_D:
741 case URX_BACKSLASH_Z:
742 case URX_STRING_LEN:
743 case URX_CTR_INIT:
744 case URX_CTR_INIT_NG:
745 case URX_CTR_LOOP:
746 case URX_CTR_LOOP_NG:
747 case URX_RELOC_OPRND:
748 case URX_STO_SP:
749 case URX_LD_SP:
750 case URX_BACKREF:
751 case URX_STO_INP_LOC:
752 case URX_JMPX:
753 case URX_LA_START:
754 case URX_LA_END:
755 case URX_BACKREF_I:
756 case URX_LB_START:
757 case URX_LB_CONT:
758 case URX_LB_END:
759 case URX_LBN_CONT:
760 case URX_LBN_END:
761 case URX_LOOP_C:
762 case URX_LOOP_DOT_I:
763 case URX_BACKSLASH_H:
764 case URX_BACKSLASH_R:
765 case URX_BACKSLASH_V:
766 // types with an integer operand field.
767 printf("%d", val);
768 break;
769
770 case URX_ONECHAR:
771 case URX_ONECHAR_I:
772 if (val < 0x20) {
773 printf("%#x", val);
774 } else {
775 printf("'%s'", CStr(UnicodeString(val))());
776 }
777 break;
778
779 case URX_STRING:
780 case URX_STRING_I:
781 {
782 int32_t lengthOp = fCompiledPat->elementAti(index+1);
783 U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
784 int32_t length = URX_VAL(lengthOp);
785 UnicodeString str(fLiteralText, val, length);
786 printf("%s", CStr(str)());
787 }
788 break;
789
790 case URX_SETREF:
791 case URX_LOOP_SR_I:
792 {
793 UnicodeString s;
794 UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
795 set->toPattern(s, TRUE);
796 printf("%s", CStr(s)());
797 }
798 break;
799
800 case URX_STATIC_SETREF:
801 case URX_STAT_SETREF_N:
802 {
803 UnicodeString s;
804 if (val & URX_NEG_SET) {
805 printf("NOT ");
806 val &= ~URX_NEG_SET;
807 }
808 UnicodeSet *set = fStaticSets[val];
809 set->toPattern(s, TRUE);
810 printf("%s", CStr(s)());
811 }
812 break;
813
814
815 default:
816 printf("??????");
817 break;
818 }
819 printf("\n");
820 #endif
821 }
822
823
dumpPattern() const824 void RegexPattern::dumpPattern() const {
825 #if defined(REGEX_DEBUG)
826 int index;
827
828 UnicodeString patStr;
829 for (UChar32 c = utext_next32From(fPattern, 0); c != U_SENTINEL; c = utext_next32(fPattern)) {
830 patStr.append(c);
831 }
832 printf("Original Pattern: \"%s\"\n", CStr(patStr)());
833 printf(" Min Match Length: %d\n", fMinMatchLen);
834 printf(" Match Start Type: %s\n", START_OF_MATCH_STR(fStartType));
835 if (fStartType == START_STRING) {
836 UnicodeString initialString(fLiteralText,fInitialStringIdx, fInitialStringLen);
837 printf(" Initial match string: \"%s\"\n", CStr(initialString)());
838 } else if (fStartType == START_SET) {
839 UnicodeString s;
840 fInitialChars->toPattern(s, TRUE);
841 printf(" Match First Chars: %s\n", CStr(s)());
842
843 } else if (fStartType == START_CHAR) {
844 printf(" First char of Match: ");
845 if (fInitialChar > 0x20) {
846 printf("'%s'\n", CStr(UnicodeString(fInitialChar))());
847 } else {
848 printf("%#x\n", fInitialChar);
849 }
850 }
851
852 printf("Named Capture Groups:\n");
853 if (!fNamedCaptureMap || uhash_count(fNamedCaptureMap) == 0) {
854 printf(" None\n");
855 } else {
856 int32_t pos = UHASH_FIRST;
857 const UHashElement *el = NULL;
858 while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) {
859 const UnicodeString *name = (const UnicodeString *)el->key.pointer;
860 int32_t number = el->value.integer;
861 printf(" %d\t%s\n", number, CStr(*name)());
862 }
863 }
864
865 printf("\nIndex Binary Type Operand\n" \
866 "-------------------------------------------\n");
867 for (index = 0; index<fCompiledPat->size(); index++) {
868 dumpOp(index);
869 }
870 printf("\n\n");
871 #endif
872 }
873
874
875
876 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
877
878 U_NAMESPACE_END
879 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
880