1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 //
4 // file: repattrn.cpp
5 //
6 /*
7 ***************************************************************************
8 * Copyright (C) 2002-2016 International Business Machines Corporation
9 * and others. All rights reserved.
10 ***************************************************************************
11 */
12
13 #include "unicode/utypes.h"
14
15 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
16
17 #include "unicode/regex.h"
18 #include "unicode/uclean.h"
19 #include "cmemory.h"
20 #include "cstr.h"
21 #include "uassert.h"
22 #include "uhash.h"
23 #include "uvector.h"
24 #include "uvectr32.h"
25 #include "uvectr64.h"
26 #include "regexcmp.h"
27 #include "regeximp.h"
28 #include "regexst.h"
29
30 U_NAMESPACE_BEGIN
31
32 //--------------------------------------------------------------------------
33 //
34 // RegexPattern Default Constructor
35 //
36 //--------------------------------------------------------------------------
RegexPattern()37 RegexPattern::RegexPattern() {
38 // Init all of this instances data.
39 init();
40 }
41
42
43 //--------------------------------------------------------------------------
44 //
45 // Copy Constructor Note: This is a rather inefficient implementation,
46 // but it probably doesn't matter.
47 //
48 //--------------------------------------------------------------------------
RegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) {
    // Bring the members to their default state first; the assignment
    // operator begins by releasing whatever the object currently owns.
    init();
    operator=(other);
}
53
54
55
56 //--------------------------------------------------------------------------
57 //
58 // Assignment Operator
59 //
60 //--------------------------------------------------------------------------
// Copy-assign `other` into this pattern.
//
// The current contents are released with zap(), the object is re-initialized
// with init(), and then every field of `other` is deep-copied.  Any failure is
// recorded in fDeferredStatus and the copy stops at that point; the function
// always returns *this.
RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
    if (this == &other) {
        // Source and destination are the same.  Don't do anything.
        return *this;
    }

    // Clean out any previous contents of object being assigned to.
    zap();

    // Give target object a default initialization
    init();

    // init() reports allocation problems through fDeferredStatus and may leave
    // some of the owned pointers null.  Stop here so that the failure is not
    // overwritten by other's status and the null members are not dereferenced
    // by the copies below.
    if (U_FAILURE(fDeferredStatus)) {
        return *this;
    }

    // Copy simple fields
    fDeferredStatus = other.fDeferredStatus;
    if (U_FAILURE(fDeferredStatus)) {
        return *this;
    }

    if (other.fPatternString == nullptr) {
        fPatternString = nullptr;
        // A default-constructed source has no pattern text at all; only clone
        // when there is something to clone from.
        if (other.fPattern != nullptr) {
            fPattern = utext_clone(fPattern, other.fPattern, false, true, &fDeferredStatus);
        }
    } else {
        fPatternString = new UnicodeString(*(other.fPatternString));
        if (fPatternString == nullptr) {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
        } else {
            fPattern = utext_openConstUnicodeString(nullptr, fPatternString, &fDeferredStatus);
        }
    }
    if (U_FAILURE(fDeferredStatus)) {
        return *this;
    }

    fFlags            = other.fFlags;
    fLiteralText      = other.fLiteralText;
    fMinMatchLen      = other.fMinMatchLen;
    fFrameSize        = other.fFrameSize;
    fDataSize         = other.fDataSize;

    fStartType        = other.fStartType;
    fInitialStringIdx = other.fInitialStringIdx;
    fInitialStringLen = other.fInitialStringLen;
    *fInitialChars    = *other.fInitialChars;
    fInitialChar      = other.fInitialChar;
    *fInitialChars8   = *other.fInitialChars8;
    fNeedsAltInput    = other.fNeedsAltInput;

    //  Copy the pattern.  It's just values, nothing deep to copy.
    fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
    fGroupMap->assign(*other.fGroupMap, fDeferredStatus);

    //  Copy the Unicode Sets.
    //    Could be made more efficient if the sets were reference counted and shared,
    //    but I doubt that pattern copying will be particularly common.
    //    Note:  init() already added an empty element zero to fSets
    const int32_t numSets = other.fSets->size();
    fSets8 = new Regex8BitSet[numSets];
    if (fSets8 == nullptr) {
        fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
        return *this;
    }
    for (int32_t i = 1; i < numSets; i++) {
        if (U_FAILURE(fDeferredStatus)) {
            return *this;
        }
        const UnicodeSet *sourceSet = static_cast<const UnicodeSet *>(other.fSets->elementAt(i));
        UnicodeSet *newSet = new UnicodeSet(*sourceSet);
        if (newSet == nullptr) {
            fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
            return *this;
        }
        fSets->addElement(newSet, fDeferredStatus);
        if (U_FAILURE(fDeferredStatus)) {
            // The vector did not take the element, so nothing else owns it.
            delete newSet;
            return *this;
        }
        fSets8[i] = other.fSets8[i];
    }
    if (U_FAILURE(fDeferredStatus)) {
        return *this;
    }

    // Copy the named capture group hash map.
    if (other.fNamedCaptureMap != nullptr && initNamedCaptureMap()) {
        int32_t hashPos = UHASH_FIRST;
        while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) {
            if (U_FAILURE(fDeferredStatus)) {
                break;
            }
            const UnicodeString *name = static_cast<const UnicodeString *>(hashEl->key.pointer);
            UnicodeString *key = new UnicodeString(*name);
            const int32_t val = hashEl->value.integer;
            if (key == nullptr) {
                fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
            } else {
                uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus);
            }
        }
    }
    return *this;
}
157
158
159 //--------------------------------------------------------------------------
160 //
161 // init Shared initialization for use by constructors.
162 // Bring an uninitialized RegexPattern up to a default state.
163 //
164 //--------------------------------------------------------------------------
init()165 void RegexPattern::init() {
166 fFlags = 0;
167 fCompiledPat = nullptr;
168 fLiteralText.remove();
169 fSets = nullptr;
170 fSets8 = nullptr;
171 fDeferredStatus = U_ZERO_ERROR;
172 fMinMatchLen = 0;
173 fFrameSize = 0;
174 fDataSize = 0;
175 fGroupMap = nullptr;
176 fStartType = START_NO_INFO;
177 fInitialStringIdx = 0;
178 fInitialStringLen = 0;
179 fInitialChars = nullptr;
180 fInitialChar = 0;
181 fInitialChars8 = nullptr;
182 fNeedsAltInput = false;
183 fNamedCaptureMap = nullptr;
184
185 fPattern = nullptr; // will be set later
186 fPatternString = nullptr; // may be set later
187 fCompiledPat = new UVector64(fDeferredStatus);
188 fGroupMap = new UVector32(fDeferredStatus);
189 fSets = new UVector(fDeferredStatus);
190 fInitialChars = new UnicodeSet;
191 fInitialChars8 = new Regex8BitSet;
192 if (U_FAILURE(fDeferredStatus)) {
193 return;
194 }
195 if (fCompiledPat == nullptr || fGroupMap == nullptr || fSets == nullptr ||
196 fInitialChars == nullptr || fInitialChars8 == nullptr) {
197 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
198 return;
199 }
200
201 // Slot zero of the vector of sets is reserved. Fill it here.
202 fSets->addElement((int32_t)0, fDeferredStatus);
203 }
204
205
initNamedCaptureMap()206 bool RegexPattern::initNamedCaptureMap() {
207 if (fNamedCaptureMap) {
208 return true;
209 }
210 fNamedCaptureMap = uhash_openSize(uhash_hashUnicodeString, // Key hash function
211 uhash_compareUnicodeString, // Key comparator function
212 uhash_compareLong, // Value comparator function
213 7, // Initial table capacity
214 &fDeferredStatus);
215 if (U_FAILURE(fDeferredStatus)) {
216 return false;
217 }
218
219 // fNamedCaptureMap owns its key strings, type (UnicodeString *)
220 uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject);
221 return true;
222 }
223
224 //--------------------------------------------------------------------------
225 //
226 // zap Delete everything owned by this RegexPattern.
227 //
228 //--------------------------------------------------------------------------
zap()229 void RegexPattern::zap() {
230 delete fCompiledPat;
231 fCompiledPat = nullptr;
232 int i;
233 for (i=1; i<fSets->size(); i++) {
234 UnicodeSet *s;
235 s = (UnicodeSet *)fSets->elementAt(i);
236 delete s;
237 }
238 delete fSets;
239 fSets = nullptr;
240 delete[] fSets8;
241 fSets8 = nullptr;
242 delete fGroupMap;
243 fGroupMap = nullptr;
244 delete fInitialChars;
245 fInitialChars = nullptr;
246 delete fInitialChars8;
247 fInitialChars8 = nullptr;
248 if (fPattern != nullptr) {
249 utext_close(fPattern);
250 fPattern = nullptr;
251 }
252 if (fPatternString != nullptr) {
253 delete fPatternString;
254 fPatternString = nullptr;
255 }
256 if (fNamedCaptureMap != nullptr) {
257 uhash_close(fNamedCaptureMap);
258 fNamedCaptureMap = nullptr;
259 }
260 }
261
262
263 //--------------------------------------------------------------------------
264 //
265 // Destructor
266 //
267 //--------------------------------------------------------------------------
~RegexPattern()268 RegexPattern::~RegexPattern() {
269 zap();
270 }
271
272
273 //--------------------------------------------------------------------------
274 //
275 // Clone
276 //
277 //--------------------------------------------------------------------------
clone() const278 RegexPattern *RegexPattern::clone() const {
279 RegexPattern *copy = new RegexPattern(*this);
280 return copy;
281 }
282
283
284 //--------------------------------------------------------------------------
285 //
//   operator == (comparison)    Consider two patterns to be == if the
//                               pattern strings and the flags are the same.
//                               Note that pattern strings with the same
//                               characters can still be considered different.
290 //
291 //--------------------------------------------------------------------------
operator ==(const RegexPattern & other) const292 bool RegexPattern::operator ==(const RegexPattern &other) const {
293 if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
294 if (this->fPatternString != nullptr && other.fPatternString != nullptr) {
295 return *(this->fPatternString) == *(other.fPatternString);
296 } else if (this->fPattern == nullptr) {
297 if (other.fPattern == nullptr) {
298 return true;
299 }
300 } else if (other.fPattern != nullptr) {
301 UTEXT_SETNATIVEINDEX(this->fPattern, 0);
302 UTEXT_SETNATIVEINDEX(other.fPattern, 0);
303 return utext_equals(this->fPattern, other.fPattern);
304 }
305 }
306 return false;
307 }
308
309 //---------------------------------------------------------------------
310 //
311 // compile
312 //
313 //---------------------------------------------------------------------
314 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UParseError & pe,UErrorCode & status)315 RegexPattern::compile(const UnicodeString ®ex,
316 uint32_t flags,
317 UParseError &pe,
318 UErrorCode &status)
319 {
320 if (U_FAILURE(status)) {
321 return nullptr;
322 }
323
324 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
325 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
326 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
327
328 if ((flags & ~allFlags) != 0) {
329 status = U_REGEX_INVALID_FLAG;
330 return nullptr;
331 }
332
333 if ((flags & UREGEX_CANON_EQ) != 0) {
334 status = U_REGEX_UNIMPLEMENTED;
335 return nullptr;
336 }
337
338 RegexPattern *This = new RegexPattern;
339 if (This == nullptr) {
340 status = U_MEMORY_ALLOCATION_ERROR;
341 return nullptr;
342 }
343 if (U_FAILURE(This->fDeferredStatus)) {
344 status = This->fDeferredStatus;
345 delete This;
346 return nullptr;
347 }
348 This->fFlags = flags;
349
350 RegexCompile compiler(This, status);
351 compiler.compile(regex, pe, status);
352
353 if (U_FAILURE(status)) {
354 delete This;
355 This = nullptr;
356 }
357
358 return This;
359 }
360
361
362 //
363 // compile, UText mode
364 //
365 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UParseError & pe,UErrorCode & status)366 RegexPattern::compile(UText *regex,
367 uint32_t flags,
368 UParseError &pe,
369 UErrorCode &status)
370 {
371 if (U_FAILURE(status)) {
372 return nullptr;
373 }
374
375 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
376 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
377 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
378
379 if ((flags & ~allFlags) != 0) {
380 status = U_REGEX_INVALID_FLAG;
381 return nullptr;
382 }
383
384 if ((flags & UREGEX_CANON_EQ) != 0) {
385 status = U_REGEX_UNIMPLEMENTED;
386 return nullptr;
387 }
388
389 RegexPattern *This = new RegexPattern;
390 if (This == nullptr) {
391 status = U_MEMORY_ALLOCATION_ERROR;
392 return nullptr;
393 }
394 if (U_FAILURE(This->fDeferredStatus)) {
395 status = This->fDeferredStatus;
396 delete This;
397 return nullptr;
398 }
399 This->fFlags = flags;
400
401 RegexCompile compiler(This, status);
402 compiler.compile(regex, pe, status);
403
404 if (U_FAILURE(status)) {
405 delete This;
406 This = nullptr;
407 }
408
409 return This;
410 }
411
412 //
413 // compile with default flags.
414 //
415 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,UParseError & pe,UErrorCode & err)416 RegexPattern::compile(const UnicodeString ®ex,
417 UParseError &pe,
418 UErrorCode &err)
419 {
420 return compile(regex, 0, pe, err);
421 }
422
423
424 //
425 // compile with default flags, UText mode
426 //
427 RegexPattern * U_EXPORT2
compile(UText * regex,UParseError & pe,UErrorCode & err)428 RegexPattern::compile(UText *regex,
429 UParseError &pe,
430 UErrorCode &err)
431 {
432 return compile(regex, 0, pe, err);
433 }
434
435
436 //
437 // compile with no UParseErr parameter.
438 //
439 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UErrorCode & err)440 RegexPattern::compile(const UnicodeString ®ex,
441 uint32_t flags,
442 UErrorCode &err)
443 {
444 UParseError pe;
445 return compile(regex, flags, pe, err);
446 }
447
448
449 //
450 // compile with no UParseErr parameter, UText mode
451 //
452 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UErrorCode & err)453 RegexPattern::compile(UText *regex,
454 uint32_t flags,
455 UErrorCode &err)
456 {
457 UParseError pe;
458 return compile(regex, flags, pe, err);
459 }
460
461
462 //---------------------------------------------------------------------
463 //
464 // flags
465 //
466 //---------------------------------------------------------------------
flags() const467 uint32_t RegexPattern::flags() const {
468 return fFlags;
469 }
470
471
472 //---------------------------------------------------------------------
473 //
474 // matcher(UnicodeString, err)
475 //
476 //---------------------------------------------------------------------
matcher(const UnicodeString & input,UErrorCode & status) const477 RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
478 UErrorCode &status) const {
479 RegexMatcher *retMatcher = matcher(status);
480 if (retMatcher != nullptr) {
481 retMatcher->fDeferredStatus = status;
482 retMatcher->reset(input);
483 }
484 return retMatcher;
485 }
486
487
488 //---------------------------------------------------------------------
489 //
490 // matcher(status)
491 //
492 //---------------------------------------------------------------------
matcher(UErrorCode & status) const493 RegexMatcher *RegexPattern::matcher(UErrorCode &status) const {
494 RegexMatcher *retMatcher = nullptr;
495
496 if (U_FAILURE(status)) {
497 return nullptr;
498 }
499 if (U_FAILURE(fDeferredStatus)) {
500 status = fDeferredStatus;
501 return nullptr;
502 }
503
504 retMatcher = new RegexMatcher(this);
505 if (retMatcher == nullptr) {
506 status = U_MEMORY_ALLOCATION_ERROR;
507 return nullptr;
508 }
509 return retMatcher;
510 }
511
512
513
514 //---------------------------------------------------------------------
515 //
516 // matches Convenience function to test for a match, starting
517 // with a pattern string and a data string.
518 //
519 //---------------------------------------------------------------------
matches(const UnicodeString & regex,const UnicodeString & input,UParseError & pe,UErrorCode & status)520 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString ®ex,
521 const UnicodeString &input,
522 UParseError &pe,
523 UErrorCode &status) {
524
525 if (U_FAILURE(status)) {return false;}
526
527 UBool retVal;
528 RegexPattern *pat = nullptr;
529 RegexMatcher *matcher = nullptr;
530
531 pat = RegexPattern::compile(regex, 0, pe, status);
532 matcher = pat->matcher(input, status);
533 retVal = matcher->matches(status);
534
535 delete matcher;
536 delete pat;
537 return retVal;
538 }
539
540
541 //
542 // matches, UText mode
543 //
matches(UText * regex,UText * input,UParseError & pe,UErrorCode & status)544 UBool U_EXPORT2 RegexPattern::matches(UText *regex,
545 UText *input,
546 UParseError &pe,
547 UErrorCode &status) {
548
549 if (U_FAILURE(status)) {return false;}
550
551 UBool retVal = false;
552 RegexPattern *pat = nullptr;
553 RegexMatcher *matcher = nullptr;
554
555 pat = RegexPattern::compile(regex, 0, pe, status);
556 matcher = pat->matcher(status);
557 if (U_SUCCESS(status)) {
558 matcher->reset(input);
559 retVal = matcher->matches(status);
560 }
561
562 delete matcher;
563 delete pat;
564 return retVal;
565 }
566
567
568
569
570
571 //---------------------------------------------------------------------
572 //
573 // pattern
574 //
575 //---------------------------------------------------------------------
pattern() const576 UnicodeString RegexPattern::pattern() const {
577 if (fPatternString != nullptr) {
578 return *fPatternString;
579 } else if (fPattern == nullptr) {
580 return {};
581 } else {
582 UErrorCode status = U_ZERO_ERROR;
583 int64_t nativeLen = utext_nativeLength(fPattern);
584 int32_t len16 = utext_extract(fPattern, 0, nativeLen, nullptr, 0, &status); // buffer overflow error
585 UnicodeString result;
586
587 status = U_ZERO_ERROR;
588 char16_t *resultChars = result.getBuffer(len16);
589 utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
590 result.releaseBuffer(len16);
591
592 return result;
593 }
594 }
595
596
597
598
599 //---------------------------------------------------------------------
600 //
601 // patternText
602 //
603 //---------------------------------------------------------------------
patternText(UErrorCode & status) const604 UText *RegexPattern::patternText(UErrorCode &status) const {
605 if (U_FAILURE(status)) {return nullptr;}
606 status = U_ZERO_ERROR;
607
608 if (fPattern != nullptr) {
609 return fPattern;
610 } else {
611 RegexStaticSets::initGlobals(&status);
612 return RegexStaticSets::gStaticSets->fEmptyText;
613 }
614 }
615
616
617 //--------------------------------------------------------------------------------
618 //
619 // groupNumberFromName()
620 //
621 //--------------------------------------------------------------------------------
groupNumberFromName(const UnicodeString & groupName,UErrorCode & status) const622 int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const {
623 if (U_FAILURE(status)) {
624 return 0;
625 }
626
627 // No need to explicitly check for syntactically valid names.
628 // Invalid ones will never be in the map, and the lookup will fail.
629
630 int32_t number = fNamedCaptureMap ? uhash_geti(fNamedCaptureMap, &groupName) : 0;
631 if (number == 0) {
632 status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
633 }
634 return number;
635 }
636
groupNumberFromName(const char * groupName,int32_t nameLength,UErrorCode & status) const637 int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const {
638 if (U_FAILURE(status)) {
639 return 0;
640 }
641 UnicodeString name(groupName, nameLength, US_INV);
642 return groupNumberFromName(name, status);
643 }
644
645
646 //---------------------------------------------------------------------
647 //
648 // split
649 //
650 //---------------------------------------------------------------------
split(const UnicodeString & input,UnicodeString dest[],int32_t destCapacity,UErrorCode & status) const651 int32_t RegexPattern::split(const UnicodeString &input,
652 UnicodeString dest[],
653 int32_t destCapacity,
654 UErrorCode &status) const
655 {
656 if (U_FAILURE(status)) {
657 return 0;
658 }
659
660 RegexMatcher m(this);
661 int32_t r = 0;
662 // Check m's status to make sure all is ok.
663 if (U_SUCCESS(m.fDeferredStatus)) {
664 r = m.split(input, dest, destCapacity, status);
665 }
666 return r;
667 }
668
669 //
670 // split, UText mode
671 //
split(UText * input,UText * dest[],int32_t destCapacity,UErrorCode & status) const672 int32_t RegexPattern::split(UText *input,
673 UText *dest[],
674 int32_t destCapacity,
675 UErrorCode &status) const
676 {
677 if (U_FAILURE(status)) {
678 return 0;
679 }
680
681 RegexMatcher m(this);
682 int32_t r = 0;
683 // Check m's status to make sure all is ok.
684 if (U_SUCCESS(m.fDeferredStatus)) {
685 r = m.split(input, dest, destCapacity, status);
686 }
687 return r;
688 }
689
690
691 //---------------------------------------------------------------------
692 //
693 // dump Output the compiled form of the pattern.
694 // Debugging function only.
695 //
696 //---------------------------------------------------------------------
dumpOp(int32_t index) const697 void RegexPattern::dumpOp(int32_t index) const {
698 (void)index; // Suppress warnings in non-debug build.
699 #if defined(REGEX_DEBUG)
700 static const char * const opNames[] = {URX_OPCODE_NAMES};
701 int32_t op = fCompiledPat->elementAti(index);
702 int32_t val = URX_VAL(op);
703 int32_t type = URX_TYPE(op);
704 int32_t pinnedType = type;
705 if ((uint32_t)pinnedType >= UPRV_LENGTHOF(opNames)) {
706 pinnedType = 0;
707 }
708
709 printf("%4d %08x %-15s ", index, op, opNames[pinnedType]);
710 switch (type) {
711 case URX_NOP:
712 case URX_DOTANY:
713 case URX_DOTANY_ALL:
714 case URX_FAIL:
715 case URX_CARET:
716 case URX_DOLLAR:
717 case URX_BACKSLASH_G:
718 case URX_BACKSLASH_X:
719 case URX_END:
720 case URX_DOLLAR_M:
721 case URX_CARET_M:
722 // Types with no operand field of interest.
723 break;
724
725 case URX_RESERVED_OP:
726 case URX_START_CAPTURE:
727 case URX_END_CAPTURE:
728 case URX_STATE_SAVE:
729 case URX_JMP:
730 case URX_JMP_SAV:
731 case URX_JMP_SAV_X:
732 case URX_BACKSLASH_B:
733 case URX_BACKSLASH_BU:
734 case URX_BACKSLASH_D:
735 case URX_BACKSLASH_Z:
736 case URX_STRING_LEN:
737 case URX_CTR_INIT:
738 case URX_CTR_INIT_NG:
739 case URX_CTR_LOOP:
740 case URX_CTR_LOOP_NG:
741 case URX_RELOC_OPRND:
742 case URX_STO_SP:
743 case URX_LD_SP:
744 case URX_BACKREF:
745 case URX_STO_INP_LOC:
746 case URX_JMPX:
747 case URX_LA_START:
748 case URX_LA_END:
749 case URX_BACKREF_I:
750 case URX_LB_START:
751 case URX_LB_CONT:
752 case URX_LB_END:
753 case URX_LBN_CONT:
754 case URX_LBN_END:
755 case URX_LOOP_C:
756 case URX_LOOP_DOT_I:
757 case URX_BACKSLASH_H:
758 case URX_BACKSLASH_R:
759 case URX_BACKSLASH_V:
760 // types with an integer operand field.
761 printf("%d", val);
762 break;
763
764 case URX_ONECHAR:
765 case URX_ONECHAR_I:
766 if (val < 0x20) {
767 printf("%#x", val);
768 } else {
769 printf("'%s'", CStr(UnicodeString(val))());
770 }
771 break;
772
773 case URX_STRING:
774 case URX_STRING_I:
775 {
776 int32_t lengthOp = fCompiledPat->elementAti(index+1);
777 U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
778 int32_t length = URX_VAL(lengthOp);
779 UnicodeString str(fLiteralText, val, length);
780 printf("%s", CStr(str)());
781 }
782 break;
783
784 case URX_SETREF:
785 case URX_LOOP_SR_I:
786 {
787 UnicodeString s;
788 UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
789 set->toPattern(s, true);
790 printf("%s", CStr(s)());
791 }
792 break;
793
794 case URX_STATIC_SETREF:
795 case URX_STAT_SETREF_N:
796 {
797 UnicodeString s;
798 if (val & URX_NEG_SET) {
799 printf("NOT ");
800 val &= ~URX_NEG_SET;
801 }
802 UnicodeSet &set = RegexStaticSets::gStaticSets->fPropSets[val];
803 set.toPattern(s, true);
804 printf("%s", CStr(s)());
805 }
806 break;
807
808
809 default:
810 printf("??????");
811 break;
812 }
813 printf("\n");
814 #endif
815 }
816
817
dumpPattern() const818 void RegexPattern::dumpPattern() const {
819 #if defined(REGEX_DEBUG)
820 int index;
821
822 UnicodeString patStr;
823 for (UChar32 c = utext_next32From(fPattern, 0); c != U_SENTINEL; c = utext_next32(fPattern)) {
824 patStr.append(c);
825 }
826 printf("Original Pattern: \"%s\"\n", CStr(patStr)());
827 printf(" Min Match Length: %d\n", fMinMatchLen);
828 printf(" Match Start Type: %s\n", START_OF_MATCH_STR(fStartType));
829 if (fStartType == START_STRING) {
830 UnicodeString initialString(fLiteralText,fInitialStringIdx, fInitialStringLen);
831 printf(" Initial match string: \"%s\"\n", CStr(initialString)());
832 } else if (fStartType == START_SET) {
833 UnicodeString s;
834 fInitialChars->toPattern(s, true);
835 printf(" Match First Chars: %s\n", CStr(s)());
836
837 } else if (fStartType == START_CHAR) {
838 printf(" First char of Match: ");
839 if (fInitialChar > 0x20) {
840 printf("'%s'\n", CStr(UnicodeString(fInitialChar))());
841 } else {
842 printf("%#x\n", fInitialChar);
843 }
844 }
845
846 printf("Named Capture Groups:\n");
847 if (!fNamedCaptureMap || uhash_count(fNamedCaptureMap) == 0) {
848 printf(" None\n");
849 } else {
850 int32_t pos = UHASH_FIRST;
851 const UHashElement *el = nullptr;
852 while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) {
853 const UnicodeString *name = (const UnicodeString *)el->key.pointer;
854 int32_t number = el->value.integer;
855 printf(" %d\t%s\n", number, CStr(*name)());
856 }
857 }
858
859 printf("\nIndex Binary Type Operand\n" \
860 "-------------------------------------------\n");
861 for (index = 0; index<fCompiledPat->size(); index++) {
862 dumpOp(index);
863 }
864 printf("\n\n");
865 #endif
866 }
867
868
869
870 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
871
872 U_NAMESPACE_END
873 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
874