• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 //
4 //  file:  repattrn.cpp
5 //
6 /*
7 ***************************************************************************
8 *   Copyright (C) 2002-2016 International Business Machines Corporation
9 *   and others. All rights reserved.
10 ***************************************************************************
11 */
12 
13 #include "unicode/utypes.h"
14 
15 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
16 
17 #include "unicode/regex.h"
18 #include "unicode/uclean.h"
19 #include "cmemory.h"
20 #include "cstr.h"
21 #include "uassert.h"
22 #include "uhash.h"
23 #include "uvector.h"
24 #include "uvectr32.h"
25 #include "uvectr64.h"
26 #include "regexcmp.h"
27 #include "regeximp.h"
28 #include "regexst.h"
29 
30 U_NAMESPACE_BEGIN
31 
32 //--------------------------------------------------------------------------
33 //
34 //    RegexPattern    Default Constructor
35 //
36 //--------------------------------------------------------------------------
RegexPattern()37 RegexPattern::RegexPattern() {
38     // Init all of this instances data.
39     init();
40 }
41 
42 
43 //--------------------------------------------------------------------------
44 //
45 //   Copy Constructor        Note:  This is a rather inefficient implementation,
46 //                                  but it probably doesn't matter.
47 //
48 //--------------------------------------------------------------------------
// Copy construction is expressed as "default-initialize, then assign":
// init() brings the members to their default state so that the assignment
// operator can safely tear them down and rebuild them from 'other'.
RegexPattern::RegexPattern(const RegexPattern &other) :  UObject(other) {
    init();
    operator=(other);
}
53 
54 
55 
56 //--------------------------------------------------------------------------
57 //
58 //    Assignment Operator
59 //
60 //--------------------------------------------------------------------------
operator =(const RegexPattern & other)61 RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
62     if (this == &other) {
63         // Source and destination are the same.  Don't do anything.
64         return *this;
65     }
66 
67     // Clean out any previous contents of object being assigned to.
68     zap();
69 
70     // Give target object a default initialization
71     init();
72 
73     // Copy simple fields
74     fDeferredStatus   = other.fDeferredStatus;
75 
76     if (U_FAILURE(fDeferredStatus)) {
77         return *this;
78     }
79 
80     if (other.fPatternString == nullptr) {
81         fPatternString = nullptr;
82         fPattern = utext_clone(fPattern, other.fPattern, false, true, &fDeferredStatus);
83     } else {
84         fPatternString = new UnicodeString(*(other.fPatternString));
85         if (fPatternString == nullptr) {
86             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
87         } else {
88             fPattern = utext_openConstUnicodeString(nullptr, fPatternString, &fDeferredStatus);
89         }
90     }
91     if (U_FAILURE(fDeferredStatus)) {
92         return *this;
93     }
94 
95     fFlags            = other.fFlags;
96     fLiteralText      = other.fLiteralText;
97     fMinMatchLen      = other.fMinMatchLen;
98     fFrameSize        = other.fFrameSize;
99     fDataSize         = other.fDataSize;
100 
101     fStartType        = other.fStartType;
102     fInitialStringIdx = other.fInitialStringIdx;
103     fInitialStringLen = other.fInitialStringLen;
104     *fInitialChars    = *other.fInitialChars;
105     fInitialChar      = other.fInitialChar;
106     *fInitialChars8   = *other.fInitialChars8;
107     fNeedsAltInput    = other.fNeedsAltInput;
108 
109     //  Copy the pattern.  It's just values, nothing deep to copy.
110     fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
111     fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
112 
113     //  Copy the Unicode Sets.
114     //    Could be made more efficient if the sets were reference counted and shared,
115     //    but I doubt that pattern copying will be particularly common.
116     //    Note:  init() already added an empty element zero to fSets
117     int32_t i;
118     int32_t  numSets = other.fSets->size();
119     fSets8 = new Regex8BitSet[numSets];
120     if (fSets8 == nullptr) {
121     	fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
122     	return *this;
123     }
124     for (i=1; i<numSets; i++) {
125         if (U_FAILURE(fDeferredStatus)) {
126             return *this;
127         }
128         UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
129         UnicodeSet *newSet    = new UnicodeSet(*sourceSet);
130         if (newSet == nullptr) {
131             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
132             break;
133         }
134         fSets->addElement(newSet, fDeferredStatus);
135         fSets8[i] = other.fSets8[i];
136     }
137 
138     // Copy the named capture group hash map.
139     if (other.fNamedCaptureMap != nullptr && initNamedCaptureMap()) {
140         int32_t hashPos = UHASH_FIRST;
141         while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) {
142             if (U_FAILURE(fDeferredStatus)) {
143                 break;
144             }
145             const UnicodeString *name = (const UnicodeString *)hashEl->key.pointer;
146             UnicodeString *key = new UnicodeString(*name);
147             int32_t val = hashEl->value.integer;
148             if (key == nullptr) {
149                 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
150             } else {
151                 uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus);
152             }
153         }
154     }
155     return *this;
156 }
157 
158 
159 //--------------------------------------------------------------------------
160 //
161 //    init        Shared initialization for use by constructors.
162 //                Bring an uninitialized RegexPattern up to a default state.
163 //
164 //--------------------------------------------------------------------------
init()165 void RegexPattern::init() {
166     fFlags            = 0;
167     fCompiledPat      = nullptr;
168     fLiteralText.remove();
169     fSets             = nullptr;
170     fSets8            = nullptr;
171     fDeferredStatus   = U_ZERO_ERROR;
172     fMinMatchLen      = 0;
173     fFrameSize        = 0;
174     fDataSize         = 0;
175     fGroupMap         = nullptr;
176     fStartType        = START_NO_INFO;
177     fInitialStringIdx = 0;
178     fInitialStringLen = 0;
179     fInitialChars     = nullptr;
180     fInitialChar      = 0;
181     fInitialChars8    = nullptr;
182     fNeedsAltInput    = false;
183     fNamedCaptureMap  = nullptr;
184 
185     fPattern          = nullptr; // will be set later
186     fPatternString    = nullptr; // may be set later
187     fCompiledPat      = new UVector64(fDeferredStatus);
188     fGroupMap         = new UVector32(fDeferredStatus);
189     fSets             = new UVector(fDeferredStatus);
190     fInitialChars     = new UnicodeSet;
191     fInitialChars8    = new Regex8BitSet;
192     if (U_FAILURE(fDeferredStatus)) {
193         return;
194     }
195     if (fCompiledPat == nullptr  || fGroupMap == nullptr || fSets == nullptr ||
196             fInitialChars == nullptr || fInitialChars8 == nullptr) {
197         fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
198         return;
199     }
200 
201     // Slot zero of the vector of sets is reserved.  Fill it here.
202     fSets->addElement((int32_t)0, fDeferredStatus);
203 }
204 
205 
initNamedCaptureMap()206 bool RegexPattern::initNamedCaptureMap() {
207     if (fNamedCaptureMap) {
208         return true;
209     }
210     fNamedCaptureMap  = uhash_openSize(uhash_hashUnicodeString,     // Key hash function
211                                        uhash_compareUnicodeString,  // Key comparator function
212                                        uhash_compareLong,           // Value comparator function
213                                        7,                           // Initial table capacity
214                                        &fDeferredStatus);
215     if (U_FAILURE(fDeferredStatus)) {
216         return false;
217     }
218 
219     // fNamedCaptureMap owns its key strings, type (UnicodeString *)
220     uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject);
221     return true;
222 }
223 
224 //--------------------------------------------------------------------------
225 //
226 //   zap            Delete everything owned by this RegexPattern.
227 //
228 //--------------------------------------------------------------------------
zap()229 void RegexPattern::zap() {
230     delete fCompiledPat;
231     fCompiledPat = nullptr;
232     int i;
233     for (i=1; i<fSets->size(); i++) {
234         UnicodeSet *s;
235         s = (UnicodeSet *)fSets->elementAt(i);
236         delete s;
237     }
238     delete fSets;
239     fSets = nullptr;
240     delete[] fSets8;
241     fSets8 = nullptr;
242     delete fGroupMap;
243     fGroupMap = nullptr;
244     delete fInitialChars;
245     fInitialChars = nullptr;
246     delete fInitialChars8;
247     fInitialChars8 = nullptr;
248     if (fPattern != nullptr) {
249         utext_close(fPattern);
250         fPattern = nullptr;
251     }
252     if (fPatternString != nullptr) {
253         delete fPatternString;
254         fPatternString = nullptr;
255     }
256     if (fNamedCaptureMap != nullptr) {
257         uhash_close(fNamedCaptureMap);
258         fNamedCaptureMap = nullptr;
259     }
260 }
261 
262 
263 //--------------------------------------------------------------------------
264 //
265 //   Destructor
266 //
267 //--------------------------------------------------------------------------
~RegexPattern()268 RegexPattern::~RegexPattern() {
269     zap();
270 }
271 
272 
273 //--------------------------------------------------------------------------
274 //
275 //   Clone
276 //
277 //--------------------------------------------------------------------------
clone() const278 RegexPattern  *RegexPattern::clone() const {
279     RegexPattern  *copy = new RegexPattern(*this);
280     return copy;
281 }
282 
283 
284 //--------------------------------------------------------------------------
285 //
286 //   operator ==   (comparison)    Consider two patterns to be == if the
287 //                                 pattern strings and the flags are the same.
288 //                                 Note that pattern strings with the same
289 //                                 characters can still be considered different.
290 //
291 //--------------------------------------------------------------------------
operator ==(const RegexPattern & other) const292 bool    RegexPattern::operator ==(const RegexPattern &other) const {
293     if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
294         if (this->fPatternString != nullptr && other.fPatternString != nullptr) {
295             return *(this->fPatternString) == *(other.fPatternString);
296         } else if (this->fPattern == nullptr) {
297             if (other.fPattern == nullptr) {
298                 return true;
299             }
300         } else if (other.fPattern != nullptr) {
301             UTEXT_SETNATIVEINDEX(this->fPattern, 0);
302             UTEXT_SETNATIVEINDEX(other.fPattern, 0);
303             return utext_equals(this->fPattern, other.fPattern);
304         }
305     }
306     return false;
307 }
308 
309 //---------------------------------------------------------------------
310 //
311 //   compile
312 //
313 //---------------------------------------------------------------------
314 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UParseError & pe,UErrorCode & status)315 RegexPattern::compile(const UnicodeString &regex,
316                       uint32_t             flags,
317                       UParseError          &pe,
318                       UErrorCode           &status)
319 {
320     if (U_FAILURE(status)) {
321         return nullptr;
322     }
323 
324     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
325     UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
326     UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
327 
328     if ((flags & ~allFlags) != 0) {
329         status = U_REGEX_INVALID_FLAG;
330         return nullptr;
331     }
332 
333     if ((flags & UREGEX_CANON_EQ) != 0) {
334         status = U_REGEX_UNIMPLEMENTED;
335         return nullptr;
336     }
337 
338     RegexPattern *This = new RegexPattern;
339     if (This == nullptr) {
340         status = U_MEMORY_ALLOCATION_ERROR;
341         return nullptr;
342     }
343     if (U_FAILURE(This->fDeferredStatus)) {
344         status = This->fDeferredStatus;
345         delete This;
346         return nullptr;
347     }
348     This->fFlags = flags;
349 
350     RegexCompile     compiler(This, status);
351     compiler.compile(regex, pe, status);
352 
353     if (U_FAILURE(status)) {
354         delete This;
355         This = nullptr;
356     }
357 
358     return This;
359 }
360 
361 
362 //
363 //   compile, UText mode
364 //
365 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UParseError & pe,UErrorCode & status)366 RegexPattern::compile(UText                *regex,
367                       uint32_t             flags,
368                       UParseError          &pe,
369                       UErrorCode           &status)
370 {
371     if (U_FAILURE(status)) {
372         return nullptr;
373     }
374 
375     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
376                               UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
377                               UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
378 
379     if ((flags & ~allFlags) != 0) {
380         status = U_REGEX_INVALID_FLAG;
381         return nullptr;
382     }
383 
384     if ((flags & UREGEX_CANON_EQ) != 0) {
385         status = U_REGEX_UNIMPLEMENTED;
386         return nullptr;
387     }
388 
389     RegexPattern *This = new RegexPattern;
390     if (This == nullptr) {
391         status = U_MEMORY_ALLOCATION_ERROR;
392         return nullptr;
393     }
394     if (U_FAILURE(This->fDeferredStatus)) {
395         status = This->fDeferredStatus;
396         delete This;
397         return nullptr;
398     }
399     This->fFlags = flags;
400 
401     RegexCompile     compiler(This, status);
402     compiler.compile(regex, pe, status);
403 
404     if (U_FAILURE(status)) {
405         delete This;
406         This = nullptr;
407     }
408 
409     return This;
410 }
411 
412 //
413 //   compile with default flags.
414 //
415 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,UParseError & pe,UErrorCode & err)416 RegexPattern::compile(const UnicodeString &regex,
417                       UParseError         &pe,
418                       UErrorCode          &err)
419 {
420     return compile(regex, 0, pe, err);
421 }
422 
423 
424 //
425 //   compile with default flags, UText mode
426 //
427 RegexPattern * U_EXPORT2
compile(UText * regex,UParseError & pe,UErrorCode & err)428 RegexPattern::compile(UText               *regex,
429                       UParseError         &pe,
430                       UErrorCode          &err)
431 {
432     return compile(regex, 0, pe, err);
433 }
434 
435 
436 //
437 //   compile with no UParseErr parameter.
438 //
439 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UErrorCode & err)440 RegexPattern::compile(const UnicodeString &regex,
441                       uint32_t             flags,
442                       UErrorCode          &err)
443 {
444     UParseError pe;
445     return compile(regex, flags, pe, err);
446 }
447 
448 
449 //
450 //   compile with no UParseErr parameter, UText mode
451 //
452 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UErrorCode & err)453 RegexPattern::compile(UText                *regex,
454                       uint32_t             flags,
455                       UErrorCode           &err)
456 {
457     UParseError pe;
458     return compile(regex, flags, pe, err);
459 }
460 
461 
462 //---------------------------------------------------------------------
463 //
464 //   flags
465 //
466 //---------------------------------------------------------------------
flags() const467 uint32_t RegexPattern::flags() const {
468     return fFlags;
469 }
470 
471 
472 //---------------------------------------------------------------------
473 //
474 //   matcher(UnicodeString, err)
475 //
476 //---------------------------------------------------------------------
matcher(const UnicodeString & input,UErrorCode & status) const477 RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
478                                     UErrorCode          &status)  const {
479     RegexMatcher    *retMatcher = matcher(status);
480     if (retMatcher != nullptr) {
481         retMatcher->fDeferredStatus = status;
482         retMatcher->reset(input);
483     }
484     return retMatcher;
485 }
486 
487 
488 //---------------------------------------------------------------------
489 //
490 //   matcher(status)
491 //
492 //---------------------------------------------------------------------
matcher(UErrorCode & status) const493 RegexMatcher *RegexPattern::matcher(UErrorCode &status)  const {
494     RegexMatcher    *retMatcher = nullptr;
495 
496     if (U_FAILURE(status)) {
497         return nullptr;
498     }
499     if (U_FAILURE(fDeferredStatus)) {
500         status = fDeferredStatus;
501         return nullptr;
502     }
503 
504     retMatcher = new RegexMatcher(this);
505     if (retMatcher == nullptr) {
506         status = U_MEMORY_ALLOCATION_ERROR;
507         return nullptr;
508     }
509     return retMatcher;
510 }
511 
512 
513 
514 //---------------------------------------------------------------------
515 //
516 //   matches        Convenience function to test for a match, starting
517 //                  with a pattern string and a data string.
518 //
519 //---------------------------------------------------------------------
matches(const UnicodeString & regex,const UnicodeString & input,UParseError & pe,UErrorCode & status)520 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString   &regex,
521               const UnicodeString   &input,
522                     UParseError     &pe,
523                     UErrorCode      &status) {
524 
525     if (U_FAILURE(status)) {return false;}
526 
527     UBool         retVal;
528     RegexPattern *pat     = nullptr;
529     RegexMatcher *matcher = nullptr;
530 
531     pat     = RegexPattern::compile(regex, 0, pe, status);
532     matcher = pat->matcher(input, status);
533     retVal  = matcher->matches(status);
534 
535     delete matcher;
536     delete pat;
537     return retVal;
538 }
539 
540 
541 //
542 //   matches, UText mode
543 //
matches(UText * regex,UText * input,UParseError & pe,UErrorCode & status)544 UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
545                     UText           *input,
546                     UParseError     &pe,
547                     UErrorCode      &status) {
548 
549     if (U_FAILURE(status)) {return false;}
550 
551     UBool         retVal  = false;
552     RegexPattern *pat     = nullptr;
553     RegexMatcher *matcher = nullptr;
554 
555     pat     = RegexPattern::compile(regex, 0, pe, status);
556     matcher = pat->matcher(status);
557     if (U_SUCCESS(status)) {
558         matcher->reset(input);
559         retVal  = matcher->matches(status);
560     }
561 
562     delete matcher;
563     delete pat;
564     return retVal;
565 }
566 
567 
568 
569 
570 
571 //---------------------------------------------------------------------
572 //
573 //   pattern
574 //
575 //---------------------------------------------------------------------
pattern() const576 UnicodeString RegexPattern::pattern() const {
577     if (fPatternString != nullptr) {
578         return *fPatternString;
579     } else if (fPattern == nullptr) {
580         return {};
581     } else {
582         UErrorCode status = U_ZERO_ERROR;
583         int64_t nativeLen = utext_nativeLength(fPattern);
584         int32_t len16 = utext_extract(fPattern, 0, nativeLen, nullptr, 0, &status); // buffer overflow error
585         UnicodeString result;
586 
587         status = U_ZERO_ERROR;
588         char16_t *resultChars = result.getBuffer(len16);
589         utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
590         result.releaseBuffer(len16);
591 
592         return result;
593     }
594 }
595 
596 
597 
598 
599 //---------------------------------------------------------------------
600 //
601 //   patternText
602 //
603 //---------------------------------------------------------------------
patternText(UErrorCode & status) const604 UText *RegexPattern::patternText(UErrorCode      &status) const {
605     if (U_FAILURE(status)) {return nullptr;}
606     status = U_ZERO_ERROR;
607 
608     if (fPattern != nullptr) {
609         return fPattern;
610     } else {
611         RegexStaticSets::initGlobals(&status);
612         return RegexStaticSets::gStaticSets->fEmptyText;
613     }
614 }
615 
616 
617 //--------------------------------------------------------------------------------
618 //
619 //  groupNumberFromName()
620 //
621 //--------------------------------------------------------------------------------
groupNumberFromName(const UnicodeString & groupName,UErrorCode & status) const622 int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const {
623     if (U_FAILURE(status)) {
624         return 0;
625     }
626 
627     // No need to explicitly check for syntactically valid names.
628     // Invalid ones will never be in the map, and the lookup will fail.
629 
630     int32_t number = fNamedCaptureMap ? uhash_geti(fNamedCaptureMap, &groupName) : 0;
631     if (number == 0) {
632         status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
633     }
634     return number;
635 }
636 
groupNumberFromName(const char * groupName,int32_t nameLength,UErrorCode & status) const637 int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const {
638     if (U_FAILURE(status)) {
639         return 0;
640     }
641     UnicodeString name(groupName, nameLength, US_INV);
642     return groupNumberFromName(name, status);
643 }
644 
645 
646 //---------------------------------------------------------------------
647 //
648 //   split
649 //
650 //---------------------------------------------------------------------
split(const UnicodeString & input,UnicodeString dest[],int32_t destCapacity,UErrorCode & status) const651 int32_t  RegexPattern::split(const UnicodeString &input,
652         UnicodeString    dest[],
653         int32_t          destCapacity,
654         UErrorCode      &status) const
655 {
656     if (U_FAILURE(status)) {
657         return 0;
658     }
659 
660     RegexMatcher  m(this);
661     int32_t r = 0;
662     // Check m's status to make sure all is ok.
663     if (U_SUCCESS(m.fDeferredStatus)) {
664     	r = m.split(input, dest, destCapacity, status);
665     }
666     return r;
667 }
668 
669 //
670 //   split, UText mode
671 //
split(UText * input,UText * dest[],int32_t destCapacity,UErrorCode & status) const672 int32_t  RegexPattern::split(UText *input,
673         UText           *dest[],
674         int32_t          destCapacity,
675         UErrorCode      &status) const
676 {
677     if (U_FAILURE(status)) {
678         return 0;
679     }
680 
681     RegexMatcher  m(this);
682     int32_t r = 0;
683     // Check m's status to make sure all is ok.
684     if (U_SUCCESS(m.fDeferredStatus)) {
685     	r = m.split(input, dest, destCapacity, status);
686     }
687     return r;
688 }
689 
690 
691 //---------------------------------------------------------------------
692 //
693 //   dump    Output the compiled form of the pattern.
694 //           Debugging function only.
695 //
696 //---------------------------------------------------------------------
dumpOp(int32_t index) const697 void   RegexPattern::dumpOp(int32_t index) const {
698     (void)index;  // Suppress warnings in non-debug build.
699 #if defined(REGEX_DEBUG)
700     static const char * const opNames[] = {URX_OPCODE_NAMES};
701     int32_t op          = fCompiledPat->elementAti(index);
702     int32_t val         = URX_VAL(op);
703     int32_t type        = URX_TYPE(op);
704     int32_t pinnedType  = type;
705     if ((uint32_t)pinnedType >= UPRV_LENGTHOF(opNames)) {
706         pinnedType = 0;
707     }
708 
709     printf("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]);
710     switch (type) {
711     case URX_NOP:
712     case URX_DOTANY:
713     case URX_DOTANY_ALL:
714     case URX_FAIL:
715     case URX_CARET:
716     case URX_DOLLAR:
717     case URX_BACKSLASH_G:
718     case URX_BACKSLASH_X:
719     case URX_END:
720     case URX_DOLLAR_M:
721     case URX_CARET_M:
722         // Types with no operand field of interest.
723         break;
724 
725     case URX_RESERVED_OP:
726     case URX_START_CAPTURE:
727     case URX_END_CAPTURE:
728     case URX_STATE_SAVE:
729     case URX_JMP:
730     case URX_JMP_SAV:
731     case URX_JMP_SAV_X:
732     case URX_BACKSLASH_B:
733     case URX_BACKSLASH_BU:
734     case URX_BACKSLASH_D:
735     case URX_BACKSLASH_Z:
736     case URX_STRING_LEN:
737     case URX_CTR_INIT:
738     case URX_CTR_INIT_NG:
739     case URX_CTR_LOOP:
740     case URX_CTR_LOOP_NG:
741     case URX_RELOC_OPRND:
742     case URX_STO_SP:
743     case URX_LD_SP:
744     case URX_BACKREF:
745     case URX_STO_INP_LOC:
746     case URX_JMPX:
747     case URX_LA_START:
748     case URX_LA_END:
749     case URX_BACKREF_I:
750     case URX_LB_START:
751     case URX_LB_CONT:
752     case URX_LB_END:
753     case URX_LBN_CONT:
754     case URX_LBN_END:
755     case URX_LOOP_C:
756     case URX_LOOP_DOT_I:
757     case URX_BACKSLASH_H:
758     case URX_BACKSLASH_R:
759     case URX_BACKSLASH_V:
760         // types with an integer operand field.
761         printf("%d", val);
762         break;
763 
764     case URX_ONECHAR:
765     case URX_ONECHAR_I:
766         if (val < 0x20) {
767             printf("%#x", val);
768         } else {
769             printf("'%s'", CStr(UnicodeString(val))());
770         }
771         break;
772 
773     case URX_STRING:
774     case URX_STRING_I:
775         {
776             int32_t lengthOp       = fCompiledPat->elementAti(index+1);
777             U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
778             int32_t length = URX_VAL(lengthOp);
779             UnicodeString str(fLiteralText, val, length);
780             printf("%s", CStr(str)());
781         }
782         break;
783 
784     case URX_SETREF:
785     case URX_LOOP_SR_I:
786         {
787             UnicodeString s;
788             UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
789             set->toPattern(s, true);
790             printf("%s", CStr(s)());
791         }
792         break;
793 
794     case URX_STATIC_SETREF:
795     case URX_STAT_SETREF_N:
796         {
797             UnicodeString s;
798             if (val & URX_NEG_SET) {
799                 printf("NOT ");
800                 val &= ~URX_NEG_SET;
801             }
802             UnicodeSet &set = RegexStaticSets::gStaticSets->fPropSets[val];
803             set.toPattern(s, true);
804             printf("%s", CStr(s)());
805         }
806         break;
807 
808 
809     default:
810         printf("??????");
811         break;
812     }
813     printf("\n");
814 #endif
815 }
816 
817 
dumpPattern() const818 void RegexPattern::dumpPattern() const {
819 #if defined(REGEX_DEBUG)
820     int      index;
821 
822     UnicodeString patStr;
823     for (UChar32 c = utext_next32From(fPattern, 0); c != U_SENTINEL; c = utext_next32(fPattern)) {
824         patStr.append(c);
825     }
826     printf("Original Pattern:  \"%s\"\n", CStr(patStr)());
827     printf("   Min Match Length:  %d\n", fMinMatchLen);
828     printf("   Match Start Type:  %s\n", START_OF_MATCH_STR(fStartType));
829     if (fStartType == START_STRING) {
830         UnicodeString initialString(fLiteralText,fInitialStringIdx, fInitialStringLen);
831         printf("   Initial match string: \"%s\"\n", CStr(initialString)());
832     } else if (fStartType == START_SET) {
833         UnicodeString s;
834         fInitialChars->toPattern(s, true);
835         printf("    Match First Chars: %s\n", CStr(s)());
836 
837     } else if (fStartType == START_CHAR) {
838         printf("    First char of Match: ");
839         if (fInitialChar > 0x20) {
840                 printf("'%s'\n", CStr(UnicodeString(fInitialChar))());
841             } else {
842                 printf("%#x\n", fInitialChar);
843             }
844     }
845 
846     printf("Named Capture Groups:\n");
847     if (!fNamedCaptureMap || uhash_count(fNamedCaptureMap) == 0) {
848         printf("   None\n");
849     } else {
850         int32_t pos = UHASH_FIRST;
851         const UHashElement *el = nullptr;
852         while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) {
853             const UnicodeString *name = (const UnicodeString *)el->key.pointer;
854             int32_t number = el->value.integer;
855             printf("   %d\t%s\n", number, CStr(*name)());
856         }
857     }
858 
859     printf("\nIndex   Binary     Type             Operand\n" \
860            "-------------------------------------------\n");
861     for (index = 0; index<fCompiledPat->size(); index++) {
862         dumpOp(index);
863     }
864     printf("\n\n");
865 #endif
866 }
867 
868 
869 
870 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
871 
872 U_NAMESPACE_END
873 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
874