• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 //
4 //  file:  repattrn.cpp
5 //
6 /*
7 ***************************************************************************
8 *   Copyright (C) 2002-2016 International Business Machines Corporation
9 *   and others. All rights reserved.
10 ***************************************************************************
11 */
12 
13 #include "unicode/utypes.h"
14 
15 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
16 
17 #include "unicode/regex.h"
18 #include "unicode/uclean.h"
19 #include "cmemory.h"
20 #include "cstr.h"
21 #include "uassert.h"
22 #include "uhash.h"
23 #include "uvector.h"
24 #include "uvectr32.h"
25 #include "uvectr64.h"
26 #include "regexcmp.h"
27 #include "regeximp.h"
28 #include "regexst.h"
29 
30 U_NAMESPACE_BEGIN
31 
32 //--------------------------------------------------------------------------
33 //
34 //    RegexPattern    Default Constructor
35 //
36 //--------------------------------------------------------------------------
RegexPattern()37 RegexPattern::RegexPattern() {
38     // Init all of this instances data.
39     init();
40 }
41 
42 
43 //--------------------------------------------------------------------------
44 //
45 //   Copy Constructor        Note:  This is a rather inefficient implementation,
46 //                                  but it probably doesn't matter.
47 //
48 //--------------------------------------------------------------------------
RegexPattern::RegexPattern(const RegexPattern &other) :  UObject(other) {
    // Start from a default-initialized object, then delegate the actual
    // copying of 'other' to the assignment operator.
    init();
    operator=(other);
}
53 
54 
55 
56 //--------------------------------------------------------------------------
57 //
58 //    Assignment Operator
59 //
60 //--------------------------------------------------------------------------
operator =(const RegexPattern & other)61 RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
62     if (this == &other) {
63         // Source and destination are the same.  Don't do anything.
64         return *this;
65     }
66 
67     // Clean out any previous contents of object being assigned to.
68     zap();
69 
70     // Give target object a default initialization
71     init();
72 
73     // Copy simple fields
74     fDeferredStatus   = other.fDeferredStatus;
75 
76     if (U_FAILURE(fDeferredStatus)) {
77         return *this;
78     }
79 
80     if (other.fPatternString == NULL) {
81         fPatternString = NULL;
82         fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
83     } else {
84         fPatternString = new UnicodeString(*(other.fPatternString));
85         if (fPatternString == NULL) {
86             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
87         } else {
88             fPattern = utext_openConstUnicodeString(NULL, fPatternString, &fDeferredStatus);
89         }
90     }
91     if (U_FAILURE(fDeferredStatus)) {
92         return *this;
93     }
94 
95     fFlags            = other.fFlags;
96     fLiteralText      = other.fLiteralText;
97     fMinMatchLen      = other.fMinMatchLen;
98     fFrameSize        = other.fFrameSize;
99     fDataSize         = other.fDataSize;
100     fStaticSets       = other.fStaticSets;
101     fStaticSets8      = other.fStaticSets8;
102 
103     fStartType        = other.fStartType;
104     fInitialStringIdx = other.fInitialStringIdx;
105     fInitialStringLen = other.fInitialStringLen;
106     *fInitialChars    = *other.fInitialChars;
107     fInitialChar      = other.fInitialChar;
108     *fInitialChars8   = *other.fInitialChars8;
109     fNeedsAltInput    = other.fNeedsAltInput;
110 
111     //  Copy the pattern.  It's just values, nothing deep to copy.
112     fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
113     fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
114 
115     //  Copy the Unicode Sets.
116     //    Could be made more efficient if the sets were reference counted and shared,
117     //    but I doubt that pattern copying will be particularly common.
118     //    Note:  init() already added an empty element zero to fSets
119     int32_t i;
120     int32_t  numSets = other.fSets->size();
121     fSets8 = new Regex8BitSet[numSets];
122     if (fSets8 == NULL) {
123     	fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
124     	return *this;
125     }
126     for (i=1; i<numSets; i++) {
127         if (U_FAILURE(fDeferredStatus)) {
128             return *this;
129         }
130         UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
131         UnicodeSet *newSet    = new UnicodeSet(*sourceSet);
132         if (newSet == NULL) {
133             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
134             break;
135         }
136         fSets->addElement(newSet, fDeferredStatus);
137         fSets8[i] = other.fSets8[i];
138     }
139 
140     // Copy the named capture group hash map.
141     if (other.fNamedCaptureMap != nullptr && initNamedCaptureMap()) {
142         int32_t hashPos = UHASH_FIRST;
143         while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) {
144             if (U_FAILURE(fDeferredStatus)) {
145                 break;
146             }
147             const UnicodeString *name = (const UnicodeString *)hashEl->key.pointer;
148             UnicodeString *key = new UnicodeString(*name);
149             int32_t val = hashEl->value.integer;
150             if (key == NULL) {
151                 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
152             } else {
153                 uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus);
154             }
155         }
156     }
157     return *this;
158 }
159 
160 
161 //--------------------------------------------------------------------------
162 //
163 //    init        Shared initialization for use by constructors.
164 //                Bring an uninitialized RegexPattern up to a default state.
165 //
166 //--------------------------------------------------------------------------
init()167 void RegexPattern::init() {
168     fFlags            = 0;
169     fCompiledPat      = 0;
170     fLiteralText.remove();
171     fSets             = NULL;
172     fSets8            = NULL;
173     fDeferredStatus   = U_ZERO_ERROR;
174     fMinMatchLen      = 0;
175     fFrameSize        = 0;
176     fDataSize         = 0;
177     fGroupMap         = NULL;
178     fStaticSets       = NULL;
179     fStaticSets8      = NULL;
180     fStartType        = START_NO_INFO;
181     fInitialStringIdx = 0;
182     fInitialStringLen = 0;
183     fInitialChars     = NULL;
184     fInitialChar      = 0;
185     fInitialChars8    = NULL;
186     fNeedsAltInput    = FALSE;
187     fNamedCaptureMap  = NULL;
188 
189     fPattern          = NULL; // will be set later
190     fPatternString    = NULL; // may be set later
191     fCompiledPat      = new UVector64(fDeferredStatus);
192     fGroupMap         = new UVector32(fDeferredStatus);
193     fSets             = new UVector(fDeferredStatus);
194     fInitialChars     = new UnicodeSet;
195     fInitialChars8    = new Regex8BitSet;
196     if (U_FAILURE(fDeferredStatus)) {
197         return;
198     }
199     if (fCompiledPat == NULL  || fGroupMap == NULL || fSets == NULL ||
200             fInitialChars == NULL || fInitialChars8 == NULL) {
201         fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
202         return;
203     }
204 
205     // Slot zero of the vector of sets is reserved.  Fill it here.
206     fSets->addElement((int32_t)0, fDeferredStatus);
207 }
208 
209 
initNamedCaptureMap()210 bool RegexPattern::initNamedCaptureMap() {
211     if (fNamedCaptureMap) {
212         return true;
213     }
214     fNamedCaptureMap  = uhash_openSize(uhash_hashUnicodeString,     // Key hash function
215                                        uhash_compareUnicodeString,  // Key comparator function
216                                        uhash_compareLong,           // Value comparator function
217                                        7,                           // Initial table capacity
218                                        &fDeferredStatus);
219     if (U_FAILURE(fDeferredStatus)) {
220         return false;
221     }
222 
223     // fNamedCaptureMap owns its key strings, type (UnicodeString *)
224     uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject);
225     return true;
226 }
227 
228 //--------------------------------------------------------------------------
229 //
230 //   zap            Delete everything owned by this RegexPattern.
231 //
232 //--------------------------------------------------------------------------
zap()233 void RegexPattern::zap() {
234     delete fCompiledPat;
235     fCompiledPat = NULL;
236     int i;
237     for (i=1; i<fSets->size(); i++) {
238         UnicodeSet *s;
239         s = (UnicodeSet *)fSets->elementAt(i);
240         if (s != NULL) {
241             delete s;
242         }
243     }
244     delete fSets;
245     fSets = NULL;
246     delete[] fSets8;
247     fSets8 = NULL;
248     delete fGroupMap;
249     fGroupMap = NULL;
250     delete fInitialChars;
251     fInitialChars = NULL;
252     delete fInitialChars8;
253     fInitialChars8 = NULL;
254     if (fPattern != NULL) {
255         utext_close(fPattern);
256         fPattern = NULL;
257     }
258     if (fPatternString != NULL) {
259         delete fPatternString;
260         fPatternString = NULL;
261     }
262     if (fNamedCaptureMap != NULL) {
263         uhash_close(fNamedCaptureMap);
264         fNamedCaptureMap = NULL;
265     }
266 }
267 
268 
269 //--------------------------------------------------------------------------
270 //
271 //   Destructor
272 //
273 //--------------------------------------------------------------------------
~RegexPattern()274 RegexPattern::~RegexPattern() {
275     zap();
276 }
277 
278 
279 //--------------------------------------------------------------------------
280 //
281 //   Clone
282 //
283 //--------------------------------------------------------------------------
clone() const284 RegexPattern  *RegexPattern::clone() const {
285     RegexPattern  *copy = new RegexPattern(*this);
286     return copy;
287 }
288 
289 
//--------------------------------------------------------------------------
//
//   operator ==   (comparison)    Consider two patterns to be == if the
//                                 pattern strings and the flags are the same.
//                                 Note that pattern strings with the same
//                                 characters can still be considered different.
//
//--------------------------------------------------------------------------
operator ==(const RegexPattern & other) const298 UBool   RegexPattern::operator ==(const RegexPattern &other) const {
299     if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
300         if (this->fPatternString != NULL && other.fPatternString != NULL) {
301             return *(this->fPatternString) == *(other.fPatternString);
302         } else if (this->fPattern == NULL) {
303             if (other.fPattern == NULL) {
304                 return TRUE;
305             }
306         } else if (other.fPattern != NULL) {
307             UTEXT_SETNATIVEINDEX(this->fPattern, 0);
308             UTEXT_SETNATIVEINDEX(other.fPattern, 0);
309             return utext_equals(this->fPattern, other.fPattern);
310         }
311     }
312     return FALSE;
313 }
314 
315 //---------------------------------------------------------------------
316 //
317 //   compile
318 //
319 //---------------------------------------------------------------------
320 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UParseError & pe,UErrorCode & status)321 RegexPattern::compile(const UnicodeString &regex,
322                       uint32_t             flags,
323                       UParseError          &pe,
324                       UErrorCode           &status)
325 {
326     if (U_FAILURE(status)) {
327         return NULL;
328     }
329 
330     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
331     UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
332     UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
333 
334     if ((flags & ~allFlags) != 0) {
335         status = U_REGEX_INVALID_FLAG;
336         return NULL;
337     }
338 
339     if ((flags & UREGEX_CANON_EQ) != 0) {
340         status = U_REGEX_UNIMPLEMENTED;
341         return NULL;
342     }
343 
344     RegexPattern *This = new RegexPattern;
345     if (This == NULL) {
346         status = U_MEMORY_ALLOCATION_ERROR;
347         return NULL;
348     }
349     if (U_FAILURE(This->fDeferredStatus)) {
350         status = This->fDeferredStatus;
351         delete This;
352         return NULL;
353     }
354     This->fFlags = flags;
355 
356     RegexCompile     compiler(This, status);
357     compiler.compile(regex, pe, status);
358 
359     if (U_FAILURE(status)) {
360         delete This;
361         This = NULL;
362     }
363 
364     return This;
365 }
366 
367 
368 //
369 //   compile, UText mode
370 //
371 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UParseError & pe,UErrorCode & status)372 RegexPattern::compile(UText                *regex,
373                       uint32_t             flags,
374                       UParseError          &pe,
375                       UErrorCode           &status)
376 {
377     if (U_FAILURE(status)) {
378         return NULL;
379     }
380 
381     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
382                               UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
383                               UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
384 
385     if ((flags & ~allFlags) != 0) {
386         status = U_REGEX_INVALID_FLAG;
387         return NULL;
388     }
389 
390     if ((flags & UREGEX_CANON_EQ) != 0) {
391         status = U_REGEX_UNIMPLEMENTED;
392         return NULL;
393     }
394 
395     RegexPattern *This = new RegexPattern;
396     if (This == NULL) {
397         status = U_MEMORY_ALLOCATION_ERROR;
398         return NULL;
399     }
400     if (U_FAILURE(This->fDeferredStatus)) {
401         status = This->fDeferredStatus;
402         delete This;
403         return NULL;
404     }
405     This->fFlags = flags;
406 
407     RegexCompile     compiler(This, status);
408     compiler.compile(regex, pe, status);
409 
410     if (U_FAILURE(status)) {
411         delete This;
412         This = NULL;
413     }
414 
415     return This;
416 }
417 
418 //
419 //   compile with default flags.
420 //
421 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,UParseError & pe,UErrorCode & err)422 RegexPattern::compile(const UnicodeString &regex,
423                       UParseError         &pe,
424                       UErrorCode          &err)
425 {
426     return compile(regex, 0, pe, err);
427 }
428 
429 
430 //
431 //   compile with default flags, UText mode
432 //
433 RegexPattern * U_EXPORT2
compile(UText * regex,UParseError & pe,UErrorCode & err)434 RegexPattern::compile(UText               *regex,
435                       UParseError         &pe,
436                       UErrorCode          &err)
437 {
438     return compile(regex, 0, pe, err);
439 }
440 
441 
442 //
443 //   compile with no UParseErr parameter.
444 //
445 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UErrorCode & err)446 RegexPattern::compile(const UnicodeString &regex,
447                       uint32_t             flags,
448                       UErrorCode          &err)
449 {
450     UParseError pe;
451     return compile(regex, flags, pe, err);
452 }
453 
454 
455 //
456 //   compile with no UParseErr parameter, UText mode
457 //
458 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UErrorCode & err)459 RegexPattern::compile(UText                *regex,
460                       uint32_t             flags,
461                       UErrorCode           &err)
462 {
463     UParseError pe;
464     return compile(regex, flags, pe, err);
465 }
466 
467 
468 //---------------------------------------------------------------------
469 //
470 //   flags
471 //
472 //---------------------------------------------------------------------
flags() const473 uint32_t RegexPattern::flags() const {
474     return fFlags;
475 }
476 
477 
478 //---------------------------------------------------------------------
479 //
480 //   matcher(UnicodeString, err)
481 //
482 //---------------------------------------------------------------------
matcher(const UnicodeString & input,UErrorCode & status) const483 RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
484                                     UErrorCode          &status)  const {
485     RegexMatcher    *retMatcher = matcher(status);
486     if (retMatcher != NULL) {
487         retMatcher->fDeferredStatus = status;
488         retMatcher->reset(input);
489     }
490     return retMatcher;
491 }
492 
493 
494 //---------------------------------------------------------------------
495 //
496 //   matcher(status)
497 //
498 //---------------------------------------------------------------------
matcher(UErrorCode & status) const499 RegexMatcher *RegexPattern::matcher(UErrorCode &status)  const {
500     RegexMatcher    *retMatcher = NULL;
501 
502     if (U_FAILURE(status)) {
503         return NULL;
504     }
505     if (U_FAILURE(fDeferredStatus)) {
506         status = fDeferredStatus;
507         return NULL;
508     }
509 
510     retMatcher = new RegexMatcher(this);
511     if (retMatcher == NULL) {
512         status = U_MEMORY_ALLOCATION_ERROR;
513         return NULL;
514     }
515     return retMatcher;
516 }
517 
518 
519 
520 //---------------------------------------------------------------------
521 //
522 //   matches        Convenience function to test for a match, starting
523 //                  with a pattern string and a data string.
524 //
525 //---------------------------------------------------------------------
matches(const UnicodeString & regex,const UnicodeString & input,UParseError & pe,UErrorCode & status)526 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString   &regex,
527               const UnicodeString   &input,
528                     UParseError     &pe,
529                     UErrorCode      &status) {
530 
531     if (U_FAILURE(status)) {return FALSE;}
532 
533     UBool         retVal;
534     RegexPattern *pat     = NULL;
535     RegexMatcher *matcher = NULL;
536 
537     pat     = RegexPattern::compile(regex, 0, pe, status);
538     matcher = pat->matcher(input, status);
539     retVal  = matcher->matches(status);
540 
541     delete matcher;
542     delete pat;
543     return retVal;
544 }
545 
546 
547 //
548 //   matches, UText mode
549 //
matches(UText * regex,UText * input,UParseError & pe,UErrorCode & status)550 UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
551                     UText           *input,
552                     UParseError     &pe,
553                     UErrorCode      &status) {
554 
555     if (U_FAILURE(status)) {return FALSE;}
556 
557     UBool         retVal  = FALSE;
558     RegexPattern *pat     = NULL;
559     RegexMatcher *matcher = NULL;
560 
561     pat     = RegexPattern::compile(regex, 0, pe, status);
562     matcher = pat->matcher(status);
563     if (U_SUCCESS(status)) {
564         matcher->reset(input);
565         retVal  = matcher->matches(status);
566     }
567 
568     delete matcher;
569     delete pat;
570     return retVal;
571 }
572 
573 
574 
575 
576 
577 //---------------------------------------------------------------------
578 //
579 //   pattern
580 //
581 //---------------------------------------------------------------------
pattern() const582 UnicodeString RegexPattern::pattern() const {
583     if (fPatternString != NULL) {
584         return *fPatternString;
585     } else if (fPattern == NULL) {
586         return UnicodeString();
587     } else {
588         UErrorCode status = U_ZERO_ERROR;
589         int64_t nativeLen = utext_nativeLength(fPattern);
590         int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
591         UnicodeString result;
592 
593         status = U_ZERO_ERROR;
594         UChar *resultChars = result.getBuffer(len16);
595         utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
596         result.releaseBuffer(len16);
597 
598         return result;
599     }
600 }
601 
602 
603 
604 
605 //---------------------------------------------------------------------
606 //
607 //   patternText
608 //
609 //---------------------------------------------------------------------
patternText(UErrorCode & status) const610 UText *RegexPattern::patternText(UErrorCode      &status) const {
611     if (U_FAILURE(status)) {return NULL;}
612     status = U_ZERO_ERROR;
613 
614     if (fPattern != NULL) {
615         return fPattern;
616     } else {
617         RegexStaticSets::initGlobals(&status);
618         return RegexStaticSets::gStaticSets->fEmptyText;
619     }
620 }
621 
622 
623 //--------------------------------------------------------------------------------
624 //
625 //  groupNumberFromName()
626 //
627 //--------------------------------------------------------------------------------
groupNumberFromName(const UnicodeString & groupName,UErrorCode & status) const628 int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const {
629     if (U_FAILURE(status)) {
630         return 0;
631     }
632 
633     // No need to explicitly check for syntactically valid names.
634     // Invalid ones will never be in the map, and the lookup will fail.
635 
636     int32_t number = fNamedCaptureMap ? uhash_geti(fNamedCaptureMap, &groupName) : 0;
637     if (number == 0) {
638         status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
639     }
640     return number;
641 }
642 
groupNumberFromName(const char * groupName,int32_t nameLength,UErrorCode & status) const643 int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const {
644     if (U_FAILURE(status)) {
645         return 0;
646     }
647     UnicodeString name(groupName, nameLength, US_INV);
648     return groupNumberFromName(name, status);
649 }
650 
651 
652 //---------------------------------------------------------------------
653 //
654 //   split
655 //
656 //---------------------------------------------------------------------
split(const UnicodeString & input,UnicodeString dest[],int32_t destCapacity,UErrorCode & status) const657 int32_t  RegexPattern::split(const UnicodeString &input,
658         UnicodeString    dest[],
659         int32_t          destCapacity,
660         UErrorCode      &status) const
661 {
662     if (U_FAILURE(status)) {
663         return 0;
664     }
665 
666     RegexMatcher  m(this);
667     int32_t r = 0;
668     // Check m's status to make sure all is ok.
669     if (U_SUCCESS(m.fDeferredStatus)) {
670     	r = m.split(input, dest, destCapacity, status);
671     }
672     return r;
673 }
674 
675 //
676 //   split, UText mode
677 //
split(UText * input,UText * dest[],int32_t destCapacity,UErrorCode & status) const678 int32_t  RegexPattern::split(UText *input,
679         UText           *dest[],
680         int32_t          destCapacity,
681         UErrorCode      &status) const
682 {
683     if (U_FAILURE(status)) {
684         return 0;
685     }
686 
687     RegexMatcher  m(this);
688     int32_t r = 0;
689     // Check m's status to make sure all is ok.
690     if (U_SUCCESS(m.fDeferredStatus)) {
691     	r = m.split(input, dest, destCapacity, status);
692     }
693     return r;
694 }
695 
696 
697 //---------------------------------------------------------------------
698 //
699 //   dump    Output the compiled form of the pattern.
700 //           Debugging function only.
701 //
702 //---------------------------------------------------------------------
dumpOp(int32_t index) const703 void   RegexPattern::dumpOp(int32_t index) const {
704     (void)index;  // Suppress warnings in non-debug build.
705 #if defined(REGEX_DEBUG)
706     static const char * const opNames[] = {URX_OPCODE_NAMES};
707     int32_t op          = fCompiledPat->elementAti(index);
708     int32_t val         = URX_VAL(op);
709     int32_t type        = URX_TYPE(op);
710     int32_t pinnedType  = type;
711     if ((uint32_t)pinnedType >= UPRV_LENGTHOF(opNames)) {
712         pinnedType = 0;
713     }
714 
715     printf("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]);
716     switch (type) {
717     case URX_NOP:
718     case URX_DOTANY:
719     case URX_DOTANY_ALL:
720     case URX_FAIL:
721     case URX_CARET:
722     case URX_DOLLAR:
723     case URX_BACKSLASH_G:
724     case URX_BACKSLASH_X:
725     case URX_END:
726     case URX_DOLLAR_M:
727     case URX_CARET_M:
728         // Types with no operand field of interest.
729         break;
730 
731     case URX_RESERVED_OP:
732     case URX_START_CAPTURE:
733     case URX_END_CAPTURE:
734     case URX_STATE_SAVE:
735     case URX_JMP:
736     case URX_JMP_SAV:
737     case URX_JMP_SAV_X:
738     case URX_BACKSLASH_B:
739     case URX_BACKSLASH_BU:
740     case URX_BACKSLASH_D:
741     case URX_BACKSLASH_Z:
742     case URX_STRING_LEN:
743     case URX_CTR_INIT:
744     case URX_CTR_INIT_NG:
745     case URX_CTR_LOOP:
746     case URX_CTR_LOOP_NG:
747     case URX_RELOC_OPRND:
748     case URX_STO_SP:
749     case URX_LD_SP:
750     case URX_BACKREF:
751     case URX_STO_INP_LOC:
752     case URX_JMPX:
753     case URX_LA_START:
754     case URX_LA_END:
755     case URX_BACKREF_I:
756     case URX_LB_START:
757     case URX_LB_CONT:
758     case URX_LB_END:
759     case URX_LBN_CONT:
760     case URX_LBN_END:
761     case URX_LOOP_C:
762     case URX_LOOP_DOT_I:
763     case URX_BACKSLASH_H:
764     case URX_BACKSLASH_R:
765     case URX_BACKSLASH_V:
766         // types with an integer operand field.
767         printf("%d", val);
768         break;
769 
770     case URX_ONECHAR:
771     case URX_ONECHAR_I:
772         if (val < 0x20) {
773             printf("%#x", val);
774         } else {
775             printf("'%s'", CStr(UnicodeString(val))());
776         }
777         break;
778 
779     case URX_STRING:
780     case URX_STRING_I:
781         {
782             int32_t lengthOp       = fCompiledPat->elementAti(index+1);
783             U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
784             int32_t length = URX_VAL(lengthOp);
785             UnicodeString str(fLiteralText, val, length);
786             printf("%s", CStr(str)());
787         }
788         break;
789 
790     case URX_SETREF:
791     case URX_LOOP_SR_I:
792         {
793             UnicodeString s;
794             UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
795             set->toPattern(s, TRUE);
796             printf("%s", CStr(s)());
797         }
798         break;
799 
800     case URX_STATIC_SETREF:
801     case URX_STAT_SETREF_N:
802         {
803             UnicodeString s;
804             if (val & URX_NEG_SET) {
805                 printf("NOT ");
806                 val &= ~URX_NEG_SET;
807             }
808             UnicodeSet *set = fStaticSets[val];
809             set->toPattern(s, TRUE);
810             printf("%s", CStr(s)());
811         }
812         break;
813 
814 
815     default:
816         printf("??????");
817         break;
818     }
819     printf("\n");
820 #endif
821 }
822 
823 
dumpPattern() const824 void RegexPattern::dumpPattern() const {
825 #if defined(REGEX_DEBUG)
826     int      index;
827 
828     UnicodeString patStr;
829     for (UChar32 c = utext_next32From(fPattern, 0); c != U_SENTINEL; c = utext_next32(fPattern)) {
830         patStr.append(c);
831     }
832     printf("Original Pattern:  \"%s\"\n", CStr(patStr)());
833     printf("   Min Match Length:  %d\n", fMinMatchLen);
834     printf("   Match Start Type:  %s\n", START_OF_MATCH_STR(fStartType));
835     if (fStartType == START_STRING) {
836         UnicodeString initialString(fLiteralText,fInitialStringIdx, fInitialStringLen);
837         printf("   Initial match string: \"%s\"\n", CStr(initialString)());
838     } else if (fStartType == START_SET) {
839         UnicodeString s;
840         fInitialChars->toPattern(s, TRUE);
841         printf("    Match First Chars: %s\n", CStr(s)());
842 
843     } else if (fStartType == START_CHAR) {
844         printf("    First char of Match: ");
845         if (fInitialChar > 0x20) {
846                 printf("'%s'\n", CStr(UnicodeString(fInitialChar))());
847             } else {
848                 printf("%#x\n", fInitialChar);
849             }
850     }
851 
852     printf("Named Capture Groups:\n");
853     if (!fNamedCaptureMap || uhash_count(fNamedCaptureMap) == 0) {
854         printf("   None\n");
855     } else {
856         int32_t pos = UHASH_FIRST;
857         const UHashElement *el = NULL;
858         while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) {
859             const UnicodeString *name = (const UnicodeString *)el->key.pointer;
860             int32_t number = el->value.integer;
861             printf("   %d\t%s\n", number, CStr(*name)());
862         }
863     }
864 
865     printf("\nIndex   Binary     Type             Operand\n" \
866            "-------------------------------------------\n");
867     for (index = 0; index<fCompiledPat->size(); index++) {
868         dumpOp(index);
869     }
870     printf("\n\n");
871 #endif
872 }
873 
874 
875 
876 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
877 
878 U_NAMESPACE_END
879 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
880