• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 //  file:  repattrn.cpp
3 //
4 /*
5 ***************************************************************************
6 *   Copyright (C) 2002-2015 International Business Machines Corporation   *
7 *   and others. All rights reserved.                                      *
8 ***************************************************************************
9 */
10 
11 #include "unicode/utypes.h"
12 
13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
14 
15 #include "unicode/regex.h"
16 #include "unicode/uclean.h"
17 #include "uassert.h"
18 #include "uhash.h"
19 #include "uvector.h"
20 #include "uvectr32.h"
21 #include "uvectr64.h"
22 #include "regexcmp.h"
23 #include "regeximp.h"
24 #include "regexst.h"
25 
26 U_NAMESPACE_BEGIN
27 
28 //--------------------------------------------------------------------------
29 //
30 //    RegexPattern    Default Constructor
31 //
32 //--------------------------------------------------------------------------
RegexPattern()33 RegexPattern::RegexPattern() {
34     // Init all of this instances data.
35     init();
36 }
37 
38 
39 //--------------------------------------------------------------------------
40 //
41 //   Copy Constructor        Note:  This is a rather inefficient implementation,
42 //                                  but it probably doesn't matter.
43 //
44 //--------------------------------------------------------------------------
RegexPattern::RegexPattern(const RegexPattern &other) :  UObject(other) {
    // The members must hold defined values before assignment, because the
    // assignment operator starts by calling zap() on the current contents.
    init();

    // Delegate the actual deep copy to the assignment operator.
    operator=(other);
}
49 
50 
51 
52 //--------------------------------------------------------------------------
53 //
54 //    Assignment Operator
55 //
56 //--------------------------------------------------------------------------
operator =(const RegexPattern & other)57 RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
58     if (this == &other) {
59         // Source and destination are the same.  Don't do anything.
60         return *this;
61     }
62 
63     // Clean out any previous contents of object being assigned to.
64     zap();
65 
66     // Give target object a default initialization
67     init();
68 
69     // Copy simple fields
70     fDeferredStatus   = other.fDeferredStatus;
71 
72     if (U_FAILURE(fDeferredStatus)) {
73         return *this;
74     }
75 
76     if (other.fPatternString == NULL) {
77         fPatternString = NULL;
78         fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
79     } else {
80         fPatternString = new UnicodeString(*(other.fPatternString));
81         if (fPatternString == NULL) {
82             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
83         } else {
84             fPattern = utext_openConstUnicodeString(NULL, fPatternString, &fDeferredStatus);
85         }
86     }
87     if (U_FAILURE(fDeferredStatus)) {
88         return *this;
89     }
90 
91     fFlags            = other.fFlags;
92     fLiteralText      = other.fLiteralText;
93     fMinMatchLen      = other.fMinMatchLen;
94     fFrameSize        = other.fFrameSize;
95     fDataSize         = other.fDataSize;
96     fStaticSets       = other.fStaticSets;
97     fStaticSets8      = other.fStaticSets8;
98 
99     fStartType        = other.fStartType;
100     fInitialStringIdx = other.fInitialStringIdx;
101     fInitialStringLen = other.fInitialStringLen;
102     *fInitialChars    = *other.fInitialChars;
103     fInitialChar      = other.fInitialChar;
104     *fInitialChars8   = *other.fInitialChars8;
105     fNeedsAltInput    = other.fNeedsAltInput;
106 
107     //  Copy the pattern.  It's just values, nothing deep to copy.
108     fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
109     fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
110 
111     //  Copy the Unicode Sets.
112     //    Could be made more efficient if the sets were reference counted and shared,
113     //    but I doubt that pattern copying will be particularly common.
114     //    Note:  init() already added an empty element zero to fSets
115     int32_t i;
116     int32_t  numSets = other.fSets->size();
117     fSets8 = new Regex8BitSet[numSets];
118     if (fSets8 == NULL) {
119     	fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
120     	return *this;
121     }
122     for (i=1; i<numSets; i++) {
123         if (U_FAILURE(fDeferredStatus)) {
124             return *this;
125         }
126         UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
127         UnicodeSet *newSet    = new UnicodeSet(*sourceSet);
128         if (newSet == NULL) {
129             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
130             break;
131         }
132         fSets->addElement(newSet, fDeferredStatus);
133         fSets8[i] = other.fSets8[i];
134     }
135 
136     // Copy the named capture group hash map.
137     int32_t hashPos = UHASH_FIRST;
138     while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) {
139         if (U_FAILURE(fDeferredStatus)) {
140             break;
141         }
142         const UnicodeString *name = (const UnicodeString *)hashEl->key.pointer;
143         UnicodeString *key = new UnicodeString(*name);
144         int32_t val = hashEl->value.integer;
145         if (key == NULL) {
146             fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
147         } else {
148             uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus);
149         }
150     }
151     return *this;
152 }
153 
154 
155 //--------------------------------------------------------------------------
156 //
157 //    init        Shared initialization for use by constructors.
158 //                Bring an uninitialized RegexPattern up to a default state.
159 //
160 //--------------------------------------------------------------------------
init()161 void RegexPattern::init() {
162     fFlags            = 0;
163     fCompiledPat      = 0;
164     fLiteralText.remove();
165     fSets             = NULL;
166     fSets8            = NULL;
167     fDeferredStatus   = U_ZERO_ERROR;
168     fMinMatchLen      = 0;
169     fFrameSize        = 0;
170     fDataSize         = 0;
171     fGroupMap         = NULL;
172     fStaticSets       = NULL;
173     fStaticSets8      = NULL;
174     fStartType        = START_NO_INFO;
175     fInitialStringIdx = 0;
176     fInitialStringLen = 0;
177     fInitialChars     = NULL;
178     fInitialChar      = 0;
179     fInitialChars8    = NULL;
180     fNeedsAltInput    = FALSE;
181     fNamedCaptureMap  = NULL;
182 
183     fPattern          = NULL; // will be set later
184     fPatternString    = NULL; // may be set later
185     fCompiledPat      = new UVector64(fDeferredStatus);
186     fGroupMap         = new UVector32(fDeferredStatus);
187     fSets             = new UVector(fDeferredStatus);
188     fInitialChars     = new UnicodeSet;
189     fInitialChars8    = new Regex8BitSet;
190     fNamedCaptureMap  = uhash_open(uhash_hashUnicodeString,     // Key hash function
191                                    uhash_compareUnicodeString,  // Key comparator function
192                                    uhash_compareLong,           // Value comparator function
193                                    &fDeferredStatus);
194     if (U_FAILURE(fDeferredStatus)) {
195         return;
196     }
197     if (fCompiledPat == NULL  || fGroupMap == NULL || fSets == NULL ||
198             fInitialChars == NULL || fInitialChars8 == NULL || fNamedCaptureMap == NULL) {
199         fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
200         return;
201     }
202 
203     // Slot zero of the vector of sets is reserved.  Fill it here.
204     fSets->addElement((int32_t)0, fDeferredStatus);
205 
206     // fNamedCaptureMap owns its key strings, type (UnicodeString *)
207     uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject);
208 }
209 
210 
211 //--------------------------------------------------------------------------
212 //
213 //   zap            Delete everything owned by this RegexPattern.
214 //
215 //--------------------------------------------------------------------------
zap()216 void RegexPattern::zap() {
217     delete fCompiledPat;
218     fCompiledPat = NULL;
219     int i;
220     for (i=1; i<fSets->size(); i++) {
221         UnicodeSet *s;
222         s = (UnicodeSet *)fSets->elementAt(i);
223         if (s != NULL) {
224             delete s;
225         }
226     }
227     delete fSets;
228     fSets = NULL;
229     delete[] fSets8;
230     fSets8 = NULL;
231     delete fGroupMap;
232     fGroupMap = NULL;
233     delete fInitialChars;
234     fInitialChars = NULL;
235     delete fInitialChars8;
236     fInitialChars8 = NULL;
237     if (fPattern != NULL) {
238         utext_close(fPattern);
239         fPattern = NULL;
240     }
241     if (fPatternString != NULL) {
242         delete fPatternString;
243         fPatternString = NULL;
244     }
245     uhash_close(fNamedCaptureMap);
246     fNamedCaptureMap = NULL;
247 }
248 
249 
250 //--------------------------------------------------------------------------
251 //
252 //   Destructor
253 //
254 //--------------------------------------------------------------------------
~RegexPattern()255 RegexPattern::~RegexPattern() {
256     zap();
257 }
258 
259 
260 //--------------------------------------------------------------------------
261 //
262 //   Clone
263 //
264 //--------------------------------------------------------------------------
clone() const265 RegexPattern  *RegexPattern::clone() const {
266     RegexPattern  *copy = new RegexPattern(*this);
267     return copy;
268 }
269 
270 
271 //--------------------------------------------------------------------------
272 //
273 //   operator ==   (comparison)    Consider two patterns to be == if the
274 //                                 pattern strings and the flags are the same.
275 //                                 Note that pattern strings with the same
276 //                                 characters can still be considered different.
277 //
278 //--------------------------------------------------------------------------
operator ==(const RegexPattern & other) const279 UBool   RegexPattern::operator ==(const RegexPattern &other) const {
280     if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
281         if (this->fPatternString != NULL && other.fPatternString != NULL) {
282             return *(this->fPatternString) == *(other.fPatternString);
283         } else if (this->fPattern == NULL) {
284             if (other.fPattern == NULL) {
285                 return TRUE;
286             }
287         } else if (other.fPattern != NULL) {
288             UTEXT_SETNATIVEINDEX(this->fPattern, 0);
289             UTEXT_SETNATIVEINDEX(other.fPattern, 0);
290             return utext_equals(this->fPattern, other.fPattern);
291         }
292     }
293     return FALSE;
294 }
295 
296 //---------------------------------------------------------------------
297 //
298 //   compile
299 //
300 //---------------------------------------------------------------------
301 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UParseError & pe,UErrorCode & status)302 RegexPattern::compile(const UnicodeString &regex,
303                       uint32_t             flags,
304                       UParseError          &pe,
305                       UErrorCode           &status)
306 {
307     if (U_FAILURE(status)) {
308         return NULL;
309     }
310 
311     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
312     UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
313     UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
314 
315     if ((flags & ~allFlags) != 0) {
316         status = U_REGEX_INVALID_FLAG;
317         return NULL;
318     }
319 
320     if ((flags & UREGEX_CANON_EQ) != 0) {
321         status = U_REGEX_UNIMPLEMENTED;
322         return NULL;
323     }
324 
325     RegexPattern *This = new RegexPattern;
326     if (This == NULL) {
327         status = U_MEMORY_ALLOCATION_ERROR;
328         return NULL;
329     }
330     if (U_FAILURE(This->fDeferredStatus)) {
331         status = This->fDeferredStatus;
332         delete This;
333         return NULL;
334     }
335     This->fFlags = flags;
336 
337     RegexCompile     compiler(This, status);
338     compiler.compile(regex, pe, status);
339 
340     if (U_FAILURE(status)) {
341         delete This;
342         This = NULL;
343     }
344 
345     return This;
346 }
347 
348 
349 //
350 //   compile, UText mode
351 //
352 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UParseError & pe,UErrorCode & status)353 RegexPattern::compile(UText                *regex,
354                       uint32_t             flags,
355                       UParseError          &pe,
356                       UErrorCode           &status)
357 {
358     if (U_FAILURE(status)) {
359         return NULL;
360     }
361 
362     const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
363                               UREGEX_DOTALL   | UREGEX_MULTILINE        | UREGEX_UWORD |
364                               UREGEX_ERROR_ON_UNKNOWN_ESCAPES           | UREGEX_UNIX_LINES | UREGEX_LITERAL;
365 
366     if ((flags & ~allFlags) != 0) {
367         status = U_REGEX_INVALID_FLAG;
368         return NULL;
369     }
370 
371     if ((flags & UREGEX_CANON_EQ) != 0) {
372         status = U_REGEX_UNIMPLEMENTED;
373         return NULL;
374     }
375 
376     RegexPattern *This = new RegexPattern;
377     if (This == NULL) {
378         status = U_MEMORY_ALLOCATION_ERROR;
379         return NULL;
380     }
381     if (U_FAILURE(This->fDeferredStatus)) {
382         status = This->fDeferredStatus;
383         delete This;
384         return NULL;
385     }
386     This->fFlags = flags;
387 
388     RegexCompile     compiler(This, status);
389     compiler.compile(regex, pe, status);
390 
391     if (U_FAILURE(status)) {
392         delete This;
393         This = NULL;
394     }
395 
396     return This;
397 }
398 
399 //
400 //   compile with default flags.
401 //
402 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,UParseError & pe,UErrorCode & err)403 RegexPattern::compile(const UnicodeString &regex,
404                       UParseError         &pe,
405                       UErrorCode          &err)
406 {
407     return compile(regex, 0, pe, err);
408 }
409 
410 
411 //
412 //   compile with default flags, UText mode
413 //
414 RegexPattern * U_EXPORT2
compile(UText * regex,UParseError & pe,UErrorCode & err)415 RegexPattern::compile(UText               *regex,
416                       UParseError         &pe,
417                       UErrorCode          &err)
418 {
419     return compile(regex, 0, pe, err);
420 }
421 
422 
423 //
424 //   compile with no UParseErr parameter.
425 //
426 RegexPattern * U_EXPORT2
compile(const UnicodeString & regex,uint32_t flags,UErrorCode & err)427 RegexPattern::compile(const UnicodeString &regex,
428                       uint32_t             flags,
429                       UErrorCode          &err)
430 {
431     UParseError pe;
432     return compile(regex, flags, pe, err);
433 }
434 
435 
436 //
437 //   compile with no UParseErr parameter, UText mode
438 //
439 RegexPattern * U_EXPORT2
compile(UText * regex,uint32_t flags,UErrorCode & err)440 RegexPattern::compile(UText                *regex,
441                       uint32_t             flags,
442                       UErrorCode           &err)
443 {
444     UParseError pe;
445     return compile(regex, flags, pe, err);
446 }
447 
448 
449 //---------------------------------------------------------------------
450 //
451 //   flags
452 //
453 //---------------------------------------------------------------------
flags() const454 uint32_t RegexPattern::flags() const {
455     return fFlags;
456 }
457 
458 
459 //---------------------------------------------------------------------
460 //
461 //   matcher(UnicodeString, err)
462 //
463 //---------------------------------------------------------------------
matcher(const UnicodeString & input,UErrorCode & status) const464 RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
465                                     UErrorCode          &status)  const {
466     RegexMatcher    *retMatcher = matcher(status);
467     if (retMatcher != NULL) {
468         retMatcher->fDeferredStatus = status;
469         retMatcher->reset(input);
470     }
471     return retMatcher;
472 }
473 
474 
475 //---------------------------------------------------------------------
476 //
477 //   matcher(status)
478 //
479 //---------------------------------------------------------------------
matcher(UErrorCode & status) const480 RegexMatcher *RegexPattern::matcher(UErrorCode &status)  const {
481     RegexMatcher    *retMatcher = NULL;
482 
483     if (U_FAILURE(status)) {
484         return NULL;
485     }
486     if (U_FAILURE(fDeferredStatus)) {
487         status = fDeferredStatus;
488         return NULL;
489     }
490 
491     retMatcher = new RegexMatcher(this);
492     if (retMatcher == NULL) {
493         status = U_MEMORY_ALLOCATION_ERROR;
494         return NULL;
495     }
496     return retMatcher;
497 }
498 
499 
500 
501 //---------------------------------------------------------------------
502 //
503 //   matches        Convenience function to test for a match, starting
504 //                  with a pattern string and a data string.
505 //
506 //---------------------------------------------------------------------
matches(const UnicodeString & regex,const UnicodeString & input,UParseError & pe,UErrorCode & status)507 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString   &regex,
508               const UnicodeString   &input,
509                     UParseError     &pe,
510                     UErrorCode      &status) {
511 
512     if (U_FAILURE(status)) {return FALSE;}
513 
514     UBool         retVal;
515     RegexPattern *pat     = NULL;
516     RegexMatcher *matcher = NULL;
517 
518     pat     = RegexPattern::compile(regex, 0, pe, status);
519     matcher = pat->matcher(input, status);
520     retVal  = matcher->matches(status);
521 
522     delete matcher;
523     delete pat;
524     return retVal;
525 }
526 
527 
528 //
529 //   matches, UText mode
530 //
matches(UText * regex,UText * input,UParseError & pe,UErrorCode & status)531 UBool U_EXPORT2 RegexPattern::matches(UText                *regex,
532                     UText           *input,
533                     UParseError     &pe,
534                     UErrorCode      &status) {
535 
536     if (U_FAILURE(status)) {return FALSE;}
537 
538     UBool         retVal  = FALSE;
539     RegexPattern *pat     = NULL;
540     RegexMatcher *matcher = NULL;
541 
542     pat     = RegexPattern::compile(regex, 0, pe, status);
543     matcher = pat->matcher(status);
544     if (U_SUCCESS(status)) {
545         matcher->reset(input);
546         retVal  = matcher->matches(status);
547     }
548 
549     delete matcher;
550     delete pat;
551     return retVal;
552 }
553 
554 
555 
556 
557 
558 //---------------------------------------------------------------------
559 //
560 //   pattern
561 //
562 //---------------------------------------------------------------------
pattern() const563 UnicodeString RegexPattern::pattern() const {
564     if (fPatternString != NULL) {
565         return *fPatternString;
566     } else if (fPattern == NULL) {
567         return UnicodeString();
568     } else {
569         UErrorCode status = U_ZERO_ERROR;
570         int64_t nativeLen = utext_nativeLength(fPattern);
571         int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
572         UnicodeString result;
573 
574         status = U_ZERO_ERROR;
575         UChar *resultChars = result.getBuffer(len16);
576         utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
577         result.releaseBuffer(len16);
578 
579         return result;
580     }
581 }
582 
583 
584 
585 
586 //---------------------------------------------------------------------
587 //
588 //   patternText
589 //
590 //---------------------------------------------------------------------
patternText(UErrorCode & status) const591 UText *RegexPattern::patternText(UErrorCode      &status) const {
592     if (U_FAILURE(status)) {return NULL;}
593     status = U_ZERO_ERROR;
594 
595     if (fPattern != NULL) {
596         return fPattern;
597     } else {
598         RegexStaticSets::initGlobals(&status);
599         return RegexStaticSets::gStaticSets->fEmptyText;
600     }
601 }
602 
603 
604 //--------------------------------------------------------------------------------
605 //
606 //  groupNumberFromName()
607 //
608 //--------------------------------------------------------------------------------
groupNumberFromName(const UnicodeString & groupName,UErrorCode & status) const609 int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const {
610     if (U_FAILURE(status)) {
611         return 0;
612     }
613 
614     // No need to explicitly check for syntactically valid names.
615     // Invalid ones will never be in the map, and the lookup will fail.
616 
617     int32_t number = uhash_geti(fNamedCaptureMap, &groupName);
618     if (number == 0) {
619         status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
620     }
621     return number;
622 }
623 
groupNumberFromName(const char * groupName,int32_t nameLength,UErrorCode & status) const624 int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const {
625     if (U_FAILURE(status)) {
626         return 0;
627     }
628     UnicodeString name(groupName, nameLength, US_INV);
629     return groupNumberFromName(name, status);
630 }
631 
632 
633 //---------------------------------------------------------------------
634 //
635 //   split
636 //
637 //---------------------------------------------------------------------
split(const UnicodeString & input,UnicodeString dest[],int32_t destCapacity,UErrorCode & status) const638 int32_t  RegexPattern::split(const UnicodeString &input,
639         UnicodeString    dest[],
640         int32_t          destCapacity,
641         UErrorCode      &status) const
642 {
643     if (U_FAILURE(status)) {
644         return 0;
645     };
646 
647     RegexMatcher  m(this);
648     int32_t r = 0;
649     // Check m's status to make sure all is ok.
650     if (U_SUCCESS(m.fDeferredStatus)) {
651     	r = m.split(input, dest, destCapacity, status);
652     }
653     return r;
654 }
655 
656 //
657 //   split, UText mode
658 //
split(UText * input,UText * dest[],int32_t destCapacity,UErrorCode & status) const659 int32_t  RegexPattern::split(UText *input,
660         UText           *dest[],
661         int32_t          destCapacity,
662         UErrorCode      &status) const
663 {
664     if (U_FAILURE(status)) {
665         return 0;
666     };
667 
668     RegexMatcher  m(this);
669     int32_t r = 0;
670     // Check m's status to make sure all is ok.
671     if (U_SUCCESS(m.fDeferredStatus)) {
672     	r = m.split(input, dest, destCapacity, status);
673     }
674     return r;
675 }
676 
677 
678 
679 //---------------------------------------------------------------------
680 //
681 //   dump    Output the compiled form of the pattern.
682 //           Debugging function only.
683 //
684 //---------------------------------------------------------------------
dumpOp(int32_t index) const685 void   RegexPattern::dumpOp(int32_t index) const {
    // Print one line describing the compiled-pattern operation at `index`:
    // the index, the raw op value in hex, the opcode name, and the operand
    // formatted according to the opcode type.
    // The whole body is compiled only when REGEX_DEBUG is defined.
686     (void)index;  // Suppress warnings in non-debug build.
687 #if defined(REGEX_DEBUG)
688     static const char * const opNames[] = {URX_OPCODE_NAMES};
689     int32_t op          = fCompiledPat->elementAti(index);
690     int32_t val         = URX_VAL(op);
691     int32_t type        = URX_TYPE(op);
    // pinnedType is used only to index opNames; a type beyond the end of the
    // table falls back to entry 0.  The switch below still uses `type`.
692     int32_t pinnedType  = type;
693     if ((uint32_t)pinnedType >= sizeof(opNames)/sizeof(char *)) {
694         pinnedType = 0;
695     }
696 
697     printf("%4d   %08x    %-15s  ", index, op, opNames[pinnedType]);
698     switch (type) {
699     case URX_NOP:
700     case URX_DOTANY:
701     case URX_DOTANY_ALL:
702     case URX_FAIL:
703     case URX_CARET:
704     case URX_DOLLAR:
705     case URX_BACKSLASH_G:
706     case URX_BACKSLASH_X:
707     case URX_END:
708     case URX_DOLLAR_M:
709     case URX_CARET_M:
710         // Types with no operand field of interest.
711         break;
712 
713     case URX_RESERVED_OP:
714     case URX_START_CAPTURE:
715     case URX_END_CAPTURE:
716     case URX_STATE_SAVE:
717     case URX_JMP:
718     case URX_JMP_SAV:
719     case URX_JMP_SAV_X:
720     case URX_BACKSLASH_B:
721     case URX_BACKSLASH_BU:
722     case URX_BACKSLASH_D:
723     case URX_BACKSLASH_Z:
724     case URX_STRING_LEN:
725     case URX_CTR_INIT:
726     case URX_CTR_INIT_NG:
727     case URX_CTR_LOOP:
728     case URX_CTR_LOOP_NG:
729     case URX_RELOC_OPRND:
730     case URX_STO_SP:
731     case URX_LD_SP:
732     case URX_BACKREF:
733     case URX_STO_INP_LOC:
734     case URX_JMPX:
735     case URX_LA_START:
736     case URX_LA_END:
737     case URX_BACKREF_I:
738     case URX_LB_START:
739     case URX_LB_CONT:
740     case URX_LB_END:
741     case URX_LBN_CONT:
742     case URX_LBN_END:
743     case URX_LOOP_C:
744     case URX_LOOP_DOT_I:
745     case URX_BACKSLASH_H:
746     case URX_BACKSLASH_R:
747     case URX_BACKSLASH_V:
748         // types with an integer operand field.
749         printf("%d", val);
750         break;
751 
752     case URX_ONECHAR:
753     case URX_ONECHAR_I:
        // Operand is a single character; values of 256 and above print as '?'.
754         printf("%c", val<256?val:'?');
755         break;
756 
757     case URX_STRING:
758     case URX_STRING_I:
759         {
            // The operand is a start index into fLiteralText; the length is
            // taken from the operand of the op that follows this one.
760             int32_t lengthOp       = fCompiledPat->elementAti(index+1);
761             U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
762             int32_t length = URX_VAL(lengthOp);
763             int32_t i;
764             for (i=val; i<val+length; i++) {
765                 UChar c = fLiteralText[i];
766                 if (c < 32 || c >= 256) {c = '.';}
767                 printf("%c", c);
768             }
769         }
770         break;
771 
772     case URX_SETREF:
773     case URX_LOOP_SR_I:
774         {
            // The operand is an index into fSets; print that set as a pattern.
775             UnicodeString s;
776             UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
777             set->toPattern(s, TRUE);
778             for (int32_t i=0; i<s.length(); i++) {
779                 printf("%c", s.charAt(i));
780             }
781         }
782         break;
783 
784     case URX_STATIC_SETREF:
785     case URX_STAT_SETREF_N:
786         {
            // The operand indexes fStaticSets once the URX_NEG_SET bit is
            // cleared; when that bit is set the output is prefixed with "NOT ".
787             UnicodeString s;
788             if (val & URX_NEG_SET) {
789                 printf("NOT ");
790                 val &= ~URX_NEG_SET;
791             }
792             UnicodeSet *set = fStaticSets[val];
793             set->toPattern(s, TRUE);
794             for (int32_t i=0; i<s.length(); i++) {
795                 printf("%c", s.charAt(i));
796             }
797         }
798         break;
799 
800 
801     default:
802         printf("??????");
803         break;
804     }
805     printf("\n");
806 #endif
807 }
808 
809 
dumpPattern() const810 void RegexPattern::dumpPattern() const {
811 #if defined(REGEX_DEBUG)
812     // TODO: This function assumes an ASCII based charset.
813     int      index;
814     int      i;
815 
816     printf("Original Pattern:  ");
817     UChar32 c = utext_next32From(fPattern, 0);
818     while (c != U_SENTINEL) {
819         if (c<32 || c>256) {
820             c = '.';
821         }
822         printf("%c", c);
823 
824         c = UTEXT_NEXT32(fPattern);
825     }
826     printf("\n");
827     printf("   Min Match Length:  %d\n", fMinMatchLen);
828     printf("   Match Start Type:  %s\n", START_OF_MATCH_STR(fStartType));
829     if (fStartType == START_STRING) {
830         printf("    Initial match string: \"");
831         for (i=fInitialStringIdx; i<fInitialStringIdx+fInitialStringLen; i++) {
832             printf("%c", fLiteralText[i]);   // TODO:  non-printables, surrogates.
833         }
834         printf("\"\n");
835 
836     } else if (fStartType == START_SET) {
837         int32_t numSetChars = fInitialChars->size();
838         if (numSetChars > 20) {
839             numSetChars = 20;
840         }
841         printf("     Match First Chars : ");
842         for (i=0; i<numSetChars; i++) {
843             UChar32 c = fInitialChars->charAt(i);
844             if (0x20<c && c <0x7e) {
845                 printf("%c ", c);
846             } else {
847                 printf("%#x ", c);
848             }
849         }
850         if (numSetChars < fInitialChars->size()) {
851             printf(" ...");
852         }
853         printf("\n");
854 
855     } else if (fStartType == START_CHAR) {
856         printf("    First char of Match : ");
857         if (0x20 < fInitialChar && fInitialChar<0x7e) {
858                 printf("%c\n", fInitialChar);
859             } else {
860                 printf("%#x\n", fInitialChar);
861             }
862     }
863 
864     printf("Named Capture Groups:\n");
865     if (uhash_count(fNamedCaptureMap) == 0) {
866         printf("   None\n");
867     } else {
868         int32_t pos = UHASH_FIRST;
869         const UHashElement *el = NULL;
870         while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) {
871             const UnicodeString *name = (const UnicodeString *)el->key.pointer;
872             char s[100];
873             name->extract(0, 99, s, sizeof(s), US_INV);  // capture group names are invariant.
874             int32_t number = el->value.integer;
875             printf("   %d\t%s\n", number, s);
876         }
877     }
878 
879     printf("\nIndex   Binary     Type             Operand\n" \
880            "-------------------------------------------\n");
881     for (index = 0; index<fCompiledPat->size(); index++) {
882         dumpOp(index);
883     }
884     printf("\n\n");
885 #endif
886 }
887 
888 
889 
// Macro invocation that supplies the RTTI implementation for RegexPattern.
// NOTE(review): presumably this defines the class's getStaticClassID() and
// getDynamicClassID(); the macro is defined outside this file -- confirm
// against the ICU UObject header.
890 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
891 
892 U_NAMESPACE_END
893 #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS
894