• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2006 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Rewrite POSIX and other features in re
6 // to use simple extended regular expression features.
7 // Also sort and simplify character classes.
8 
9 #include <stddef.h>
10 
11 #include <algorithm>
12 #include <string>
13 
14 #include "absl/log/absl_log.h"
15 #include "absl/strings/string_view.h"
16 #include "re2/pod_array.h"
17 #include "re2/regexp.h"
18 #include "re2/walker-inl.h"
19 #include "util/utf.h"
20 
21 namespace re2 {
22 
23 // Parses the regexp src and then simplifies it and sets *dst to the
24 // string representation of the simplified form.  Returns true on success.
25 // Returns false and sets *error (if error != NULL) on error.
SimplifyRegexp(absl::string_view src,ParseFlags flags,std::string * dst,RegexpStatus * status)26 bool Regexp::SimplifyRegexp(absl::string_view src, ParseFlags flags,
27                             std::string* dst, RegexpStatus* status) {
28   Regexp* re = Parse(src, flags, status);
29   if (re == NULL)
30     return false;
31   Regexp* sre = re->Simplify();
32   re->Decref();
33   if (sre == NULL) {
34     if (status) {
35       status->set_code(kRegexpInternalError);
36       status->set_error_arg(src);
37     }
38     return false;
39   }
40   *dst = sre->ToString();
41   sre->Decref();
42   return true;
43 }
44 
45 // Assuming the simple_ flags on the children are accurate,
46 // is this Regexp* simple?
ComputeSimple()47 bool Regexp::ComputeSimple() {
48   Regexp** subs;
49   switch (op_) {
50     case kRegexpNoMatch:
51     case kRegexpEmptyMatch:
52     case kRegexpLiteral:
53     case kRegexpLiteralString:
54     case kRegexpBeginLine:
55     case kRegexpEndLine:
56     case kRegexpBeginText:
57     case kRegexpWordBoundary:
58     case kRegexpNoWordBoundary:
59     case kRegexpEndText:
60     case kRegexpAnyChar:
61     case kRegexpAnyByte:
62     case kRegexpHaveMatch:
63       return true;
64     case kRegexpConcat:
65     case kRegexpAlternate:
66       // These are simple as long as the subpieces are simple.
67       subs = sub();
68       for (int i = 0; i < nsub_; i++)
69         if (!subs[i]->simple())
70           return false;
71       return true;
72     case kRegexpCharClass:
73       // Simple as long as the char class is not empty, not full.
74       if (ccb_ != NULL)
75         return !ccb_->empty() && !ccb_->full();
76       return !cc_->empty() && !cc_->full();
77     case kRegexpCapture:
78       subs = sub();
79       return subs[0]->simple();
80     case kRegexpStar:
81     case kRegexpPlus:
82     case kRegexpQuest:
83       subs = sub();
84       if (!subs[0]->simple())
85         return false;
86       switch (subs[0]->op_) {
87         case kRegexpStar:
88         case kRegexpPlus:
89         case kRegexpQuest:
90         case kRegexpEmptyMatch:
91         case kRegexpNoMatch:
92           return false;
93         default:
94           break;
95       }
96       return true;
97     case kRegexpRepeat:
98       return false;
99   }
100   ABSL_LOG(DFATAL) << "Case not handled in ComputeSimple: " << op_;
101   return false;
102 }
103 
104 // Walker subclass used by Simplify.
105 // Coalesces runs of star/plus/quest/repeat of the same literal along with any
106 // occurrences of that literal into repeats of that literal. It also works for
107 // char classes, any char and any byte.
108 // PostVisit creates the coalesced result, which should then be simplified.
109 class CoalesceWalker : public Regexp::Walker<Regexp*> {
110  public:
CoalesceWalker()111   CoalesceWalker() {}
112   virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg,
113                             Regexp** child_args, int nchild_args);
114   virtual Regexp* Copy(Regexp* re);
115   virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg);
116 
117  private:
118   // These functions are declared inside CoalesceWalker so that
119   // they can edit the private fields of the Regexps they construct.
120 
121   // Returns true if r1 and r2 can be coalesced. In particular, ensures that
122   // the parse flags are consistent. (They will not be checked again later.)
123   static bool CanCoalesce(Regexp* r1, Regexp* r2);
124 
125   // Coalesces *r1ptr and *r2ptr. In most cases, the array elements afterwards
126   // will be empty match and the coalesced op. In other cases, where part of a
127   // literal string was removed to be coalesced, the array elements afterwards
128   // will be the coalesced op and the remainder of the literal string.
129   static void DoCoalesce(Regexp** r1ptr, Regexp** r2ptr);
130 
131   CoalesceWalker(const CoalesceWalker&) = delete;
132   CoalesceWalker& operator=(const CoalesceWalker&) = delete;
133 };
134 
135 // Walker subclass used by Simplify.
136 // The simplify walk is purely post-recursive: given the simplified children,
137 // PostVisit creates the simplified result.
138 // The child_args are simplified Regexp*s.
139 class SimplifyWalker : public Regexp::Walker<Regexp*> {
140  public:
SimplifyWalker()141   SimplifyWalker() {}
142   virtual Regexp* PreVisit(Regexp* re, Regexp* parent_arg, bool* stop);
143   virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg,
144                             Regexp** child_args, int nchild_args);
145   virtual Regexp* Copy(Regexp* re);
146   virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg);
147 
148  private:
149   // These functions are declared inside SimplifyWalker so that
150   // they can edit the private fields of the Regexps they construct.
151 
152   // Creates a concatenation of two Regexp, consuming refs to re1 and re2.
153   // Caller must Decref return value when done with it.
154   static Regexp* Concat2(Regexp* re1, Regexp* re2, Regexp::ParseFlags flags);
155 
156   // Simplifies the expression re{min,max} in terms of *, +, and ?.
157   // Returns a new regexp.  Does not edit re.  Does not consume reference to re.
158   // Caller must Decref return value when done with it.
159   static Regexp* SimplifyRepeat(Regexp* re, int min, int max,
160                                 Regexp::ParseFlags parse_flags);
161 
162   // Simplifies a character class by expanding any named classes
163   // into rune ranges.  Does not edit re.  Does not consume ref to re.
164   // Caller must Decref return value when done with it.
165   static Regexp* SimplifyCharClass(Regexp* re);
166 
167   SimplifyWalker(const SimplifyWalker&) = delete;
168   SimplifyWalker& operator=(const SimplifyWalker&) = delete;
169 };
170 
171 // Simplifies a regular expression, returning a new regexp.
172 // The new regexp uses traditional Unix egrep features only,
173 // plus the Perl (?:) non-capturing parentheses.
174 // Otherwise, no POSIX or Perl additions.  The new regexp
175 // captures exactly the same subexpressions (with the same indices)
176 // as the original.
177 // Does not edit current object.
178 // Caller must Decref() return value when done with it.
179 
Simplify()180 Regexp* Regexp::Simplify() {
181   CoalesceWalker cw;
182   Regexp* cre = cw.Walk(this, NULL);
183   if (cre == NULL)
184     return NULL;
185   if (cw.stopped_early()) {
186     cre->Decref();
187     return NULL;
188   }
189   SimplifyWalker sw;
190   Regexp* sre = sw.Walk(cre, NULL);
191   cre->Decref();
192   if (sre == NULL)
193     return NULL;
194   if (sw.stopped_early()) {
195     sre->Decref();
196     return NULL;
197   }
198   return sre;
199 }
200 
201 #define Simplify DontCallSimplify  // Avoid accidental recursion
202 
203 // Utility function for PostVisit implementations that compares re->sub() with
204 // child_args to determine whether any child_args changed. In the common case,
205 // where nothing changed, calls Decref() for all child_args and returns false,
206 // so PostVisit must return re->Incref(). Otherwise, returns true.
ChildArgsChanged(Regexp * re,Regexp ** child_args)207 static bool ChildArgsChanged(Regexp* re, Regexp** child_args) {
208   for (int i = 0; i < re->nsub(); i++) {
209     Regexp* sub = re->sub()[i];
210     Regexp* newsub = child_args[i];
211     if (newsub != sub)
212       return true;
213   }
214   for (int i = 0; i < re->nsub(); i++) {
215     Regexp* newsub = child_args[i];
216     newsub->Decref();
217   }
218   return false;
219 }
220 
Copy(Regexp * re)221 Regexp* CoalesceWalker::Copy(Regexp* re) {
222   return re->Incref();
223 }
224 
ShortVisit(Regexp * re,Regexp * parent_arg)225 Regexp* CoalesceWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
226   // Should never be called: we use Walk(), not WalkExponential().
227 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
228   ABSL_LOG(DFATAL) << "CoalesceWalker::ShortVisit called";
229 #endif
230   return re->Incref();
231 }
232 
PostVisit(Regexp * re,Regexp * parent_arg,Regexp * pre_arg,Regexp ** child_args,int nchild_args)233 Regexp* CoalesceWalker::PostVisit(Regexp* re,
234                                   Regexp* parent_arg,
235                                   Regexp* pre_arg,
236                                   Regexp** child_args,
237                                   int nchild_args) {
238   if (re->nsub() == 0)
239     return re->Incref();
240 
241   if (re->op() != kRegexpConcat) {
242     if (!ChildArgsChanged(re, child_args))
243       return re->Incref();
244 
245     // Something changed. Build a new op.
246     Regexp* nre = new Regexp(re->op(), re->parse_flags());
247     nre->AllocSub(re->nsub());
248     Regexp** nre_subs = nre->sub();
249     for (int i = 0; i < re->nsub(); i++)
250       nre_subs[i] = child_args[i];
251     // Repeats and Captures have additional data that must be copied.
252     if (re->op() == kRegexpRepeat) {
253       nre->min_ = re->min();
254       nre->max_ = re->max();
255     } else if (re->op() == kRegexpCapture) {
256       nre->cap_ = re->cap();
257     }
258     return nre;
259   }
260 
261   bool can_coalesce = false;
262   for (int i = 0; i < re->nsub(); i++) {
263     if (i+1 < re->nsub() &&
264         CanCoalesce(child_args[i], child_args[i+1])) {
265       can_coalesce = true;
266       break;
267     }
268   }
269   if (!can_coalesce) {
270     if (!ChildArgsChanged(re, child_args))
271       return re->Incref();
272 
273     // Something changed. Build a new op.
274     Regexp* nre = new Regexp(re->op(), re->parse_flags());
275     nre->AllocSub(re->nsub());
276     Regexp** nre_subs = nre->sub();
277     for (int i = 0; i < re->nsub(); i++)
278       nre_subs[i] = child_args[i];
279     return nre;
280   }
281 
282   for (int i = 0; i < re->nsub(); i++) {
283     if (i+1 < re->nsub() &&
284         CanCoalesce(child_args[i], child_args[i+1]))
285       DoCoalesce(&child_args[i], &child_args[i+1]);
286   }
287   // Determine how many empty matches were left by DoCoalesce.
288   int n = 0;
289   for (int i = n; i < re->nsub(); i++) {
290     if (child_args[i]->op() == kRegexpEmptyMatch)
291       n++;
292   }
293   // Build a new op.
294   Regexp* nre = new Regexp(re->op(), re->parse_flags());
295   nre->AllocSub(re->nsub() - n);
296   Regexp** nre_subs = nre->sub();
297   for (int i = 0, j = 0; i < re->nsub(); i++) {
298     if (child_args[i]->op() == kRegexpEmptyMatch) {
299       child_args[i]->Decref();
300       continue;
301     }
302     nre_subs[j] = child_args[i];
303     j++;
304   }
305   return nre;
306 }
307 
CanCoalesce(Regexp * r1,Regexp * r2)308 bool CoalesceWalker::CanCoalesce(Regexp* r1, Regexp* r2) {
309   // r1 must be a star/plus/quest/repeat of a literal, char class, any char or
310   // any byte.
311   if ((r1->op() == kRegexpStar ||
312        r1->op() == kRegexpPlus ||
313        r1->op() == kRegexpQuest ||
314        r1->op() == kRegexpRepeat) &&
315       (r1->sub()[0]->op() == kRegexpLiteral ||
316        r1->sub()[0]->op() == kRegexpCharClass ||
317        r1->sub()[0]->op() == kRegexpAnyChar ||
318        r1->sub()[0]->op() == kRegexpAnyByte)) {
319     // r2 must be a star/plus/quest/repeat of the same literal, char class,
320     // any char or any byte.
321     if ((r2->op() == kRegexpStar ||
322          r2->op() == kRegexpPlus ||
323          r2->op() == kRegexpQuest ||
324          r2->op() == kRegexpRepeat) &&
325         Regexp::Equal(r1->sub()[0], r2->sub()[0]) &&
326         // The parse flags must be consistent.
327         ((r1->parse_flags() & Regexp::NonGreedy) ==
328          (r2->parse_flags() & Regexp::NonGreedy))) {
329       return true;
330     }
331     // ... OR an occurrence of that literal, char class, any char or any byte
332     if (Regexp::Equal(r1->sub()[0], r2)) {
333       return true;
334     }
335     // ... OR a literal string that begins with that literal.
336     if (r1->sub()[0]->op() == kRegexpLiteral &&
337         r2->op() == kRegexpLiteralString &&
338         r2->runes()[0] == r1->sub()[0]->rune() &&
339         // The parse flags must be consistent.
340         ((r1->sub()[0]->parse_flags() & Regexp::FoldCase) ==
341          (r2->parse_flags() & Regexp::FoldCase))) {
342       return true;
343     }
344   }
345   return false;
346 }
347 
DoCoalesce(Regexp ** r1ptr,Regexp ** r2ptr)348 void CoalesceWalker::DoCoalesce(Regexp** r1ptr, Regexp** r2ptr) {
349   Regexp* r1 = *r1ptr;
350   Regexp* r2 = *r2ptr;
351 
352   Regexp* nre = Regexp::Repeat(
353       r1->sub()[0]->Incref(), r1->parse_flags(), 0, 0);
354 
355   switch (r1->op()) {
356     case kRegexpStar:
357       nre->min_ = 0;
358       nre->max_ = -1;
359       break;
360 
361     case kRegexpPlus:
362       nre->min_ = 1;
363       nre->max_ = -1;
364       break;
365 
366     case kRegexpQuest:
367       nre->min_ = 0;
368       nre->max_ = 1;
369       break;
370 
371     case kRegexpRepeat:
372       nre->min_ = r1->min();
373       nre->max_ = r1->max();
374       break;
375 
376     default:
377       nre->Decref();
378       ABSL_LOG(DFATAL) << "DoCoalesce failed: r1->op() is " << r1->op();
379       return;
380   }
381 
382   switch (r2->op()) {
383     case kRegexpStar:
384       nre->max_ = -1;
385       goto LeaveEmpty;
386 
387     case kRegexpPlus:
388       nre->min_++;
389       nre->max_ = -1;
390       goto LeaveEmpty;
391 
392     case kRegexpQuest:
393       if (nre->max() != -1)
394         nre->max_++;
395       goto LeaveEmpty;
396 
397     case kRegexpRepeat:
398       nre->min_ += r2->min();
399       if (r2->max() == -1)
400         nre->max_ = -1;
401       else if (nre->max() != -1)
402         nre->max_ += r2->max();
403       goto LeaveEmpty;
404 
405     case kRegexpLiteral:
406     case kRegexpCharClass:
407     case kRegexpAnyChar:
408     case kRegexpAnyByte:
409       nre->min_++;
410       if (nre->max() != -1)
411         nre->max_++;
412       goto LeaveEmpty;
413 
414     LeaveEmpty:
415       *r1ptr = new Regexp(kRegexpEmptyMatch, Regexp::NoParseFlags);
416       *r2ptr = nre;
417       break;
418 
419     case kRegexpLiteralString: {
420       Rune r = r1->sub()[0]->rune();
421       // Determine how much of the literal string is removed.
422       // We know that we have at least one rune. :)
423       int n = 1;
424       while (n < r2->nrunes() && r2->runes()[n] == r)
425         n++;
426       nre->min_ += n;
427       if (nre->max() != -1)
428         nre->max_ += n;
429       if (n == r2->nrunes())
430         goto LeaveEmpty;
431       *r1ptr = nre;
432       *r2ptr = Regexp::LiteralString(
433           &r2->runes()[n], r2->nrunes() - n, r2->parse_flags());
434       break;
435     }
436 
437     default:
438       nre->Decref();
439       ABSL_LOG(DFATAL) << "DoCoalesce failed: r2->op() is " << r2->op();
440       return;
441   }
442 
443   r1->Decref();
444   r2->Decref();
445 }
446 
Copy(Regexp * re)447 Regexp* SimplifyWalker::Copy(Regexp* re) {
448   return re->Incref();
449 }
450 
ShortVisit(Regexp * re,Regexp * parent_arg)451 Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
452   // Should never be called: we use Walk(), not WalkExponential().
453 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
454   ABSL_LOG(DFATAL) << "SimplifyWalker::ShortVisit called";
455 #endif
456   return re->Incref();
457 }
458 
PreVisit(Regexp * re,Regexp * parent_arg,bool * stop)459 Regexp* SimplifyWalker::PreVisit(Regexp* re, Regexp* parent_arg, bool* stop) {
460   if (re->simple()) {
461     *stop = true;
462     return re->Incref();
463   }
464   return NULL;
465 }
466 
PostVisit(Regexp * re,Regexp * parent_arg,Regexp * pre_arg,Regexp ** child_args,int nchild_args)467 Regexp* SimplifyWalker::PostVisit(Regexp* re,
468                                   Regexp* parent_arg,
469                                   Regexp* pre_arg,
470                                   Regexp** child_args,
471                                   int nchild_args) {
472   switch (re->op()) {
473     case kRegexpNoMatch:
474     case kRegexpEmptyMatch:
475     case kRegexpLiteral:
476     case kRegexpLiteralString:
477     case kRegexpBeginLine:
478     case kRegexpEndLine:
479     case kRegexpBeginText:
480     case kRegexpWordBoundary:
481     case kRegexpNoWordBoundary:
482     case kRegexpEndText:
483     case kRegexpAnyChar:
484     case kRegexpAnyByte:
485     case kRegexpHaveMatch:
486       // All these are always simple.
487       re->simple_ = true;
488       return re->Incref();
489 
490     case kRegexpConcat:
491     case kRegexpAlternate: {
492       // These are simple as long as the subpieces are simple.
493       if (!ChildArgsChanged(re, child_args)) {
494         re->simple_ = true;
495         return re->Incref();
496       }
497       Regexp* nre = new Regexp(re->op(), re->parse_flags());
498       nre->AllocSub(re->nsub());
499       Regexp** nre_subs = nre->sub();
500       for (int i = 0; i < re->nsub(); i++)
501         nre_subs[i] = child_args[i];
502       nre->simple_ = true;
503       return nre;
504     }
505 
506     case kRegexpCapture: {
507       Regexp* newsub = child_args[0];
508       if (newsub == re->sub()[0]) {
509         newsub->Decref();
510         re->simple_ = true;
511         return re->Incref();
512       }
513       Regexp* nre = new Regexp(kRegexpCapture, re->parse_flags());
514       nre->AllocSub(1);
515       nre->sub()[0] = newsub;
516       nre->cap_ = re->cap();
517       nre->simple_ = true;
518       return nre;
519     }
520 
521     case kRegexpStar:
522     case kRegexpPlus:
523     case kRegexpQuest: {
524       Regexp* newsub = child_args[0];
525       // Special case: repeat the empty string as much as
526       // you want, but it's still the empty string.
527       if (newsub->op() == kRegexpEmptyMatch)
528         return newsub;
529 
530       // These are simple as long as the subpiece is simple.
531       if (newsub == re->sub()[0]) {
532         newsub->Decref();
533         re->simple_ = true;
534         return re->Incref();
535       }
536 
537       // These are also idempotent if flags are constant.
538       if (re->op() == newsub->op() &&
539           re->parse_flags() == newsub->parse_flags())
540         return newsub;
541 
542       Regexp* nre = new Regexp(re->op(), re->parse_flags());
543       nre->AllocSub(1);
544       nre->sub()[0] = newsub;
545       nre->simple_ = true;
546       return nre;
547     }
548 
549     case kRegexpRepeat: {
550       Regexp* newsub = child_args[0];
551       // Special case: repeat the empty string as much as
552       // you want, but it's still the empty string.
553       if (newsub->op() == kRegexpEmptyMatch)
554         return newsub;
555 
556       Regexp* nre = SimplifyRepeat(newsub, re->min_, re->max_,
557                                    re->parse_flags());
558       newsub->Decref();
559       nre->simple_ = true;
560       return nre;
561     }
562 
563     case kRegexpCharClass: {
564       Regexp* nre = SimplifyCharClass(re);
565       nre->simple_ = true;
566       return nre;
567     }
568   }
569 
570   ABSL_LOG(ERROR) << "Simplify case not handled: " << re->op();
571   return re->Incref();
572 }
573 
574 // Creates a concatenation of two Regexp, consuming refs to re1 and re2.
575 // Returns a new Regexp, handing the ref to the caller.
Concat2(Regexp * re1,Regexp * re2,Regexp::ParseFlags parse_flags)576 Regexp* SimplifyWalker::Concat2(Regexp* re1, Regexp* re2,
577                                 Regexp::ParseFlags parse_flags) {
578   Regexp* re = new Regexp(kRegexpConcat, parse_flags);
579   re->AllocSub(2);
580   Regexp** subs = re->sub();
581   subs[0] = re1;
582   subs[1] = re2;
583   return re;
584 }
585 
586 // Returns true if re is an empty-width op.
IsEmptyOp(Regexp * re)587 static bool IsEmptyOp(Regexp* re) {
588   return (re->op() == kRegexpBeginLine ||
589           re->op() == kRegexpEndLine ||
590           re->op() == kRegexpWordBoundary ||
591           re->op() == kRegexpNoWordBoundary ||
592           re->op() == kRegexpBeginText ||
593           re->op() == kRegexpEndText);
594 }
595 
596 // Simplifies the expression re{min,max} in terms of *, +, and ?.
597 // Returns a new regexp.  Does not edit re.  Does not consume reference to re.
598 // Caller must Decref return value when done with it.
599 // The result will *not* necessarily have the right capturing parens
600 // if you call ToString() and re-parse it: (x){2} becomes (x)(x),
601 // but in the Regexp* representation, both (x) are marked as $1.
SimplifyRepeat(Regexp * re,int min,int max,Regexp::ParseFlags f)602 Regexp* SimplifyWalker::SimplifyRepeat(Regexp* re, int min, int max,
603                                        Regexp::ParseFlags f) {
604   // For an empty-width op OR a concatenation or alternation of empty-width
605   // ops, cap the repetition count at 1.
606   if (IsEmptyOp(re) ||
607       ((re->op() == kRegexpConcat ||
608         re->op() == kRegexpAlternate) &&
609        std::all_of(re->sub(), re->sub() + re->nsub(), IsEmptyOp))) {
610     min = std::min(min, 1);
611     max = std::min(max, 1);
612   }
613 
614   // x{n,} means at least n matches of x.
615   if (max == -1) {
616     // Special case: x{0,} is x*
617     if (min == 0)
618       return Regexp::Star(re->Incref(), f);
619 
620     // Special case: x{1,} is x+
621     if (min == 1)
622       return Regexp::Plus(re->Incref(), f);
623 
624     // General case: x{4,} is xxxx+
625     PODArray<Regexp*> nre_subs(min);
626     for (int i = 0; i < min-1; i++)
627       nre_subs[i] = re->Incref();
628     nre_subs[min-1] = Regexp::Plus(re->Incref(), f);
629     return Regexp::Concat(nre_subs.data(), min, f);
630   }
631 
632   // Special case: (x){0} matches only empty string.
633   if (min == 0 && max == 0)
634     return new Regexp(kRegexpEmptyMatch, f);
635 
636   // Special case: x{1} is just x.
637   if (min == 1 && max == 1)
638     return re->Incref();
639 
640   // General case: x{n,m} means n copies of x and m copies of x?.
641   // The machine will do less work if we nest the final m copies,
642   // so that x{2,5} = xx(x(x(x)?)?)?
643 
644   // Build leading prefix: xx.  Capturing only on the last one.
645   Regexp* nre = NULL;
646   if (min > 0) {
647     PODArray<Regexp*> nre_subs(min);
648     for (int i = 0; i < min; i++)
649       nre_subs[i] = re->Incref();
650     nre = Regexp::Concat(nre_subs.data(), min, f);
651   }
652 
653   // Build and attach suffix: (x(x(x)?)?)?
654   if (max > min) {
655     Regexp* suf = Regexp::Quest(re->Incref(), f);
656     for (int i = min+1; i < max; i++)
657       suf = Regexp::Quest(Concat2(re->Incref(), suf, f), f);
658     if (nre == NULL)
659       nre = suf;
660     else
661       nre = Concat2(nre, suf, f);
662   }
663 
664   if (nre == NULL) {
665     // Some degenerate case, like min > max, or min < max < 0.
666     // This shouldn't happen, because the parser rejects such regexps.
667     ABSL_LOG(DFATAL) << "Malformed repeat of " << re->ToString()
668                      << " min " << min << " max " << max;
669     return new Regexp(kRegexpNoMatch, f);
670   }
671 
672   return nre;
673 }
674 
675 // Simplifies a character class.
676 // Caller must Decref return value when done with it.
SimplifyCharClass(Regexp * re)677 Regexp* SimplifyWalker::SimplifyCharClass(Regexp* re) {
678   CharClass* cc = re->cc();
679 
680   // Special cases
681   if (cc->empty())
682     return new Regexp(kRegexpNoMatch, re->parse_flags());
683   if (cc->full())
684     return new Regexp(kRegexpAnyChar, re->parse_flags());
685 
686   return re->Incref();
687 }
688 
689 }  // namespace re2
690