1 // Copyright 2006 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Rewrite POSIX and other features in re
6 // to use simple extended regular expression features.
7 // Also sort and simplify character classes.
8
9 #include <stddef.h>
10
11 #include <algorithm>
12 #include <string>
13
14 #include "absl/log/absl_log.h"
15 #include "absl/strings/string_view.h"
16 #include "re2/pod_array.h"
17 #include "re2/regexp.h"
18 #include "re2/walker-inl.h"
19 #include "util/utf.h"
20
21 namespace re2 {
22
23 // Parses the regexp src and then simplifies it and sets *dst to the
24 // string representation of the simplified form. Returns true on success.
25 // Returns false and sets *error (if error != NULL) on error.
SimplifyRegexp(absl::string_view src,ParseFlags flags,std::string * dst,RegexpStatus * status)26 bool Regexp::SimplifyRegexp(absl::string_view src, ParseFlags flags,
27 std::string* dst, RegexpStatus* status) {
28 Regexp* re = Parse(src, flags, status);
29 if (re == NULL)
30 return false;
31 Regexp* sre = re->Simplify();
32 re->Decref();
33 if (sre == NULL) {
34 if (status) {
35 status->set_code(kRegexpInternalError);
36 status->set_error_arg(src);
37 }
38 return false;
39 }
40 *dst = sre->ToString();
41 sre->Decref();
42 return true;
43 }
44
45 // Assuming the simple_ flags on the children are accurate,
46 // is this Regexp* simple?
ComputeSimple()47 bool Regexp::ComputeSimple() {
48 Regexp** subs;
49 switch (op_) {
50 case kRegexpNoMatch:
51 case kRegexpEmptyMatch:
52 case kRegexpLiteral:
53 case kRegexpLiteralString:
54 case kRegexpBeginLine:
55 case kRegexpEndLine:
56 case kRegexpBeginText:
57 case kRegexpWordBoundary:
58 case kRegexpNoWordBoundary:
59 case kRegexpEndText:
60 case kRegexpAnyChar:
61 case kRegexpAnyByte:
62 case kRegexpHaveMatch:
63 return true;
64 case kRegexpConcat:
65 case kRegexpAlternate:
66 // These are simple as long as the subpieces are simple.
67 subs = sub();
68 for (int i = 0; i < nsub_; i++)
69 if (!subs[i]->simple())
70 return false;
71 return true;
72 case kRegexpCharClass:
73 // Simple as long as the char class is not empty, not full.
74 if (ccb_ != NULL)
75 return !ccb_->empty() && !ccb_->full();
76 return !cc_->empty() && !cc_->full();
77 case kRegexpCapture:
78 subs = sub();
79 return subs[0]->simple();
80 case kRegexpStar:
81 case kRegexpPlus:
82 case kRegexpQuest:
83 subs = sub();
84 if (!subs[0]->simple())
85 return false;
86 switch (subs[0]->op_) {
87 case kRegexpStar:
88 case kRegexpPlus:
89 case kRegexpQuest:
90 case kRegexpEmptyMatch:
91 case kRegexpNoMatch:
92 return false;
93 default:
94 break;
95 }
96 return true;
97 case kRegexpRepeat:
98 return false;
99 }
100 ABSL_LOG(DFATAL) << "Case not handled in ComputeSimple: " << op_;
101 return false;
102 }
103
104 // Walker subclass used by Simplify.
105 // Coalesces runs of star/plus/quest/repeat of the same literal along with any
106 // occurrences of that literal into repeats of that literal. It also works for
107 // char classes, any char and any byte.
108 // PostVisit creates the coalesced result, which should then be simplified.
109 class CoalesceWalker : public Regexp::Walker<Regexp*> {
110 public:
CoalesceWalker()111 CoalesceWalker() {}
112 virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg,
113 Regexp** child_args, int nchild_args);
114 virtual Regexp* Copy(Regexp* re);
115 virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg);
116
117 private:
118 // These functions are declared inside CoalesceWalker so that
119 // they can edit the private fields of the Regexps they construct.
120
121 // Returns true if r1 and r2 can be coalesced. In particular, ensures that
122 // the parse flags are consistent. (They will not be checked again later.)
123 static bool CanCoalesce(Regexp* r1, Regexp* r2);
124
125 // Coalesces *r1ptr and *r2ptr. In most cases, the array elements afterwards
126 // will be empty match and the coalesced op. In other cases, where part of a
127 // literal string was removed to be coalesced, the array elements afterwards
128 // will be the coalesced op and the remainder of the literal string.
129 static void DoCoalesce(Regexp** r1ptr, Regexp** r2ptr);
130
131 CoalesceWalker(const CoalesceWalker&) = delete;
132 CoalesceWalker& operator=(const CoalesceWalker&) = delete;
133 };
134
135 // Walker subclass used by Simplify.
136 // The simplify walk is purely post-recursive: given the simplified children,
137 // PostVisit creates the simplified result.
138 // The child_args are simplified Regexp*s.
139 class SimplifyWalker : public Regexp::Walker<Regexp*> {
140 public:
SimplifyWalker()141 SimplifyWalker() {}
142 virtual Regexp* PreVisit(Regexp* re, Regexp* parent_arg, bool* stop);
143 virtual Regexp* PostVisit(Regexp* re, Regexp* parent_arg, Regexp* pre_arg,
144 Regexp** child_args, int nchild_args);
145 virtual Regexp* Copy(Regexp* re);
146 virtual Regexp* ShortVisit(Regexp* re, Regexp* parent_arg);
147
148 private:
149 // These functions are declared inside SimplifyWalker so that
150 // they can edit the private fields of the Regexps they construct.
151
152 // Creates a concatenation of two Regexp, consuming refs to re1 and re2.
153 // Caller must Decref return value when done with it.
154 static Regexp* Concat2(Regexp* re1, Regexp* re2, Regexp::ParseFlags flags);
155
156 // Simplifies the expression re{min,max} in terms of *, +, and ?.
157 // Returns a new regexp. Does not edit re. Does not consume reference to re.
158 // Caller must Decref return value when done with it.
159 static Regexp* SimplifyRepeat(Regexp* re, int min, int max,
160 Regexp::ParseFlags parse_flags);
161
162 // Simplifies a character class by expanding any named classes
163 // into rune ranges. Does not edit re. Does not consume ref to re.
164 // Caller must Decref return value when done with it.
165 static Regexp* SimplifyCharClass(Regexp* re);
166
167 SimplifyWalker(const SimplifyWalker&) = delete;
168 SimplifyWalker& operator=(const SimplifyWalker&) = delete;
169 };
170
171 // Simplifies a regular expression, returning a new regexp.
172 // The new regexp uses traditional Unix egrep features only,
173 // plus the Perl (?:) non-capturing parentheses.
174 // Otherwise, no POSIX or Perl additions. The new regexp
175 // captures exactly the same subexpressions (with the same indices)
176 // as the original.
177 // Does not edit current object.
178 // Caller must Decref() return value when done with it.
179
Simplify()180 Regexp* Regexp::Simplify() {
181 CoalesceWalker cw;
182 Regexp* cre = cw.Walk(this, NULL);
183 if (cre == NULL)
184 return NULL;
185 if (cw.stopped_early()) {
186 cre->Decref();
187 return NULL;
188 }
189 SimplifyWalker sw;
190 Regexp* sre = sw.Walk(cre, NULL);
191 cre->Decref();
192 if (sre == NULL)
193 return NULL;
194 if (sw.stopped_early()) {
195 sre->Decref();
196 return NULL;
197 }
198 return sre;
199 }
200
201 #define Simplify DontCallSimplify // Avoid accidental recursion
202
203 // Utility function for PostVisit implementations that compares re->sub() with
204 // child_args to determine whether any child_args changed. In the common case,
205 // where nothing changed, calls Decref() for all child_args and returns false,
206 // so PostVisit must return re->Incref(). Otherwise, returns true.
ChildArgsChanged(Regexp * re,Regexp ** child_args)207 static bool ChildArgsChanged(Regexp* re, Regexp** child_args) {
208 for (int i = 0; i < re->nsub(); i++) {
209 Regexp* sub = re->sub()[i];
210 Regexp* newsub = child_args[i];
211 if (newsub != sub)
212 return true;
213 }
214 for (int i = 0; i < re->nsub(); i++) {
215 Regexp* newsub = child_args[i];
216 newsub->Decref();
217 }
218 return false;
219 }
220
Copy(Regexp * re)221 Regexp* CoalesceWalker::Copy(Regexp* re) {
222 return re->Incref();
223 }
224
ShortVisit(Regexp * re,Regexp * parent_arg)225 Regexp* CoalesceWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
226 // Should never be called: we use Walk(), not WalkExponential().
227 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
228 ABSL_LOG(DFATAL) << "CoalesceWalker::ShortVisit called";
229 #endif
230 return re->Incref();
231 }
232
PostVisit(Regexp * re,Regexp * parent_arg,Regexp * pre_arg,Regexp ** child_args,int nchild_args)233 Regexp* CoalesceWalker::PostVisit(Regexp* re,
234 Regexp* parent_arg,
235 Regexp* pre_arg,
236 Regexp** child_args,
237 int nchild_args) {
238 if (re->nsub() == 0)
239 return re->Incref();
240
241 if (re->op() != kRegexpConcat) {
242 if (!ChildArgsChanged(re, child_args))
243 return re->Incref();
244
245 // Something changed. Build a new op.
246 Regexp* nre = new Regexp(re->op(), re->parse_flags());
247 nre->AllocSub(re->nsub());
248 Regexp** nre_subs = nre->sub();
249 for (int i = 0; i < re->nsub(); i++)
250 nre_subs[i] = child_args[i];
251 // Repeats and Captures have additional data that must be copied.
252 if (re->op() == kRegexpRepeat) {
253 nre->min_ = re->min();
254 nre->max_ = re->max();
255 } else if (re->op() == kRegexpCapture) {
256 nre->cap_ = re->cap();
257 }
258 return nre;
259 }
260
261 bool can_coalesce = false;
262 for (int i = 0; i < re->nsub(); i++) {
263 if (i+1 < re->nsub() &&
264 CanCoalesce(child_args[i], child_args[i+1])) {
265 can_coalesce = true;
266 break;
267 }
268 }
269 if (!can_coalesce) {
270 if (!ChildArgsChanged(re, child_args))
271 return re->Incref();
272
273 // Something changed. Build a new op.
274 Regexp* nre = new Regexp(re->op(), re->parse_flags());
275 nre->AllocSub(re->nsub());
276 Regexp** nre_subs = nre->sub();
277 for (int i = 0; i < re->nsub(); i++)
278 nre_subs[i] = child_args[i];
279 return nre;
280 }
281
282 for (int i = 0; i < re->nsub(); i++) {
283 if (i+1 < re->nsub() &&
284 CanCoalesce(child_args[i], child_args[i+1]))
285 DoCoalesce(&child_args[i], &child_args[i+1]);
286 }
287 // Determine how many empty matches were left by DoCoalesce.
288 int n = 0;
289 for (int i = n; i < re->nsub(); i++) {
290 if (child_args[i]->op() == kRegexpEmptyMatch)
291 n++;
292 }
293 // Build a new op.
294 Regexp* nre = new Regexp(re->op(), re->parse_flags());
295 nre->AllocSub(re->nsub() - n);
296 Regexp** nre_subs = nre->sub();
297 for (int i = 0, j = 0; i < re->nsub(); i++) {
298 if (child_args[i]->op() == kRegexpEmptyMatch) {
299 child_args[i]->Decref();
300 continue;
301 }
302 nre_subs[j] = child_args[i];
303 j++;
304 }
305 return nre;
306 }
307
CanCoalesce(Regexp * r1,Regexp * r2)308 bool CoalesceWalker::CanCoalesce(Regexp* r1, Regexp* r2) {
309 // r1 must be a star/plus/quest/repeat of a literal, char class, any char or
310 // any byte.
311 if ((r1->op() == kRegexpStar ||
312 r1->op() == kRegexpPlus ||
313 r1->op() == kRegexpQuest ||
314 r1->op() == kRegexpRepeat) &&
315 (r1->sub()[0]->op() == kRegexpLiteral ||
316 r1->sub()[0]->op() == kRegexpCharClass ||
317 r1->sub()[0]->op() == kRegexpAnyChar ||
318 r1->sub()[0]->op() == kRegexpAnyByte)) {
319 // r2 must be a star/plus/quest/repeat of the same literal, char class,
320 // any char or any byte.
321 if ((r2->op() == kRegexpStar ||
322 r2->op() == kRegexpPlus ||
323 r2->op() == kRegexpQuest ||
324 r2->op() == kRegexpRepeat) &&
325 Regexp::Equal(r1->sub()[0], r2->sub()[0]) &&
326 // The parse flags must be consistent.
327 ((r1->parse_flags() & Regexp::NonGreedy) ==
328 (r2->parse_flags() & Regexp::NonGreedy))) {
329 return true;
330 }
331 // ... OR an occurrence of that literal, char class, any char or any byte
332 if (Regexp::Equal(r1->sub()[0], r2)) {
333 return true;
334 }
335 // ... OR a literal string that begins with that literal.
336 if (r1->sub()[0]->op() == kRegexpLiteral &&
337 r2->op() == kRegexpLiteralString &&
338 r2->runes()[0] == r1->sub()[0]->rune() &&
339 // The parse flags must be consistent.
340 ((r1->sub()[0]->parse_flags() & Regexp::FoldCase) ==
341 (r2->parse_flags() & Regexp::FoldCase))) {
342 return true;
343 }
344 }
345 return false;
346 }
347
DoCoalesce(Regexp ** r1ptr,Regexp ** r2ptr)348 void CoalesceWalker::DoCoalesce(Regexp** r1ptr, Regexp** r2ptr) {
349 Regexp* r1 = *r1ptr;
350 Regexp* r2 = *r2ptr;
351
352 Regexp* nre = Regexp::Repeat(
353 r1->sub()[0]->Incref(), r1->parse_flags(), 0, 0);
354
355 switch (r1->op()) {
356 case kRegexpStar:
357 nre->min_ = 0;
358 nre->max_ = -1;
359 break;
360
361 case kRegexpPlus:
362 nre->min_ = 1;
363 nre->max_ = -1;
364 break;
365
366 case kRegexpQuest:
367 nre->min_ = 0;
368 nre->max_ = 1;
369 break;
370
371 case kRegexpRepeat:
372 nre->min_ = r1->min();
373 nre->max_ = r1->max();
374 break;
375
376 default:
377 nre->Decref();
378 ABSL_LOG(DFATAL) << "DoCoalesce failed: r1->op() is " << r1->op();
379 return;
380 }
381
382 switch (r2->op()) {
383 case kRegexpStar:
384 nre->max_ = -1;
385 goto LeaveEmpty;
386
387 case kRegexpPlus:
388 nre->min_++;
389 nre->max_ = -1;
390 goto LeaveEmpty;
391
392 case kRegexpQuest:
393 if (nre->max() != -1)
394 nre->max_++;
395 goto LeaveEmpty;
396
397 case kRegexpRepeat:
398 nre->min_ += r2->min();
399 if (r2->max() == -1)
400 nre->max_ = -1;
401 else if (nre->max() != -1)
402 nre->max_ += r2->max();
403 goto LeaveEmpty;
404
405 case kRegexpLiteral:
406 case kRegexpCharClass:
407 case kRegexpAnyChar:
408 case kRegexpAnyByte:
409 nre->min_++;
410 if (nre->max() != -1)
411 nre->max_++;
412 goto LeaveEmpty;
413
414 LeaveEmpty:
415 *r1ptr = new Regexp(kRegexpEmptyMatch, Regexp::NoParseFlags);
416 *r2ptr = nre;
417 break;
418
419 case kRegexpLiteralString: {
420 Rune r = r1->sub()[0]->rune();
421 // Determine how much of the literal string is removed.
422 // We know that we have at least one rune. :)
423 int n = 1;
424 while (n < r2->nrunes() && r2->runes()[n] == r)
425 n++;
426 nre->min_ += n;
427 if (nre->max() != -1)
428 nre->max_ += n;
429 if (n == r2->nrunes())
430 goto LeaveEmpty;
431 *r1ptr = nre;
432 *r2ptr = Regexp::LiteralString(
433 &r2->runes()[n], r2->nrunes() - n, r2->parse_flags());
434 break;
435 }
436
437 default:
438 nre->Decref();
439 ABSL_LOG(DFATAL) << "DoCoalesce failed: r2->op() is " << r2->op();
440 return;
441 }
442
443 r1->Decref();
444 r2->Decref();
445 }
446
Copy(Regexp * re)447 Regexp* SimplifyWalker::Copy(Regexp* re) {
448 return re->Incref();
449 }
450
ShortVisit(Regexp * re,Regexp * parent_arg)451 Regexp* SimplifyWalker::ShortVisit(Regexp* re, Regexp* parent_arg) {
452 // Should never be called: we use Walk(), not WalkExponential().
453 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
454 ABSL_LOG(DFATAL) << "SimplifyWalker::ShortVisit called";
455 #endif
456 return re->Incref();
457 }
458
PreVisit(Regexp * re,Regexp * parent_arg,bool * stop)459 Regexp* SimplifyWalker::PreVisit(Regexp* re, Regexp* parent_arg, bool* stop) {
460 if (re->simple()) {
461 *stop = true;
462 return re->Incref();
463 }
464 return NULL;
465 }
466
PostVisit(Regexp * re,Regexp * parent_arg,Regexp * pre_arg,Regexp ** child_args,int nchild_args)467 Regexp* SimplifyWalker::PostVisit(Regexp* re,
468 Regexp* parent_arg,
469 Regexp* pre_arg,
470 Regexp** child_args,
471 int nchild_args) {
472 switch (re->op()) {
473 case kRegexpNoMatch:
474 case kRegexpEmptyMatch:
475 case kRegexpLiteral:
476 case kRegexpLiteralString:
477 case kRegexpBeginLine:
478 case kRegexpEndLine:
479 case kRegexpBeginText:
480 case kRegexpWordBoundary:
481 case kRegexpNoWordBoundary:
482 case kRegexpEndText:
483 case kRegexpAnyChar:
484 case kRegexpAnyByte:
485 case kRegexpHaveMatch:
486 // All these are always simple.
487 re->simple_ = true;
488 return re->Incref();
489
490 case kRegexpConcat:
491 case kRegexpAlternate: {
492 // These are simple as long as the subpieces are simple.
493 if (!ChildArgsChanged(re, child_args)) {
494 re->simple_ = true;
495 return re->Incref();
496 }
497 Regexp* nre = new Regexp(re->op(), re->parse_flags());
498 nre->AllocSub(re->nsub());
499 Regexp** nre_subs = nre->sub();
500 for (int i = 0; i < re->nsub(); i++)
501 nre_subs[i] = child_args[i];
502 nre->simple_ = true;
503 return nre;
504 }
505
506 case kRegexpCapture: {
507 Regexp* newsub = child_args[0];
508 if (newsub == re->sub()[0]) {
509 newsub->Decref();
510 re->simple_ = true;
511 return re->Incref();
512 }
513 Regexp* nre = new Regexp(kRegexpCapture, re->parse_flags());
514 nre->AllocSub(1);
515 nre->sub()[0] = newsub;
516 nre->cap_ = re->cap();
517 nre->simple_ = true;
518 return nre;
519 }
520
521 case kRegexpStar:
522 case kRegexpPlus:
523 case kRegexpQuest: {
524 Regexp* newsub = child_args[0];
525 // Special case: repeat the empty string as much as
526 // you want, but it's still the empty string.
527 if (newsub->op() == kRegexpEmptyMatch)
528 return newsub;
529
530 // These are simple as long as the subpiece is simple.
531 if (newsub == re->sub()[0]) {
532 newsub->Decref();
533 re->simple_ = true;
534 return re->Incref();
535 }
536
537 // These are also idempotent if flags are constant.
538 if (re->op() == newsub->op() &&
539 re->parse_flags() == newsub->parse_flags())
540 return newsub;
541
542 Regexp* nre = new Regexp(re->op(), re->parse_flags());
543 nre->AllocSub(1);
544 nre->sub()[0] = newsub;
545 nre->simple_ = true;
546 return nre;
547 }
548
549 case kRegexpRepeat: {
550 Regexp* newsub = child_args[0];
551 // Special case: repeat the empty string as much as
552 // you want, but it's still the empty string.
553 if (newsub->op() == kRegexpEmptyMatch)
554 return newsub;
555
556 Regexp* nre = SimplifyRepeat(newsub, re->min_, re->max_,
557 re->parse_flags());
558 newsub->Decref();
559 nre->simple_ = true;
560 return nre;
561 }
562
563 case kRegexpCharClass: {
564 Regexp* nre = SimplifyCharClass(re);
565 nre->simple_ = true;
566 return nre;
567 }
568 }
569
570 ABSL_LOG(ERROR) << "Simplify case not handled: " << re->op();
571 return re->Incref();
572 }
573
574 // Creates a concatenation of two Regexp, consuming refs to re1 and re2.
575 // Returns a new Regexp, handing the ref to the caller.
Concat2(Regexp * re1,Regexp * re2,Regexp::ParseFlags parse_flags)576 Regexp* SimplifyWalker::Concat2(Regexp* re1, Regexp* re2,
577 Regexp::ParseFlags parse_flags) {
578 Regexp* re = new Regexp(kRegexpConcat, parse_flags);
579 re->AllocSub(2);
580 Regexp** subs = re->sub();
581 subs[0] = re1;
582 subs[1] = re2;
583 return re;
584 }
585
586 // Returns true if re is an empty-width op.
IsEmptyOp(Regexp * re)587 static bool IsEmptyOp(Regexp* re) {
588 return (re->op() == kRegexpBeginLine ||
589 re->op() == kRegexpEndLine ||
590 re->op() == kRegexpWordBoundary ||
591 re->op() == kRegexpNoWordBoundary ||
592 re->op() == kRegexpBeginText ||
593 re->op() == kRegexpEndText);
594 }
595
596 // Simplifies the expression re{min,max} in terms of *, +, and ?.
597 // Returns a new regexp. Does not edit re. Does not consume reference to re.
598 // Caller must Decref return value when done with it.
599 // The result will *not* necessarily have the right capturing parens
600 // if you call ToString() and re-parse it: (x){2} becomes (x)(x),
601 // but in the Regexp* representation, both (x) are marked as $1.
SimplifyRepeat(Regexp * re,int min,int max,Regexp::ParseFlags f)602 Regexp* SimplifyWalker::SimplifyRepeat(Regexp* re, int min, int max,
603 Regexp::ParseFlags f) {
604 // For an empty-width op OR a concatenation or alternation of empty-width
605 // ops, cap the repetition count at 1.
606 if (IsEmptyOp(re) ||
607 ((re->op() == kRegexpConcat ||
608 re->op() == kRegexpAlternate) &&
609 std::all_of(re->sub(), re->sub() + re->nsub(), IsEmptyOp))) {
610 min = std::min(min, 1);
611 max = std::min(max, 1);
612 }
613
614 // x{n,} means at least n matches of x.
615 if (max == -1) {
616 // Special case: x{0,} is x*
617 if (min == 0)
618 return Regexp::Star(re->Incref(), f);
619
620 // Special case: x{1,} is x+
621 if (min == 1)
622 return Regexp::Plus(re->Incref(), f);
623
624 // General case: x{4,} is xxxx+
625 PODArray<Regexp*> nre_subs(min);
626 for (int i = 0; i < min-1; i++)
627 nre_subs[i] = re->Incref();
628 nre_subs[min-1] = Regexp::Plus(re->Incref(), f);
629 return Regexp::Concat(nre_subs.data(), min, f);
630 }
631
632 // Special case: (x){0} matches only empty string.
633 if (min == 0 && max == 0)
634 return new Regexp(kRegexpEmptyMatch, f);
635
636 // Special case: x{1} is just x.
637 if (min == 1 && max == 1)
638 return re->Incref();
639
640 // General case: x{n,m} means n copies of x and m copies of x?.
641 // The machine will do less work if we nest the final m copies,
642 // so that x{2,5} = xx(x(x(x)?)?)?
643
644 // Build leading prefix: xx. Capturing only on the last one.
645 Regexp* nre = NULL;
646 if (min > 0) {
647 PODArray<Regexp*> nre_subs(min);
648 for (int i = 0; i < min; i++)
649 nre_subs[i] = re->Incref();
650 nre = Regexp::Concat(nre_subs.data(), min, f);
651 }
652
653 // Build and attach suffix: (x(x(x)?)?)?
654 if (max > min) {
655 Regexp* suf = Regexp::Quest(re->Incref(), f);
656 for (int i = min+1; i < max; i++)
657 suf = Regexp::Quest(Concat2(re->Incref(), suf, f), f);
658 if (nre == NULL)
659 nre = suf;
660 else
661 nre = Concat2(nre, suf, f);
662 }
663
664 if (nre == NULL) {
665 // Some degenerate case, like min > max, or min < max < 0.
666 // This shouldn't happen, because the parser rejects such regexps.
667 ABSL_LOG(DFATAL) << "Malformed repeat of " << re->ToString()
668 << " min " << min << " max " << max;
669 return new Regexp(kRegexpNoMatch, f);
670 }
671
672 return nre;
673 }
674
675 // Simplifies a character class.
676 // Caller must Decref return value when done with it.
SimplifyCharClass(Regexp * re)677 Regexp* SimplifyWalker::SimplifyCharClass(Regexp* re) {
678 CharClass* cc = re->cc();
679
680 // Special cases
681 if (cc->empty())
682 return new Regexp(kRegexpNoMatch, re->parse_flags());
683 if (cc->full())
684 return new Regexp(kRegexpAnyChar, re->parse_flags());
685
686 return re->Incref();
687 }
688
689 } // namespace re2
690