• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2006 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 #ifndef RE2_WALKER_INL_H_
6 #define RE2_WALKER_INL_H_
7 
8 // Helper class for traversing Regexps without recursion.
9 // Clients should declare their own subclasses that override
10 // the PreVisit and PostVisit methods, which are called before
11 // and after visiting the subexpressions.
12 
13 // Not quite the Visitor pattern, because (among other things)
14 // the Visitor pattern is recursive.
15 
16 #include <stack>
17 
18 #include "absl/base/macros.h"
19 #include "absl/log/absl_check.h"
20 #include "absl/log/absl_log.h"
21 #include "re2/regexp.h"
22 
23 namespace re2 {
24 
25 template<typename T> struct WalkState;
26 
27 template<typename T> class Regexp::Walker {
28  public:
29   Walker();
30   virtual ~Walker();
31 
32   // Virtual method called before visiting re's children.
33   // PreVisit passes ownership of its return value to its caller.
34   // The Arg* that PreVisit returns will be passed to PostVisit as pre_arg
35   // and passed to the child PreVisits and PostVisits as parent_arg.
36   // At the top-most Regexp, parent_arg is arg passed to walk.
37   // If PreVisit sets *stop to true, the walk does not recurse
38   // into the children.  Instead it behaves as though the return
39   // value from PreVisit is the return value from PostVisit.
40   // The default PreVisit returns parent_arg.
41   virtual T PreVisit(Regexp* re, T parent_arg, bool* stop);
42 
43   // Virtual method called after visiting re's children.
44   // The pre_arg is the T that PreVisit returned.
45   // The child_args is a vector of the T that the child PostVisits returned.
46   // PostVisit takes ownership of pre_arg.
47   // PostVisit takes ownership of the Ts
48   // in *child_args, but not the vector itself.
49   // PostVisit passes ownership of its return value
50   // to its caller.
51   // The default PostVisit simply returns pre_arg.
52   virtual T PostVisit(Regexp* re, T parent_arg, T pre_arg,
53                       T* child_args, int nchild_args);
54 
55   // Virtual method called to copy a T,
56   // when Walk notices that more than one child is the same re.
57   virtual T Copy(T arg);
58 
59   // Virtual method called to do a "quick visit" of the re,
60   // but not its children.  Only called once the visit budget
61   // has been used up and we're trying to abort the walk
62   // as quickly as possible.  Should return a value that
63   // makes sense for the parent PostVisits still to be run.
64   // This function is (hopefully) only called by
65   // WalkExponential, but must be implemented by all clients,
66   // just in case.
67   virtual T ShortVisit(Regexp* re, T parent_arg) = 0;
68 
69   // Walks over a regular expression.
70   // Top_arg is passed as parent_arg to PreVisit and PostVisit of re.
71   // Returns the T returned by PostVisit on re.
72   T Walk(Regexp* re, T top_arg);
73 
74   // Like Walk, but doesn't use Copy.  This can lead to
75   // exponential runtimes on cross-linked Regexps like the
76   // ones generated by Simplify.  To help limit this,
77   // at most max_visits nodes will be visited and then
78   // the walk will be cut off early.
79   // If the walk *is* cut off early, ShortVisit(re)
80   // will be called on regexps that cannot be fully
81   // visited rather than calling PreVisit/PostVisit.
82   T WalkExponential(Regexp* re, T top_arg, int max_visits);
83 
84   // Clears the stack.  Should never be necessary, since
85   // Walk always enters and exits with an empty stack.
86   // Logs DFATAL if stack is not already clear.
87   void Reset();
88 
89   // Returns whether walk was cut off.
stopped_early()90   bool stopped_early() { return stopped_early_; }
91 
92  private:
93   // Walk state for the entire traversal.
94   std::stack<WalkState<T>> stack_;
95   bool stopped_early_;
96   int max_visits_;
97 
98   T WalkInternal(Regexp* re, T top_arg, bool use_copy);
99 
100   Walker(const Walker&) = delete;
101   Walker& operator=(const Walker&) = delete;
102 };
103 
PreVisit(Regexp * re,T parent_arg,bool * stop)104 template<typename T> T Regexp::Walker<T>::PreVisit(Regexp* re,
105                                                    T parent_arg,
106                                                    bool* stop) {
107   return parent_arg;
108 }
109 
PostVisit(Regexp * re,T parent_arg,T pre_arg,T * child_args,int nchild_args)110 template<typename T> T Regexp::Walker<T>::PostVisit(Regexp* re,
111                                                     T parent_arg,
112                                                     T pre_arg,
113                                                     T* child_args,
114                                                     int nchild_args) {
115   return pre_arg;
116 }
117 
Copy(T arg)118 template<typename T> T Regexp::Walker<T>::Copy(T arg) {
119   return arg;
120 }
121 
122 // State about a single level in the traversal.
123 template<typename T> struct WalkState {
WalkStateWalkState124   WalkState(Regexp* re, T parent)
125     : re(re),
126       n(-1),
127       parent_arg(parent),
128       child_args(NULL) { }
129 
130   Regexp* re;  // The regexp
131   int n;  // The index of the next child to process; -1 means need to PreVisit
132   T parent_arg;  // Accumulated arguments.
133   T pre_arg;
134   T child_arg;  // One-element buffer for child_args.
135   T* child_args;
136 };
137 
Walker()138 template<typename T> Regexp::Walker<T>::Walker() {
139   stopped_early_ = false;
140 }
141 
~Walker()142 template<typename T> Regexp::Walker<T>::~Walker() {
143   Reset();
144 }
145 
146 // Clears the stack.  Should never be necessary, since
147 // Walk always enters and exits with an empty stack.
148 // Logs DFATAL if stack is not already clear.
Reset()149 template<typename T> void Regexp::Walker<T>::Reset() {
150   if (!stack_.empty()) {
151     ABSL_LOG(DFATAL) << "Stack not empty.";
152     while (!stack_.empty()) {
153       if (stack_.top().re->nsub_ > 1)
154         delete[] stack_.top().child_args;
155       stack_.pop();
156     }
157   }
158 }
159 
WalkInternal(Regexp * re,T top_arg,bool use_copy)160 template<typename T> T Regexp::Walker<T>::WalkInternal(Regexp* re, T top_arg,
161                                                        bool use_copy) {
162   Reset();
163 
164   if (re == NULL) {
165     ABSL_LOG(DFATAL) << "Walk NULL";
166     return top_arg;
167   }
168 
169   stack_.push(WalkState<T>(re, top_arg));
170 
171   WalkState<T>* s;
172   for (;;) {
173     T t;
174     s = &stack_.top();
175     re = s->re;
176     switch (s->n) {
177       case -1: {
178         if (--max_visits_ < 0) {
179           stopped_early_ = true;
180           t = ShortVisit(re, s->parent_arg);
181           break;
182         }
183         bool stop = false;
184         s->pre_arg = PreVisit(re, s->parent_arg, &stop);
185         if (stop) {
186           t = s->pre_arg;
187           break;
188         }
189         s->n = 0;
190         s->child_args = NULL;
191         if (re->nsub_ == 1)
192           s->child_args = &s->child_arg;
193         else if (re->nsub_ > 1)
194           s->child_args = new T[re->nsub_];
195         ABSL_FALLTHROUGH_INTENDED;
196       }
197       default: {
198         if (re->nsub_ > 0) {
199           Regexp** sub = re->sub();
200           if (s->n < re->nsub_) {
201             if (use_copy && s->n > 0 && sub[s->n - 1] == sub[s->n]) {
202               s->child_args[s->n] = Copy(s->child_args[s->n - 1]);
203               s->n++;
204             } else {
205               stack_.push(WalkState<T>(sub[s->n], s->pre_arg));
206             }
207             continue;
208           }
209         }
210 
211         t = PostVisit(re, s->parent_arg, s->pre_arg, s->child_args, s->n);
212         if (re->nsub_ > 1)
213           delete[] s->child_args;
214         break;
215       }
216     }
217 
218     // We've finished stack_.top().
219     // Update next guy down.
220     stack_.pop();
221     if (stack_.empty())
222       return t;
223     s = &stack_.top();
224     if (s->child_args != NULL)
225       s->child_args[s->n] = t;
226     else
227       s->child_arg = t;
228     s->n++;
229   }
230 }
231 
Walk(Regexp * re,T top_arg)232 template<typename T> T Regexp::Walker<T>::Walk(Regexp* re, T top_arg) {
233   // Without the exponential walking behavior,
234   // this budget should be more than enough for any
235   // regexp, and yet not enough to get us in trouble
236   // as far as CPU time.
237   max_visits_ = 1000000;
238   return WalkInternal(re, top_arg, true);
239 }
240 
WalkExponential(Regexp * re,T top_arg,int max_visits)241 template<typename T> T Regexp::Walker<T>::WalkExponential(Regexp* re, T top_arg,
242                                                           int max_visits) {
243   max_visits_ = max_visits;
244   return WalkInternal(re, top_arg, false);
245 }
246 
247 }  // namespace re2
248 
249 #endif  // RE2_WALKER_INL_H_
250