• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2003-2009 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Regular expression interface RE2.
6 //
7 // Originally the PCRE C++ wrapper, but adapted to use
8 // the new automata-based regular expression engines.
9 
10 #include "re2/re2.h"
11 
12 #include <errno.h>
13 #include <stddef.h>
14 #include <stdint.h>
15 #include <stdlib.h>
16 #include <string.h>
17 
18 #include <algorithm>
19 #include <atomic>
20 #include <map>
21 #include <string>
22 #include <utility>
23 #include <vector>
24 
25 #include "absl/base/call_once.h"
26 #include "absl/base/macros.h"
27 #include "absl/container/fixed_array.h"
28 #include "absl/log/absl_check.h"
29 #include "absl/log/absl_log.h"
30 #include "absl/strings/ascii.h"
31 #include "absl/strings/str_format.h"
32 #include "absl/strings/string_view.h"
33 #include "re2/prog.h"
34 #include "re2/regexp.h"
35 #include "re2/sparse_array.h"
36 #include "util/strutil.h"
37 #include "util/utf.h"
38 
39 #ifdef _MSC_VER
40 #include <intrin.h>
41 #endif
42 
43 namespace re2 {
44 
45 // Controls the maximum count permitted by GlobalReplace(); -1 is unlimited.
46 static int maximum_global_replace_count = -1;
47 
FUZZING_ONLY_set_maximum_global_replace_count(int i)48 void RE2::FUZZING_ONLY_set_maximum_global_replace_count(int i) {
49   maximum_global_replace_count = i;
50 }
51 
52 // Maximum number of args we can set
53 static const int kMaxArgs = 16;
54 static const int kVecSize = 1+kMaxArgs;
55 
56 const int RE2::Options::kDefaultMaxMem;  // initialized in re2.h
57 
Options(RE2::CannedOptions opt)58 RE2::Options::Options(RE2::CannedOptions opt)
59   : max_mem_(kDefaultMaxMem),
60     encoding_(opt == RE2::Latin1 ? EncodingLatin1 : EncodingUTF8),
61     posix_syntax_(opt == RE2::POSIX),
62     longest_match_(opt == RE2::POSIX),
63     log_errors_(opt != RE2::Quiet),
64     literal_(false),
65     never_nl_(false),
66     dot_nl_(false),
67     never_capture_(false),
68     case_sensitive_(true),
69     perl_classes_(false),
70     word_boundary_(false),
71     one_line_(false) {
72 }
73 
74 // Empty objects for use as const references.
75 // Statically allocating the storage and then
76 // lazily constructing the objects (in a once
77 // in RE2::Init()) avoids global constructors
78 // and the false positives (thanks, Valgrind)
79 // about memory leaks at program termination.
80 struct EmptyStorage {
81   std::string empty_string;
82   std::map<std::string, int> empty_named_groups;
83   std::map<int, std::string> empty_group_names;
84 };
85 alignas(EmptyStorage) static char empty_storage[sizeof(EmptyStorage)];
86 
empty_string()87 static inline std::string* empty_string() {
88   return &reinterpret_cast<EmptyStorage*>(empty_storage)->empty_string;
89 }
90 
empty_named_groups()91 static inline std::map<std::string, int>* empty_named_groups() {
92   return &reinterpret_cast<EmptyStorage*>(empty_storage)->empty_named_groups;
93 }
94 
empty_group_names()95 static inline std::map<int, std::string>* empty_group_names() {
96   return &reinterpret_cast<EmptyStorage*>(empty_storage)->empty_group_names;
97 }
98 
99 // Converts from Regexp error code to RE2 error code.
100 // Maybe some day they will diverge.  In any event, this
101 // hides the existence of Regexp from RE2 users.
RegexpErrorToRE2(re2::RegexpStatusCode code)102 static RE2::ErrorCode RegexpErrorToRE2(re2::RegexpStatusCode code) {
103   switch (code) {
104     case re2::kRegexpSuccess:
105       return RE2::NoError;
106     case re2::kRegexpInternalError:
107       return RE2::ErrorInternal;
108     case re2::kRegexpBadEscape:
109       return RE2::ErrorBadEscape;
110     case re2::kRegexpBadCharClass:
111       return RE2::ErrorBadCharClass;
112     case re2::kRegexpBadCharRange:
113       return RE2::ErrorBadCharRange;
114     case re2::kRegexpMissingBracket:
115       return RE2::ErrorMissingBracket;
116     case re2::kRegexpMissingParen:
117       return RE2::ErrorMissingParen;
118     case re2::kRegexpUnexpectedParen:
119       return RE2::ErrorUnexpectedParen;
120     case re2::kRegexpTrailingBackslash:
121       return RE2::ErrorTrailingBackslash;
122     case re2::kRegexpRepeatArgument:
123       return RE2::ErrorRepeatArgument;
124     case re2::kRegexpRepeatSize:
125       return RE2::ErrorRepeatSize;
126     case re2::kRegexpRepeatOp:
127       return RE2::ErrorRepeatOp;
128     case re2::kRegexpBadPerlOp:
129       return RE2::ErrorBadPerlOp;
130     case re2::kRegexpBadUTF8:
131       return RE2::ErrorBadUTF8;
132     case re2::kRegexpBadNamedCapture:
133       return RE2::ErrorBadNamedCapture;
134   }
135   return RE2::ErrorInternal;
136 }
137 
trunc(absl::string_view pattern)138 static std::string trunc(absl::string_view pattern) {
139   if (pattern.size() < 100)
140     return std::string(pattern);
141   return std::string(pattern.substr(0, 100)) + "...";
142 }
143 
144 
RE2(const char * pattern)145 RE2::RE2(const char* pattern) {
146   Init(pattern, DefaultOptions);
147 }
148 
RE2(const std::string & pattern)149 RE2::RE2(const std::string& pattern) {
150   Init(pattern, DefaultOptions);
151 }
152 
RE2(absl::string_view pattern)153 RE2::RE2(absl::string_view pattern) {
154   Init(pattern, DefaultOptions);
155 }
156 
RE2(absl::string_view pattern,const Options & options)157 RE2::RE2(absl::string_view pattern, const Options& options) {
158   Init(pattern, options);
159 }
160 
ParseFlags() const161 int RE2::Options::ParseFlags() const {
162   int flags = Regexp::ClassNL;
163   switch (encoding()) {
164     default:
165       if (log_errors())
166         ABSL_LOG(ERROR) << "Unknown encoding " << encoding();
167       break;
168     case RE2::Options::EncodingUTF8:
169       break;
170     case RE2::Options::EncodingLatin1:
171       flags |= Regexp::Latin1;
172       break;
173   }
174 
175   if (!posix_syntax())
176     flags |= Regexp::LikePerl;
177 
178   if (literal())
179     flags |= Regexp::Literal;
180 
181   if (never_nl())
182     flags |= Regexp::NeverNL;
183 
184   if (dot_nl())
185     flags |= Regexp::DotNL;
186 
187   if (never_capture())
188     flags |= Regexp::NeverCapture;
189 
190   if (!case_sensitive())
191     flags |= Regexp::FoldCase;
192 
193   if (perl_classes())
194     flags |= Regexp::PerlClasses;
195 
196   if (word_boundary())
197     flags |= Regexp::PerlB;
198 
199   if (one_line())
200     flags |= Regexp::OneLine;
201 
202   return flags;
203 }
204 
Init(absl::string_view pattern,const Options & options)205 void RE2::Init(absl::string_view pattern, const Options& options) {
206   static absl::once_flag empty_once;
207   absl::call_once(empty_once, []() {
208     (void) new (empty_storage) EmptyStorage;
209   });
210 
211   pattern_ = new std::string(pattern);
212   options_.Copy(options);
213   entire_regexp_ = NULL;
214   suffix_regexp_ = NULL;
215   error_ = empty_string();
216   error_arg_ = empty_string();
217 
218   num_captures_ = -1;
219   error_code_ = NoError;
220   longest_match_ = options_.longest_match();
221   is_one_pass_ = false;
222   prefix_foldcase_ = false;
223   prefix_.clear();
224   prog_ = NULL;
225 
226   rprog_ = NULL;
227   named_groups_ = NULL;
228   group_names_ = NULL;
229 
230   RegexpStatus status;
231   entire_regexp_ = Regexp::Parse(
232     *pattern_,
233     static_cast<Regexp::ParseFlags>(options_.ParseFlags()),
234     &status);
235   if (entire_regexp_ == NULL) {
236     if (options_.log_errors()) {
237       ABSL_LOG(ERROR) << "Error parsing '" << trunc(*pattern_) << "': "
238                       << status.Text();
239     }
240     error_ = new std::string(status.Text());
241     error_code_ = RegexpErrorToRE2(status.code());
242     error_arg_ = new std::string(status.error_arg());
243     return;
244   }
245 
246   bool foldcase;
247   re2::Regexp* suffix;
248   if (entire_regexp_->RequiredPrefix(&prefix_, &foldcase, &suffix)) {
249     prefix_foldcase_ = foldcase;
250     suffix_regexp_ = suffix;
251   }
252   else {
253     suffix_regexp_ = entire_regexp_->Incref();
254   }
255 
256   // Two thirds of the memory goes to the forward Prog,
257   // one third to the reverse prog, because the forward
258   // Prog has two DFAs but the reverse prog has one.
259   prog_ = suffix_regexp_->CompileToProg(options_.max_mem()*2/3);
260   if (prog_ == NULL) {
261     if (options_.log_errors())
262       ABSL_LOG(ERROR) << "Error compiling '" << trunc(*pattern_) << "'";
263     error_ = new std::string("pattern too large - compile failed");
264     error_code_ = RE2::ErrorPatternTooLarge;
265     return;
266   }
267 
268   // We used to compute this lazily, but it's used during the
269   // typical control flow for a match call, so we now compute
270   // it eagerly, which avoids the overhead of absl::once_flag.
271   num_captures_ = suffix_regexp_->NumCaptures();
272 
273   // Could delay this until the first match call that
274   // cares about submatch information, but the one-pass
275   // machine's memory gets cut from the DFA memory budget,
276   // and that is harder to do if the DFA has already
277   // been built.
278   is_one_pass_ = prog_->IsOnePass();
279 }
280 
281 // Returns rprog_, computing it if needed.
ReverseProg() const282 re2::Prog* RE2::ReverseProg() const {
283   absl::call_once(rprog_once_, [](const RE2* re) {
284     re->rprog_ =
285         re->suffix_regexp_->CompileToReverseProg(re->options_.max_mem() / 3);
286     if (re->rprog_ == NULL) {
287       if (re->options_.log_errors())
288         ABSL_LOG(ERROR) << "Error reverse compiling '" << trunc(*re->pattern_)
289                         << "'";
290       // We no longer touch error_ and error_code_ because failing to compile
291       // the reverse Prog is not a showstopper: falling back to NFA execution
292       // is fine. More importantly, an RE2 object is supposed to be logically
293       // immutable: whatever ok() would have returned after Init() completed,
294       // it should continue to return that no matter what ReverseProg() does.
295     }
296   }, this);
297   return rprog_;
298 }
299 
~RE2()300 RE2::~RE2() {
301   if (group_names_ != empty_group_names())
302     delete group_names_;
303   if (named_groups_ != empty_named_groups())
304     delete named_groups_;
305   delete rprog_;
306   delete prog_;
307   if (error_arg_ != empty_string())
308     delete error_arg_;
309   if (error_ != empty_string())
310     delete error_;
311   if (suffix_regexp_)
312     suffix_regexp_->Decref();
313   if (entire_regexp_)
314     entire_regexp_->Decref();
315   delete pattern_;
316 }
317 
ProgramSize() const318 int RE2::ProgramSize() const {
319   if (prog_ == NULL)
320     return -1;
321   return prog_->size();
322 }
323 
ReverseProgramSize() const324 int RE2::ReverseProgramSize() const {
325   if (prog_ == NULL)
326     return -1;
327   Prog* prog = ReverseProg();
328   if (prog == NULL)
329     return -1;
330   return prog->size();
331 }
332 
333 // Finds the most significant non-zero bit in n.
FindMSBSet(uint32_t n)334 static int FindMSBSet(uint32_t n) {
335   ABSL_DCHECK_NE(n, uint32_t{0});
336 #if defined(__GNUC__)
337   return 31 ^ __builtin_clz(n);
338 #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
339   unsigned long c;
340   _BitScanReverse(&c, n);
341   return static_cast<int>(c);
342 #else
343   int c = 0;
344   for (int shift = 1 << 4; shift != 0; shift >>= 1) {
345     uint32_t word = n >> shift;
346     if (word != 0) {
347       n = word;
348       c += shift;
349     }
350   }
351   return c;
352 #endif
353 }
354 
Fanout(Prog * prog,std::vector<int> * histogram)355 static int Fanout(Prog* prog, std::vector<int>* histogram) {
356   SparseArray<int> fanout(prog->size());
357   prog->Fanout(&fanout);
358   int data[32] = {};
359   int size = 0;
360   for (SparseArray<int>::iterator i = fanout.begin(); i != fanout.end(); ++i) {
361     if (i->value() == 0)
362       continue;
363     uint32_t value = i->value();
364     int bucket = FindMSBSet(value);
365     bucket += value & (value-1) ? 1 : 0;
366     ++data[bucket];
367     size = std::max(size, bucket+1);
368   }
369   if (histogram != NULL)
370     histogram->assign(data, data+size);
371   return size-1;
372 }
373 
ProgramFanout(std::vector<int> * histogram) const374 int RE2::ProgramFanout(std::vector<int>* histogram) const {
375   if (prog_ == NULL)
376     return -1;
377   return Fanout(prog_, histogram);
378 }
379 
ReverseProgramFanout(std::vector<int> * histogram) const380 int RE2::ReverseProgramFanout(std::vector<int>* histogram) const {
381   if (prog_ == NULL)
382     return -1;
383   Prog* prog = ReverseProg();
384   if (prog == NULL)
385     return -1;
386   return Fanout(prog, histogram);
387 }
388 
389 // Returns named_groups_, computing it if needed.
NamedCapturingGroups() const390 const std::map<std::string, int>& RE2::NamedCapturingGroups() const {
391   absl::call_once(named_groups_once_, [](const RE2* re) {
392     if (re->suffix_regexp_ != NULL)
393       re->named_groups_ = re->suffix_regexp_->NamedCaptures();
394     if (re->named_groups_ == NULL)
395       re->named_groups_ = empty_named_groups();
396   }, this);
397   return *named_groups_;
398 }
399 
400 // Returns group_names_, computing it if needed.
CapturingGroupNames() const401 const std::map<int, std::string>& RE2::CapturingGroupNames() const {
402   absl::call_once(group_names_once_, [](const RE2* re) {
403     if (re->suffix_regexp_ != NULL)
404       re->group_names_ = re->suffix_regexp_->CaptureNames();
405     if (re->group_names_ == NULL)
406       re->group_names_ = empty_group_names();
407   }, this);
408   return *group_names_;
409 }
410 
411 /***** Convenience interfaces *****/
412 
FullMatchN(absl::string_view text,const RE2 & re,const Arg * const args[],int n)413 bool RE2::FullMatchN(absl::string_view text, const RE2& re,
414                      const Arg* const args[], int n) {
415   return re.DoMatch(text, ANCHOR_BOTH, NULL, args, n);
416 }
417 
PartialMatchN(absl::string_view text,const RE2 & re,const Arg * const args[],int n)418 bool RE2::PartialMatchN(absl::string_view text, const RE2& re,
419                         const Arg* const args[], int n) {
420   return re.DoMatch(text, UNANCHORED, NULL, args, n);
421 }
422 
ConsumeN(absl::string_view * input,const RE2 & re,const Arg * const args[],int n)423 bool RE2::ConsumeN(absl::string_view* input, const RE2& re,
424                    const Arg* const args[], int n) {
425   size_t consumed;
426   if (re.DoMatch(*input, ANCHOR_START, &consumed, args, n)) {
427     input->remove_prefix(consumed);
428     return true;
429   } else {
430     return false;
431   }
432 }
433 
FindAndConsumeN(absl::string_view * input,const RE2 & re,const Arg * const args[],int n)434 bool RE2::FindAndConsumeN(absl::string_view* input, const RE2& re,
435                           const Arg* const args[], int n) {
436   size_t consumed;
437   if (re.DoMatch(*input, UNANCHORED, &consumed, args, n)) {
438     input->remove_prefix(consumed);
439     return true;
440   } else {
441     return false;
442   }
443 }
444 
Replace(std::string * str,const RE2 & re,absl::string_view rewrite)445 bool RE2::Replace(std::string* str,
446                   const RE2& re,
447                   absl::string_view rewrite) {
448   absl::string_view vec[kVecSize];
449   int nvec = 1 + MaxSubmatch(rewrite);
450   if (nvec > 1 + re.NumberOfCapturingGroups())
451     return false;
452   if (nvec > static_cast<int>(ABSL_ARRAYSIZE(vec)))
453     return false;
454   if (!re.Match(*str, 0, str->size(), UNANCHORED, vec, nvec))
455     return false;
456 
457   std::string s;
458   if (!re.Rewrite(&s, rewrite, vec, nvec))
459     return false;
460 
461   ABSL_DCHECK_GE(vec[0].data(), str->data());
462   ABSL_DCHECK_LE(vec[0].data() + vec[0].size(), str->data() + str->size());
463   str->replace(vec[0].data() - str->data(), vec[0].size(), s);
464   return true;
465 }
466 
GlobalReplace(std::string * str,const RE2 & re,absl::string_view rewrite)467 int RE2::GlobalReplace(std::string* str,
468                        const RE2& re,
469                        absl::string_view rewrite) {
470   absl::string_view vec[kVecSize];
471   int nvec = 1 + MaxSubmatch(rewrite);
472   if (nvec > 1 + re.NumberOfCapturingGroups())
473     return false;
474   if (nvec > static_cast<int>(ABSL_ARRAYSIZE(vec)))
475     return false;
476 
477   const char* p = str->data();
478   const char* ep = p + str->size();
479   const char* lastend = NULL;
480   std::string out;
481   int count = 0;
482   while (p <= ep) {
483     if (maximum_global_replace_count != -1 &&
484         count >= maximum_global_replace_count)
485       break;
486     if (!re.Match(*str, static_cast<size_t>(p - str->data()),
487                   str->size(), UNANCHORED, vec, nvec))
488       break;
489     if (p < vec[0].data())
490       out.append(p, vec[0].data() - p);
491     if (vec[0].data() == lastend && vec[0].empty()) {
492       // Disallow empty match at end of last match: skip ahead.
493       //
494       // fullrune() takes int, not ptrdiff_t. However, it just looks
495       // at the leading byte and treats any length >= 4 the same.
496       if (re.options().encoding() == RE2::Options::EncodingUTF8 &&
497           fullrune(p, static_cast<int>(std::min(ptrdiff_t{4}, ep - p)))) {
498         // re is in UTF-8 mode and there is enough left of str
499         // to allow us to advance by up to UTFmax bytes.
500         Rune r;
501         int n = chartorune(&r, p);
502         // Some copies of chartorune have a bug that accepts
503         // encodings of values in (10FFFF, 1FFFFF] as valid.
504         if (r > Runemax) {
505           n = 1;
506           r = Runeerror;
507         }
508         if (!(n == 1 && r == Runeerror)) {  // no decoding error
509           out.append(p, n);
510           p += n;
511           continue;
512         }
513       }
514       // Most likely, re is in Latin-1 mode. If it is in UTF-8 mode,
515       // we fell through from above and the GIGO principle applies.
516       if (p < ep)
517         out.append(p, 1);
518       p++;
519       continue;
520     }
521     re.Rewrite(&out, rewrite, vec, nvec);
522     p = vec[0].data() + vec[0].size();
523     lastend = p;
524     count++;
525   }
526 
527   if (count == 0)
528     return 0;
529 
530   if (p < ep)
531     out.append(p, ep - p);
532   using std::swap;
533   swap(out, *str);
534   return count;
535 }
536 
Extract(absl::string_view text,const RE2 & re,absl::string_view rewrite,std::string * out)537 bool RE2::Extract(absl::string_view text,
538                   const RE2& re,
539                   absl::string_view rewrite,
540                   std::string* out) {
541   absl::string_view vec[kVecSize];
542   int nvec = 1 + MaxSubmatch(rewrite);
543   if (nvec > 1 + re.NumberOfCapturingGroups())
544     return false;
545   if (nvec > static_cast<int>(ABSL_ARRAYSIZE(vec)))
546     return false;
547   if (!re.Match(text, 0, text.size(), UNANCHORED, vec, nvec))
548     return false;
549 
550   out->clear();
551   return re.Rewrite(out, rewrite, vec, nvec);
552 }
553 
QuoteMeta(absl::string_view unquoted)554 std::string RE2::QuoteMeta(absl::string_view unquoted) {
555   std::string result;
556   result.reserve(unquoted.size() << 1);
557 
558   // Escape any ascii character not in [A-Za-z_0-9].
559   //
560   // Note that it's legal to escape a character even if it has no
561   // special meaning in a regular expression -- so this function does
562   // that.  (This also makes it identical to the perl function of the
563   // same name except for the null-character special case;
564   // see `perldoc -f quotemeta`.)
565   for (size_t ii = 0; ii < unquoted.size(); ++ii) {
566     // Note that using 'isalnum' here raises the benchmark time from
567     // 32ns to 58ns:
568     if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
569         (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
570         (unquoted[ii] < '0' || unquoted[ii] > '9') &&
571         unquoted[ii] != '_' &&
572         // If this is the part of a UTF8 or Latin1 character, we need
573         // to copy this byte without escaping.  Experimentally this is
574         // what works correctly with the regexp library.
575         !(unquoted[ii] & 128)) {
576       if (unquoted[ii] == '\0') {  // Special handling for null chars.
577         // Note that this special handling is not strictly required for RE2,
578         // but this quoting is required for other regexp libraries such as
579         // PCRE.
580         // Can't use "\\0" since the next character might be a digit.
581         result += "\\x00";
582         continue;
583       }
584       result += '\\';
585     }
586     result += unquoted[ii];
587   }
588 
589   return result;
590 }
591 
PossibleMatchRange(std::string * min,std::string * max,int maxlen) const592 bool RE2::PossibleMatchRange(std::string* min, std::string* max,
593                              int maxlen) const {
594   if (prog_ == NULL)
595     return false;
596 
597   int n = static_cast<int>(prefix_.size());
598   if (n > maxlen)
599     n = maxlen;
600 
601   // Determine initial min max from prefix_ literal.
602   *min = prefix_.substr(0, n);
603   *max = prefix_.substr(0, n);
604   if (prefix_foldcase_) {
605     // prefix is ASCII lowercase; change *min to uppercase.
606     for (int i = 0; i < n; i++) {
607       char& c = (*min)[i];
608       if ('a' <= c && c <= 'z')
609         c += 'A' - 'a';
610     }
611   }
612 
613   // Add to prefix min max using PossibleMatchRange on regexp.
614   std::string dmin, dmax;
615   maxlen -= n;
616   if (maxlen > 0 && prog_->PossibleMatchRange(&dmin, &dmax, maxlen)) {
617     min->append(dmin);
618     max->append(dmax);
619   } else if (!max->empty()) {
620     // prog_->PossibleMatchRange has failed us,
621     // but we still have useful information from prefix_.
622     // Round up *max to allow any possible suffix.
623     PrefixSuccessor(max);
624   } else {
625     // Nothing useful.
626     *min = "";
627     *max = "";
628     return false;
629   }
630 
631   return true;
632 }
633 
634 // Avoid possible locale nonsense in standard strcasecmp.
635 // The string a is known to be all lowercase.
ascii_strcasecmp(const char * a,const char * b,size_t len)636 static int ascii_strcasecmp(const char* a, const char* b, size_t len) {
637   const char* ae = a + len;
638 
639   for (; a < ae; a++, b++) {
640     uint8_t x = *a;
641     uint8_t y = *b;
642     if ('A' <= y && y <= 'Z')
643       y += 'a' - 'A';
644     if (x != y)
645       return x - y;
646   }
647   return 0;
648 }
649 
650 
651 /***** Actual matching and rewriting code *****/
652 
Match(absl::string_view text,size_t startpos,size_t endpos,Anchor re_anchor,absl::string_view * submatch,int nsubmatch) const653 bool RE2::Match(absl::string_view text,
654                 size_t startpos,
655                 size_t endpos,
656                 Anchor re_anchor,
657                 absl::string_view* submatch,
658                 int nsubmatch) const {
659   if (!ok()) {
660     if (options_.log_errors())
661       ABSL_LOG(ERROR) << "Invalid RE2: " << *error_;
662     return false;
663   }
664 
665   if (startpos > endpos || endpos > text.size()) {
666     if (options_.log_errors())
667       ABSL_LOG(ERROR) << "RE2: invalid startpos, endpos pair. ["
668                       << "startpos: " << startpos << ", "
669                       << "endpos: " << endpos << ", "
670                       << "text size: " << text.size() << "]";
671     return false;
672   }
673 
674   absl::string_view subtext = text;
675   subtext.remove_prefix(startpos);
676   subtext.remove_suffix(text.size() - endpos);
677 
678   // Use DFAs to find exact location of match, filter out non-matches.
679 
680   // Don't ask for the location if we won't use it.
681   // SearchDFA can do extra optimizations in that case.
682   absl::string_view match;
683   absl::string_view* matchp = &match;
684   if (nsubmatch == 0)
685     matchp = NULL;
686 
687   int ncap = 1 + NumberOfCapturingGroups();
688   if (ncap > nsubmatch)
689     ncap = nsubmatch;
690 
691   // If the regexp is anchored explicitly, must not be in middle of text.
692   if (prog_->anchor_start() && startpos != 0)
693     return false;
694   if (prog_->anchor_end() && endpos != text.size())
695     return false;
696 
697   // If the regexp is anchored explicitly, update re_anchor
698   // so that we can potentially fall into a faster case below.
699   if (prog_->anchor_start() && prog_->anchor_end())
700     re_anchor = ANCHOR_BOTH;
701   else if (prog_->anchor_start() && re_anchor != ANCHOR_BOTH)
702     re_anchor = ANCHOR_START;
703 
704   // Check for the required prefix, if any.
705   size_t prefixlen = 0;
706   if (!prefix_.empty()) {
707     if (startpos != 0)
708       return false;
709     prefixlen = prefix_.size();
710     if (prefixlen > subtext.size())
711       return false;
712     if (prefix_foldcase_) {
713       if (ascii_strcasecmp(&prefix_[0], subtext.data(), prefixlen) != 0)
714         return false;
715     } else {
716       if (memcmp(&prefix_[0], subtext.data(), prefixlen) != 0)
717         return false;
718     }
719     subtext.remove_prefix(prefixlen);
720     // If there is a required prefix, the anchor must be at least ANCHOR_START.
721     if (re_anchor != ANCHOR_BOTH)
722       re_anchor = ANCHOR_START;
723   }
724 
725   Prog::Anchor anchor = Prog::kUnanchored;
726   Prog::MatchKind kind =
727       longest_match_ ? Prog::kLongestMatch : Prog::kFirstMatch;
728 
729   bool can_one_pass = is_one_pass_ && ncap <= Prog::kMaxOnePassCapture;
730   bool can_bit_state = prog_->CanBitState();
731   size_t bit_state_text_max_size = prog_->bit_state_text_max_size();
732 
733 #ifdef RE2_HAVE_THREAD_LOCAL
734   hooks::context = this;
735 #endif
736   bool dfa_failed = false;
737   bool skipped_test = false;
738   switch (re_anchor) {
739     default:
740       ABSL_LOG(DFATAL) << "Unexpected re_anchor value: " << re_anchor;
741       return false;
742 
743     case UNANCHORED: {
744       if (prog_->anchor_end()) {
745         // This is a very special case: we don't need the forward DFA because
746         // we already know where the match must end! Instead, the reverse DFA
747         // can say whether there is a match and (optionally) where it starts.
748         Prog* prog = ReverseProg();
749         if (prog == NULL) {
750           // Fall back to NFA below.
751           skipped_test = true;
752           break;
753         }
754         if (!prog->SearchDFA(subtext, text, Prog::kAnchored,
755                              Prog::kLongestMatch, matchp, &dfa_failed, NULL)) {
756           if (dfa_failed) {
757             if (options_.log_errors())
758               ABSL_LOG(ERROR) << "DFA out of memory: "
759                               << "pattern length " << pattern_->size() << ", "
760                               << "program size " << prog->size() << ", "
761                               << "list count " << prog->list_count() << ", "
762                               << "bytemap range " << prog->bytemap_range();
763             // Fall back to NFA below.
764             skipped_test = true;
765             break;
766           }
767           return false;
768         }
769         if (matchp == NULL)  // Matched.  Don't care where.
770           return true;
771         break;
772       }
773 
774       if (!prog_->SearchDFA(subtext, text, anchor, kind,
775                             matchp, &dfa_failed, NULL)) {
776         if (dfa_failed) {
777           if (options_.log_errors())
778             ABSL_LOG(ERROR) << "DFA out of memory: "
779                             << "pattern length " << pattern_->size() << ", "
780                             << "program size " << prog_->size() << ", "
781                             << "list count " << prog_->list_count() << ", "
782                             << "bytemap range " << prog_->bytemap_range();
783           // Fall back to NFA below.
784           skipped_test = true;
785           break;
786         }
787         return false;
788       }
789       if (matchp == NULL)  // Matched.  Don't care where.
790         return true;
791       // SearchDFA set match.end() but didn't know where the
792       // match started.  Run the regexp backward from match.end()
793       // to find the longest possible match -- that's where it started.
794       Prog* prog = ReverseProg();
795       if (prog == NULL) {
796         // Fall back to NFA below.
797         skipped_test = true;
798         break;
799       }
800       if (!prog->SearchDFA(match, text, Prog::kAnchored,
801                            Prog::kLongestMatch, &match, &dfa_failed, NULL)) {
802         if (dfa_failed) {
803           if (options_.log_errors())
804             ABSL_LOG(ERROR) << "DFA out of memory: "
805                             << "pattern length " << pattern_->size() << ", "
806                             << "program size " << prog->size() << ", "
807                             << "list count " << prog->list_count() << ", "
808                             << "bytemap range " << prog->bytemap_range();
809           // Fall back to NFA below.
810           skipped_test = true;
811           break;
812         }
813         if (options_.log_errors())
814           ABSL_LOG(ERROR) << "SearchDFA inconsistency";
815         return false;
816       }
817       break;
818     }
819 
820     case ANCHOR_BOTH:
821     case ANCHOR_START:
822       if (re_anchor == ANCHOR_BOTH)
823         kind = Prog::kFullMatch;
824       anchor = Prog::kAnchored;
825 
826       // If only a small amount of text and need submatch
827       // information anyway and we're going to use OnePass or BitState
828       // to get it, we might as well not even bother with the DFA:
829       // OnePass or BitState will be fast enough.
830       // On tiny texts, OnePass outruns even the DFA, and
831       // it doesn't have the shared state and occasional mutex that
832       // the DFA does.
833       if (can_one_pass && text.size() <= 4096 &&
834           (ncap > 1 || text.size() <= 16)) {
835         skipped_test = true;
836         break;
837       }
838       if (can_bit_state && text.size() <= bit_state_text_max_size &&
839           ncap > 1) {
840         skipped_test = true;
841         break;
842       }
843       if (!prog_->SearchDFA(subtext, text, anchor, kind,
844                             &match, &dfa_failed, NULL)) {
845         if (dfa_failed) {
846           if (options_.log_errors())
847             ABSL_LOG(ERROR) << "DFA out of memory: "
848                             << "pattern length " << pattern_->size() << ", "
849                             << "program size " << prog_->size() << ", "
850                             << "list count " << prog_->list_count() << ", "
851                             << "bytemap range " << prog_->bytemap_range();
852           // Fall back to NFA below.
853           skipped_test = true;
854           break;
855         }
856         return false;
857       }
858       break;
859   }
860 
861   if (!skipped_test && ncap <= 1) {
862     // We know exactly where it matches.  That's enough.
863     if (ncap == 1)
864       submatch[0] = match;
865   } else {
866     absl::string_view subtext1;
867     if (skipped_test) {
868       // DFA ran out of memory or was skipped:
869       // need to search in entire original text.
870       subtext1 = subtext;
871     } else {
872       // DFA found the exact match location:
873       // let NFA run an anchored, full match search
874       // to find submatch locations.
875       subtext1 = match;
876       anchor = Prog::kAnchored;
877       kind = Prog::kFullMatch;
878     }
879 
880     if (can_one_pass && anchor != Prog::kUnanchored) {
881       if (!prog_->SearchOnePass(subtext1, text, anchor, kind, submatch, ncap)) {
882         if (!skipped_test && options_.log_errors())
883           ABSL_LOG(ERROR) << "SearchOnePass inconsistency";
884         return false;
885       }
886     } else if (can_bit_state && subtext1.size() <= bit_state_text_max_size) {
887       if (!prog_->SearchBitState(subtext1, text, anchor,
888                                  kind, submatch, ncap)) {
889         if (!skipped_test && options_.log_errors())
890           ABSL_LOG(ERROR) << "SearchBitState inconsistency";
891         return false;
892       }
893     } else {
894       if (!prog_->SearchNFA(subtext1, text, anchor, kind, submatch, ncap)) {
895         if (!skipped_test && options_.log_errors())
896           ABSL_LOG(ERROR) << "SearchNFA inconsistency";
897         return false;
898       }
899     }
900   }
901 
902   // Adjust overall match for required prefix that we stripped off.
903   if (prefixlen > 0 && nsubmatch > 0)
904     submatch[0] = absl::string_view(submatch[0].data() - prefixlen,
905                                     submatch[0].size() + prefixlen);
906 
907   // Zero submatches that don't exist in the regexp.
908   for (int i = ncap; i < nsubmatch; i++)
909     submatch[i] = absl::string_view();
910   return true;
911 }
912 
913 // Internal matcher - like Match() but takes Args not string_views.
DoMatch(absl::string_view text,Anchor re_anchor,size_t * consumed,const Arg * const * args,int n) const914 bool RE2::DoMatch(absl::string_view text,
915                   Anchor re_anchor,
916                   size_t* consumed,
917                   const Arg* const* args,
918                   int n) const {
919   if (!ok()) {
920     if (options_.log_errors())
921       ABSL_LOG(ERROR) << "Invalid RE2: " << *error_;
922     return false;
923   }
924 
925   if (NumberOfCapturingGroups() < n) {
926     // RE has fewer capturing groups than number of Arg pointers passed in.
927     return false;
928   }
929 
930   // Count number of capture groups needed.
931   int nvec;
932   if (n == 0 && consumed == NULL)
933     nvec = 0;
934   else
935     nvec = n+1;
936 
937   absl::FixedArray<absl::string_view, kVecSize> vec_storage(nvec);
938   absl::string_view* vec = vec_storage.data();
939 
940   if (!Match(text, 0, text.size(), re_anchor, vec, nvec)) {
941     return false;
942   }
943 
944   if (consumed != NULL)
945     *consumed = static_cast<size_t>(EndPtr(vec[0]) - BeginPtr(text));
946 
947   if (n == 0 || args == NULL) {
948     // We are not interested in results
949     return true;
950   }
951 
952   // If we got here, we must have matched the whole pattern.
953   for (int i = 0; i < n; i++) {
954     absl::string_view s = vec[i+1];
955     if (!args[i]->Parse(s.data(), s.size())) {
956       // TODO: Should we indicate what the error was?
957       return false;
958     }
959   }
960 
961   return true;
962 }
963 
964 // Checks that the rewrite string is well-formed with respect to this
965 // regular expression.
CheckRewriteString(absl::string_view rewrite,std::string * error) const966 bool RE2::CheckRewriteString(absl::string_view rewrite,
967                              std::string* error) const {
968   int max_token = -1;
969   for (const char *s = rewrite.data(), *end = s + rewrite.size();
970        s < end; s++) {
971     int c = *s;
972     if (c != '\\') {
973       continue;
974     }
975     if (++s == end) {
976       *error = "Rewrite schema error: '\\' not allowed at end.";
977       return false;
978     }
979     c = *s;
980     if (c == '\\') {
981       continue;
982     }
983     if (!absl::ascii_isdigit(c)) {
984       *error = "Rewrite schema error: "
985                "'\\' must be followed by a digit or '\\'.";
986       return false;
987     }
988     int n = (c - '0');
989     if (max_token < n) {
990       max_token = n;
991     }
992   }
993 
994   if (max_token > NumberOfCapturingGroups()) {
995     *error = absl::StrFormat(
996         "Rewrite schema requests %d matches, but the regexp only has %d "
997         "parenthesized subexpressions.",
998         max_token, NumberOfCapturingGroups());
999     return false;
1000   }
1001   return true;
1002 }
1003 
1004 // Returns the maximum submatch needed for the rewrite to be done by Replace().
1005 // E.g. if rewrite == "foo \\2,\\1", returns 2.
MaxSubmatch(absl::string_view rewrite)1006 int RE2::MaxSubmatch(absl::string_view rewrite) {
1007   int max = 0;
1008   for (const char *s = rewrite.data(), *end = s + rewrite.size();
1009        s < end; s++) {
1010     if (*s == '\\') {
1011       s++;
1012       int c = (s < end) ? *s : -1;
1013       if (absl::ascii_isdigit(c)) {
1014         int n = (c - '0');
1015         if (n > max)
1016           max = n;
1017       }
1018     }
1019   }
1020   return max;
1021 }
1022 
1023 // Append the "rewrite" string, with backslash substitutions from "vec",
1024 // to string "out".
Rewrite(std::string * out,absl::string_view rewrite,const absl::string_view * vec,int veclen) const1025 bool RE2::Rewrite(std::string* out,
1026                   absl::string_view rewrite,
1027                   const absl::string_view* vec,
1028                   int veclen) const {
1029   for (const char *s = rewrite.data(), *end = s + rewrite.size();
1030        s < end; s++) {
1031     if (*s != '\\') {
1032       out->push_back(*s);
1033       continue;
1034     }
1035     s++;
1036     int c = (s < end) ? *s : -1;
1037     if (absl::ascii_isdigit(c)) {
1038       int n = (c - '0');
1039       if (n >= veclen) {
1040         if (options_.log_errors()) {
1041           ABSL_LOG(ERROR) << "invalid substitution \\" << n
1042                           << " from " << veclen << " groups";
1043         }
1044         return false;
1045       }
1046       absl::string_view snip = vec[n];
1047       if (!snip.empty())
1048         out->append(snip.data(), snip.size());
1049     } else if (c == '\\') {
1050       out->push_back('\\');
1051     } else {
1052       if (options_.log_errors())
1053         ABSL_LOG(ERROR) << "invalid rewrite pattern: " << rewrite;
1054       return false;
1055     }
1056   }
1057   return true;
1058 }
1059 
1060 /***** Parsers for various types *****/
1061 
1062 namespace re2_internal {
1063 
1064 template <>
Parse(const char * str,size_t n,void * dest)1065 bool Parse(const char* str, size_t n, void* dest) {
1066   // We fail if somebody asked us to store into a non-NULL void* pointer
1067   return (dest == NULL);
1068 }
1069 
1070 template <>
Parse(const char * str,size_t n,std::string * dest)1071 bool Parse(const char* str, size_t n, std::string* dest) {
1072   if (dest == NULL) return true;
1073   dest->assign(str, n);
1074   return true;
1075 }
1076 
1077 template <>
Parse(const char * str,size_t n,absl::string_view * dest)1078 bool Parse(const char* str, size_t n, absl::string_view* dest) {
1079   if (dest == NULL) return true;
1080   *dest = absl::string_view(str, n);
1081   return true;
1082 }
1083 
1084 template <>
Parse(const char * str,size_t n,char * dest)1085 bool Parse(const char* str, size_t n, char* dest) {
1086   if (n != 1) return false;
1087   if (dest == NULL) return true;
1088   *dest = str[0];
1089   return true;
1090 }
1091 
1092 template <>
Parse(const char * str,size_t n,signed char * dest)1093 bool Parse(const char* str, size_t n, signed char* dest) {
1094   if (n != 1) return false;
1095   if (dest == NULL) return true;
1096   *dest = str[0];
1097   return true;
1098 }
1099 
1100 template <>
Parse(const char * str,size_t n,unsigned char * dest)1101 bool Parse(const char* str, size_t n, unsigned char* dest) {
1102   if (n != 1) return false;
1103   if (dest == NULL) return true;
1104   *dest = str[0];
1105   return true;
1106 }
1107 
1108 // Largest number spec that we are willing to parse
1109 static const int kMaxNumberLength = 32;
1110 
1111 // REQUIRES "buf" must have length at least nbuf.
1112 // Copies "str" into "buf" and null-terminates.
1113 // Overwrites *np with the new length.
TerminateNumber(char * buf,size_t nbuf,const char * str,size_t * np,bool accept_spaces)1114 static const char* TerminateNumber(char* buf, size_t nbuf, const char* str,
1115                                    size_t* np, bool accept_spaces) {
1116   size_t n = *np;
1117   if (n == 0) return "";
1118   if (n > 0 && absl::ascii_isspace(*str)) {
1119     // We are less forgiving than the strtoxxx() routines and do not
1120     // allow leading spaces. We do allow leading spaces for floats.
1121     if (!accept_spaces) {
1122       return "";
1123     }
1124     while (n > 0 && absl::ascii_isspace(*str)) {
1125       n--;
1126       str++;
1127     }
1128   }
1129 
1130   // Although buf has a fixed maximum size, we can still handle
1131   // arbitrarily large integers correctly by omitting leading zeros.
1132   // (Numbers that are still too long will be out of range.)
1133   // Before deciding whether str is too long,
1134   // remove leading zeros with s/000+/00/.
1135   // Leaving the leading two zeros in place means that
1136   // we don't change 0000x123 (invalid) into 0x123 (valid).
1137   // Skip over leading - before replacing.
1138   bool neg = false;
1139   if (n >= 1 && str[0] == '-') {
1140     neg = true;
1141     n--;
1142     str++;
1143   }
1144 
1145   if (n >= 3 && str[0] == '0' && str[1] == '0') {
1146     while (n >= 3 && str[2] == '0') {
1147       n--;
1148       str++;
1149     }
1150   }
1151 
1152   if (neg) {  // make room in buf for -
1153     n++;
1154     str--;
1155   }
1156 
1157   if (n > nbuf-1) return "";
1158 
1159   memmove(buf, str, n);
1160   if (neg) {
1161     buf[0] = '-';
1162   }
1163   buf[n] = '\0';
1164   *np = n;
1165   return buf;
1166 }
1167 
1168 template <>
Parse(const char * str,size_t n,float * dest)1169 bool Parse(const char* str, size_t n, float* dest) {
1170   if (n == 0) return false;
1171   static const int kMaxLength = 200;
1172   char buf[kMaxLength+1];
1173   str = TerminateNumber(buf, sizeof buf, str, &n, true);
1174   char* end;
1175   errno = 0;
1176   float r = strtof(str, &end);
1177   if (end != str + n) return false;   // Leftover junk
1178   if (errno) return false;
1179   if (dest == NULL) return true;
1180   *dest = r;
1181   return true;
1182 }
1183 
1184 template <>
Parse(const char * str,size_t n,double * dest)1185 bool Parse(const char* str, size_t n, double* dest) {
1186   if (n == 0) return false;
1187   static const int kMaxLength = 200;
1188   char buf[kMaxLength+1];
1189   str = TerminateNumber(buf, sizeof buf, str, &n, true);
1190   char* end;
1191   errno = 0;
1192   double r = strtod(str, &end);
1193   if (end != str + n) return false;   // Leftover junk
1194   if (errno) return false;
1195   if (dest == NULL) return true;
1196   *dest = r;
1197   return true;
1198 }
1199 
1200 template <>
Parse(const char * str,size_t n,long * dest,int radix)1201 bool Parse(const char* str, size_t n, long* dest, int radix) {
1202   if (n == 0) return false;
1203   char buf[kMaxNumberLength+1];
1204   str = TerminateNumber(buf, sizeof buf, str, &n, false);
1205   char* end;
1206   errno = 0;
1207   long r = strtol(str, &end, radix);
1208   if (end != str + n) return false;   // Leftover junk
1209   if (errno) return false;
1210   if (dest == NULL) return true;
1211   *dest = r;
1212   return true;
1213 }
1214 
1215 template <>
Parse(const char * str,size_t n,unsigned long * dest,int radix)1216 bool Parse(const char* str, size_t n, unsigned long* dest, int radix) {
1217   if (n == 0) return false;
1218   char buf[kMaxNumberLength+1];
1219   str = TerminateNumber(buf, sizeof buf, str, &n, false);
1220   if (str[0] == '-') {
1221     // strtoul() will silently accept negative numbers and parse
1222     // them.  This module is more strict and treats them as errors.
1223     return false;
1224   }
1225 
1226   char* end;
1227   errno = 0;
1228   unsigned long r = strtoul(str, &end, radix);
1229   if (end != str + n) return false;   // Leftover junk
1230   if (errno) return false;
1231   if (dest == NULL) return true;
1232   *dest = r;
1233   return true;
1234 }
1235 
1236 template <>
Parse(const char * str,size_t n,short * dest,int radix)1237 bool Parse(const char* str, size_t n, short* dest, int radix) {
1238   long r;
1239   if (!Parse(str, n, &r, radix)) return false;  // Could not parse
1240   if ((short)r != r) return false;              // Out of range
1241   if (dest == NULL) return true;
1242   *dest = (short)r;
1243   return true;
1244 }
1245 
1246 template <>
Parse(const char * str,size_t n,unsigned short * dest,int radix)1247 bool Parse(const char* str, size_t n, unsigned short* dest, int radix) {
1248   unsigned long r;
1249   if (!Parse(str, n, &r, radix)) return false;  // Could not parse
1250   if ((unsigned short)r != r) return false;     // Out of range
1251   if (dest == NULL) return true;
1252   *dest = (unsigned short)r;
1253   return true;
1254 }
1255 
1256 template <>
Parse(const char * str,size_t n,int * dest,int radix)1257 bool Parse(const char* str, size_t n, int* dest, int radix) {
1258   long r;
1259   if (!Parse(str, n, &r, radix)) return false;  // Could not parse
1260   if ((int)r != r) return false;                // Out of range
1261   if (dest == NULL) return true;
1262   *dest = (int)r;
1263   return true;
1264 }
1265 
1266 template <>
Parse(const char * str,size_t n,unsigned int * dest,int radix)1267 bool Parse(const char* str, size_t n, unsigned int* dest, int radix) {
1268   unsigned long r;
1269   if (!Parse(str, n, &r, radix)) return false;  // Could not parse
1270   if ((unsigned int)r != r) return false;       // Out of range
1271   if (dest == NULL) return true;
1272   *dest = (unsigned int)r;
1273   return true;
1274 }
1275 
1276 template <>
Parse(const char * str,size_t n,long long * dest,int radix)1277 bool Parse(const char* str, size_t n, long long* dest, int radix) {
1278   if (n == 0) return false;
1279   char buf[kMaxNumberLength+1];
1280   str = TerminateNumber(buf, sizeof buf, str, &n, false);
1281   char* end;
1282   errno = 0;
1283   long long r = strtoll(str, &end, radix);
1284   if (end != str + n) return false;   // Leftover junk
1285   if (errno) return false;
1286   if (dest == NULL) return true;
1287   *dest = r;
1288   return true;
1289 }
1290 
1291 template <>
Parse(const char * str,size_t n,unsigned long long * dest,int radix)1292 bool Parse(const char* str, size_t n, unsigned long long* dest, int radix) {
1293   if (n == 0) return false;
1294   char buf[kMaxNumberLength+1];
1295   str = TerminateNumber(buf, sizeof buf, str, &n, false);
1296   if (str[0] == '-') {
1297     // strtoull() will silently accept negative numbers and parse
1298     // them.  This module is more strict and treats them as errors.
1299     return false;
1300   }
1301   char* end;
1302   errno = 0;
1303   unsigned long long r = strtoull(str, &end, radix);
1304   if (end != str + n) return false;   // Leftover junk
1305   if (errno) return false;
1306   if (dest == NULL) return true;
1307   *dest = r;
1308   return true;
1309 }
1310 
1311 }  // namespace re2_internal
1312 
1313 namespace hooks {
1314 
1315 #ifdef RE2_HAVE_THREAD_LOCAL
1316 thread_local const RE2* context = NULL;
1317 #endif
1318 
1319 template <typename T>
1320 union Hook {
Store(T * cb)1321   void Store(T* cb) { cb_.store(cb, std::memory_order_release); }
Load() const1322   T* Load() const { return cb_.load(std::memory_order_acquire); }
1323 
1324 #if !defined(__clang__) && defined(_MSC_VER)
1325   // Citing https://github.com/protocolbuffers/protobuf/pull/4777 as precedent,
1326   // this is a gross hack to make std::atomic<T*> constant-initialized on MSVC.
1327   static_assert(ATOMIC_POINTER_LOCK_FREE == 2,
1328                 "std::atomic<T*> must be always lock-free");
1329   T* cb_for_constinit_;
1330 #endif
1331 
1332   std::atomic<T*> cb_;
1333 };
1334 
1335 template <typename T>
DoNothing(const T &)1336 static void DoNothing(const T&) {}
1337 
1338 #define DEFINE_HOOK(type, name)                                       \
1339   static Hook<type##Callback> name##_hook = {{&DoNothing<type>}};     \
1340   void Set##type##Hook(type##Callback* cb) { name##_hook.Store(cb); } \
1341   type##Callback* Get##type##Hook() { return name##_hook.Load(); }
1342 
1343 DEFINE_HOOK(DFAStateCacheReset, dfa_state_cache_reset)
1344 DEFINE_HOOK(DFASearchFailure, dfa_search_failure)
1345 
1346 #undef DEFINE_HOOK
1347 
1348 }  // namespace hooks
1349 
1350 }  // namespace re2
1351