• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 #include <fuzzer/FuzzedDataProvider.h>
6 #include <stddef.h>
7 #include <stdint.h>
8 #include <memory>
9 #include <queue>
10 #include <string>
11 #include <vector>
12 
13 #include "re2/prefilter.h"
14 #include "re2/re2.h"
15 
16 using re2::StringPiece;
17 
18 // NOT static, NOT signed.
19 uint8_t dummy = 0;
20 
TestOneInput(StringPiece pattern,const RE2::Options & options,StringPiece text)21 void TestOneInput(StringPiece pattern, const RE2::Options& options,
22                   StringPiece text) {
23   // Crudely limit the use of ., \p, \P, \d, \D, \s, \S, \w and \W.
24   // Otherwise, we will waste time on inputs that have long runs of various
25   // character classes. The fuzzer has shown itself to be easily capable of
26   // generating such patterns that fall within the other limits, but result
27   // in timeouts nonetheless. The marginal cost is high - even more so when
28   // counted repetition is involved - whereas the marginal benefit is zero.
29   // TODO(junyer): Handle [:isalnum:] et al. when they start to cause pain.
30   int char_class = 0;
31   int backslash_p = 0;  // very expensive, so handle specially
32   for (size_t i = 0; i < pattern.size(); i++) {
33     if (pattern[i] == '.')
34       char_class++;
35     if (pattern[i] != '\\')
36       continue;
37     i++;
38     if (i >= pattern.size())
39       break;
40     if (pattern[i] == 'p' || pattern[i] == 'P' ||
41         pattern[i] == 'd' || pattern[i] == 'D' ||
42         pattern[i] == 's' || pattern[i] == 'S' ||
43         pattern[i] == 'w' || pattern[i] == 'W')
44       char_class++;
45     if (pattern[i] == 'p' || pattern[i] == 'P')
46       backslash_p++;
47   }
48   if (char_class > 9)
49     return;
50   if (backslash_p > 1)
51     return;
52 
53   RE2 re(pattern, options);
54   if (!re.ok())
55     return;
56 
57   // Don't waste time fuzzing programs with large substrings.
58   // They can cause bug reports due to fuzzer timeouts when they
59   // are repetitions (e.g. hundreds of NUL bytes) and matching is
60   // unanchored. And they aren't interesting for fuzzing purposes.
61   std::unique_ptr<re2::Prefilter> prefilter(re2::Prefilter::FromRE2(&re));
62   if (prefilter == nullptr)
63     return;
64   std::queue<re2::Prefilter*> nodes;
65   nodes.push(prefilter.get());
66   while (!nodes.empty()) {
67     re2::Prefilter* node = nodes.front();
68     nodes.pop();
69     if (node->op() == re2::Prefilter::ATOM) {
70       if (node->atom().size() > 9)
71         return;
72     } else if (node->op() == re2::Prefilter::AND ||
73                node->op() == re2::Prefilter::OR) {
74       for (re2::Prefilter* sub : *node->subs())
75         nodes.push(sub);
76     }
77   }
78 
79   // Don't waste time fuzzing high-size programs.
80   // They can cause bug reports due to fuzzer timeouts.
81   int size = re.ProgramSize();
82   if (size > 9999)
83     return;
84   int rsize = re.ReverseProgramSize();
85   if (rsize > 9999)
86     return;
87 
88   // Don't waste time fuzzing high-fanout programs.
89   // They can cause bug reports due to fuzzer timeouts.
90   std::vector<int> histogram;
91   int fanout = re.ProgramFanout(&histogram);
92   if (fanout > 9)
93     return;
94   int rfanout = re.ReverseProgramFanout(&histogram);
95   if (rfanout > 9)
96     return;
97 
98   if (re.NumberOfCapturingGroups() == 0) {
99     // Avoid early return due to too many arguments.
100     StringPiece sp = text;
101     RE2::FullMatch(sp, re);
102     RE2::PartialMatch(sp, re);
103     RE2::Consume(&sp, re);
104     sp = text;  // Reset.
105     RE2::FindAndConsume(&sp, re);
106   } else {
107     // Okay, we have at least one capturing group...
108     // Try conversion for variously typed arguments.
109     StringPiece sp = text;
110     short s;
111     RE2::FullMatch(sp, re, &s);
112     long l;
113     RE2::PartialMatch(sp, re, &l);
114     float f;
115     RE2::Consume(&sp, re, &f);
116     sp = text;  // Reset.
117     double d;
118     RE2::FindAndConsume(&sp, re, &d);
119   }
120 
121   std::string s = std::string(text);
122   RE2::Replace(&s, re, "");
123   s = std::string(text);  // Reset.
124   RE2::GlobalReplace(&s, re, "");
125 
126   std::string min, max;
127   re.PossibleMatchRange(&min, &max, /*maxlen=*/9);
128 
129   // Exercise some other API functionality.
130   dummy += re.NamedCapturingGroups().size();
131   dummy += re.CapturingGroupNames().size();
132   dummy += RE2::QuoteMeta(pattern).size();
133 }
134 
135 // Entry point for libFuzzer.
LLVMFuzzerTestOneInput(const uint8_t * data,size_t size)136 extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
137   // An input larger than 4 KiB probably isn't interesting. (This limit
138   // allows for fdp.ConsumeRandomLengthString()'s backslash behaviour.)
139   if (size == 0 || size > 4096)
140     return 0;
141 
142   FuzzedDataProvider fdp(data, size);
143 
144   // The convention here is that fdp.ConsumeBool() returning false sets
145   // the default value whereas returning true sets the alternate value:
146   // most options default to false and so can be set directly; encoding
147   // defaults to UTF-8; case_sensitive defaults to true. We do NOT want
148   // to log errors. max_mem is 64 MiB because we can afford to use more
149   // RAM in exchange for (hopefully) faster fuzzing.
150   RE2::Options options;
151   options.set_encoding(fdp.ConsumeBool() ? RE2::Options::EncodingLatin1
152                                          : RE2::Options::EncodingUTF8);
153   options.set_posix_syntax(fdp.ConsumeBool());
154   options.set_longest_match(fdp.ConsumeBool());
155   options.set_log_errors(false);
156   options.set_max_mem(64 << 20);
157   options.set_literal(fdp.ConsumeBool());
158   options.set_never_nl(fdp.ConsumeBool());
159   options.set_dot_nl(fdp.ConsumeBool());
160   options.set_never_capture(fdp.ConsumeBool());
161   options.set_case_sensitive(!fdp.ConsumeBool());
162   options.set_perl_classes(fdp.ConsumeBool());
163   options.set_word_boundary(fdp.ConsumeBool());
164   options.set_one_line(fdp.ConsumeBool());
165 
166   std::string pattern = fdp.ConsumeRandomLengthString(999);
167   std::string text = fdp.ConsumeRandomLengthString(999);
168 
169   TestOneInput(pattern, options, text);
170   return 0;
171 }
172