1 /***************************************************************************
2 Fuzzer driver for PCRE2. Given an arbitrary string of bytes and a length, it
3 tries to compile and match it, deriving options from the string itself. If
4 STANDALONE is defined, a main program that calls the driver with the contents
5 of specified files is compiled, and commentary on what is happening is output.
6 If an argument starts with '=' the rest of it is taken as a literal string
7 rather than a file name. This allows easy testing of short strings.
8
9 Written by Philip Hazel, October 2016
10 ***************************************************************************/
11
12 #include <errno.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16
17 #define PCRE2_CODE_UNIT_WIDTH 8
18 #include "pcre2.h"
19
/* Upper limit on how much of the input is handed to the matching functions as
the subject; LLVMFuzzerTestOneInput() clamps the subject length to this. */
20 #define MAX_MATCH_SIZE 1000
21
/* Number of ints in the workspace vector passed to pcre2_dfa_match(). */
22 #define DFA_WORKSPACE_COUNT 100
23
/* Mask of the compile-time option bits the fuzzer may select. The randomly
generated option word is ANDed with this mask before pcre2_compile() is
called, so bits not listed here are always zero. */
24 #define ALLOWED_COMPILE_OPTIONS \
25 (PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
26 PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \
27 PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED|PCRE2_FIRSTLINE| \
28 PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
29 PCRE2_NO_AUTO_CAPTURE| \
30 PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
31 PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \
32 PCRE2_UTF)
33
/* Mask of the match-time option bits the fuzzer may select; applied to the
same random option word to derive the options used for matching. */
34 #define ALLOWED_MATCH_OPTIONS \
35 (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
36 PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
37 PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT)
38
39 /* This is the callout function. Its only purpose is to halt matching if there
40 are more than 100 callouts, as one way of stopping too much time being spent on
41 fruitless matches. The callout data is a pointer to the counter. */
42
static int callout_function(pcre2_callout_block *cb, void *callout_data)
{
  /* The callout data is the caller's running count of callouts. */
  uint32_t *counter = (uint32_t *)callout_data;

  (void)cb;  /* The callout block is not inspected */

  *counter += 1;

  /* Once the count goes past 100, make the match give up. */
  if (*counter > 100) return PCRE2_ERROR_CALLOUT;
  return 0;
}
49
50 /* Putting in this apparently unnecessary prototype prevents gcc from giving a
51 "no previous prototype" warning when compiling at high warning level. */
52
53 int LLVMFuzzerTestOneInput(const unsigned char *, size_t);
54
55 /* Here's the driving function. */
56
LLVMFuzzerTestOneInput(const unsigned char * data,size_t size)57 int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size)
58 {
59 uint32_t compile_options;
60 uint32_t match_options;
61 pcre2_match_data *match_data = NULL;
62 pcre2_match_context *match_context = NULL;
63 size_t match_size;
64 int dfa_workspace[DFA_WORKSPACE_COUNT];
65 int r1, r2;
66 int i;
67
68 if (size < 1) return 0;
69
70 /* Limiting the length of the subject for matching stops fruitless searches
71 in large trees taking too much time. */
72
73 match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size;
74
75 /* Figure out some options to use. Initialize the random number to ensure
76 repeatability. Ensure that we get a 32-bit unsigned random number for testing
77 options. (RAND_MAX is required to be at least 32767, but is commonly
78 2147483647, which excludes the top bit.) */
79
80 srand((unsigned int)(data[size/2]));
81 r1 = rand();
82 r2 = rand();
83
84 /* Ensure that all undefined option bits are zero (waste of time trying them)
85 and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the
86 input is UTF-8. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is no
87 reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set because
88 \C in random patterns is highly likely to cause a crash. */
89
90 compile_options =
91 ((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_COMPILE_OPTIONS) |
92 PCRE2_NEVER_BACKSLASH_C;
93
94 match_options =
95 ((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_MATCH_OPTIONS);
96
97 /* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not
98 allowed together and just give an immediate error return. */
99
100 if (((compile_options|match_options) & PCRE2_ENDANCHORED) != 0)
101 match_options &= ~(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT);
102
103 /* Do the compile with and without the options, and after a successful compile,
104 likewise do the match with and without the options. */
105
106 for (i = 0; i < 2; i++)
107 {
108 uint32_t callout_count;
109 int errorcode;
110 PCRE2_SIZE erroroffset;
111 pcre2_code *code;
112
113 #ifdef STANDALONE
114 printf("Compile options %.8x never_backslash_c", compile_options);
115 printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
116 ((compile_options & PCRE2_ALT_BSUX) != 0)? ",alt_bsux" : "",
117 ((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? ",alt_circumflex" : "",
118 ((compile_options & PCRE2_ALT_VERBNAMES) != 0)? ",alt_verbnames" : "",
119 ((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? ",allow_empty_class" : "",
120 ((compile_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
121 ((compile_options & PCRE2_AUTO_CALLOUT) != 0)? ",auto_callout" : "",
122 ((compile_options & PCRE2_CASELESS) != 0)? ",caseless" : "",
123 ((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? ",dollar_endonly" : "",
124 ((compile_options & PCRE2_DOTALL) != 0)? ",dotall" : "",
125 ((compile_options & PCRE2_DUPNAMES) != 0)? ",dupnames" : "",
126 ((compile_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
127 ((compile_options & PCRE2_EXTENDED) != 0)? ",extended" : "",
128 ((compile_options & PCRE2_FIRSTLINE) != 0)? ",firstline" : "",
129 ((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? ",match_unset_backref" : "",
130 ((compile_options & PCRE2_MULTILINE) != 0)? ",multiline" : "",
131 ((compile_options & PCRE2_NEVER_UCP) != 0)? ",never_ucp" : "",
132 ((compile_options & PCRE2_NEVER_UTF) != 0)? ",never_utf" : "",
133 ((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? ",no_auto_capture" : "",
134 ((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? ",no_auto_possess" : "",
135 ((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? ",no_dotstar_anchor" : "",
136 ((compile_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
137 ((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? ",no_start_optimize" : "",
138 ((compile_options & PCRE2_UCP) != 0)? ",ucp" : "",
139 ((compile_options & PCRE2_UNGREEDY) != 0)? ",ungreedy" : "",
140 ((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? ",use_offset_limit" : "",
141 ((compile_options & PCRE2_UTF) != 0)? ",utf" : "");
142 #endif
143
144 code = pcre2_compile((PCRE2_SPTR)data, (PCRE2_SIZE)size, compile_options,
145 &errorcode, &erroroffset, NULL);
146
147 /* Compilation succeeded */
148
149 if (code != NULL)
150 {
151 int j;
152 uint32_t save_match_options = match_options;
153
154 #ifdef SUPPORT_JIT
155 pcre2_jit_compile(code, PCRE2_JIT_COMPLETE);
156 #endif
157
158 /* Create match data and context blocks only when we first need them. Set
159 low match and depth limits to avoid wasting too much searching large
160 pattern trees. Almost all matches are going to fail. */
161
162 if (match_data == NULL)
163 {
164 match_data = pcre2_match_data_create(32, NULL);
165 if (match_data == NULL)
166 {
167 #ifdef STANDALONE
168 printf("** Failed to create match data block\n");
169 #endif
170 return 0;
171 }
172 }
173
174 if (match_context == NULL)
175 {
176 match_context = pcre2_match_context_create(NULL);
177 if (match_context == NULL)
178 {
179 #ifdef STANDALONE
180 printf("** Failed to create match context block\n");
181 #endif
182 return 0;
183 }
184 (void)pcre2_set_match_limit(match_context, 100);
185 (void)pcre2_set_depth_limit(match_context, 100);
186 (void)pcre2_set_callout(match_context, callout_function, &callout_count);
187 }
188
189 /* Match twice, with and without options. */
190
191 for (j = 0; j < 2; j++)
192 {
193 #ifdef STANDALONE
194 printf("Match options %.8x", match_options);
195 printf("%s%s%s%s%s%s%s%s%s%s\n",
196 ((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
197 ((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
198 ((match_options & PCRE2_NO_JIT) != 0)? ",no_jit" : "",
199 ((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
200 ((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "",
201 ((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "",
202 ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "",
203 ((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "",
204 ((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "",
205 ((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
206 #endif
207
208 callout_count = 0;
209 errorcode = pcre2_match(code, (PCRE2_SPTR)data, (PCRE2_SIZE)match_size, 0,
210 match_options, match_data, match_context);
211
212 #ifdef STANDALONE
213 if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
214 {
215 unsigned char buffer[256];
216 pcre2_get_error_message(errorcode, buffer, 256);
217 printf("Match failed: error %d: %s\n", errorcode, buffer);
218 }
219 #endif
220
221 match_options = 0; /* For second time */
222 }
223
224 /* Match with DFA twice, with and without options. */
225
226 match_options = save_match_options & ~PCRE2_NO_JIT; /* Not valid for DFA */
227
228 for (j = 0; j < 2; j++)
229 {
230 #ifdef STANDALONE
231 printf("DFA match options %.8x", match_options);
232 printf("%s%s%s%s%s%s%s%s%s\n",
233 ((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
234 ((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
235 ((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
236 ((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "",
237 ((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "",
238 ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "",
239 ((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "",
240 ((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "",
241 ((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
242 #endif
243
244 callout_count = 0;
245 errorcode = pcre2_dfa_match(code, (PCRE2_SPTR)data,
246 (PCRE2_SIZE)match_size, 0, match_options, match_data, match_context,
247 dfa_workspace, DFA_WORKSPACE_COUNT);
248
249 #ifdef STANDALONE
250 if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
251 {
252 unsigned char buffer[256];
253 pcre2_get_error_message(errorcode, buffer, 256);
254 printf("Match failed: error %d: %s\n", errorcode, buffer);
255 }
256 #endif
257
258 match_options = 0; /* For second time */
259 }
260
261 match_options = save_match_options; /* Reset for the second compile */
262 pcre2_code_free(code);
263 }
264
265 /* Compilation failed */
266
267 else
268 {
269 unsigned char buffer[256];
270 pcre2_get_error_message(errorcode, buffer, 256);
271 #ifdef STANDALONE
272 printf("Error %d at offset %lu: %s\n", errorcode, erroroffset, buffer);
273 #else
274 if (strstr((const char *)buffer, "internal error") != NULL) abort();
275 #endif
276 }
277
278 compile_options = PCRE2_NEVER_BACKSLASH_C; /* For second time */
279 }
280
281 if (match_data != NULL) pcre2_match_data_free(match_data);
282 if (match_context != NULL) pcre2_match_context_free(match_context);
283
284 return 0;
285 }
286
287
288 /* Optional main program. */
289
290 #ifdef STANDALONE
/* Standalone driver. Each argument is either a file name, whose contents are
passed to LLVMFuzzerTestOneInput(), or, if it starts with '=', a literal
string (the text after the '=') that is passed instead. Problems with an
argument are reported on stdout and the remaining arguments are still
processed. Always returns 0. */

int main(int argc, char **argv)
{
  int i;

  if (argc < 2)
  {
    printf("** No arguments given\n");
    return 0;
  }

  for (i = 1; i < argc; i++)
  {
    size_t filelen;
    size_t readsize;
    long endpos;
    unsigned char *buffer;
    FILE *f;

    /* Handle a literal string. Copy to an exact size buffer so that checks for
    overrunning work. A zero-length request is rounded up to one byte because
    malloc(0) is allowed to return NULL, which is not an allocation failure. */

    if (argv[i][0] == '=')
    {
      readsize = strlen(argv[i]) - 1;
      printf("------ <Literal> ------\n");
      printf("Length = %lu\n", (unsigned long)readsize);
      printf("%.*s\n", (int)readsize, argv[i]+1);
      buffer = (unsigned char *)malloc((readsize > 0)? readsize : 1);
      if (buffer == NULL)
        printf("** Failed to allocate %lu bytes of memory\n",
          (unsigned long)readsize);
      else
      {
        memcpy(buffer, argv[i]+1, readsize);
        LLVMFuzzerTestOneInput(buffer, readsize);
        free(buffer);
      }
      continue;
    }

    /* Handle a string given in a file */

    f = fopen(argv[i], "rb");
    if (f == NULL)
    {
      printf("** Failed to open %s: %s\n", argv[i], strerror(errno));
      continue;
    }

    printf("------ %s ------\n", argv[i]);

    /* Find the file length by seeking to the end. ftell() returns -1 on
    failure, which must not be converted to a (huge) size_t. */

    if (fseek(f, 0, SEEK_END) != 0 || (endpos = ftell(f)) < 0 ||
        fseek(f, 0, SEEK_SET) != 0)
    {
      printf("** Failed to determine the size of %s: %s\n", argv[i],
        strerror(errno));
      fclose(f);
      continue;
    }
    filelen = (size_t)endpos;

    buffer = (unsigned char *)malloc((filelen > 0)? filelen : 1);
    if (buffer == NULL)
    {
      printf("** Failed to allocate %lu bytes of memory\n",
        (unsigned long)filelen);
      fclose(f);
      continue;
    }

    readsize = fread(buffer, 1, filelen, f);
    fclose(f);

    if (readsize != filelen)
      printf("** File size is %lu but fread() returned %lu\n",
        (unsigned long)filelen, (unsigned long)readsize);
    else
    {
      printf("Length = %lu\n", (unsigned long)filelen);
      LLVMFuzzerTestOneInput(buffer, filelen);
    }
    free(buffer);
  }

  return 0;
}
367 #endif /* STANDALONE */
368
369 /* End */
370