1 /***************************************************************************
2 Fuzzer driver for PCRE2. Given an arbitrary string of bytes and a length, it
3 tries to compile and match it, deriving options from the string itself. If
4 STANDALONE is defined, a main program that calls the driver with the contents
5 of specified files is compiled, and commentary on what is happening is output.
If an argument starts with '=' the rest of it is taken as a literal string
7 rather than a file name. This allows easy testing of short strings.
8
9 Written by Philip Hazel, October 2016
10 ***************************************************************************/
11
12 #include <errno.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16
17 #define PCRE2_CODE_UNIT_WIDTH 8
18 #include "pcre2.h"
19
20 #define MAX_MATCH_SIZE 1000
21
22 #define DFA_WORKSPACE_COUNT 100
23
24 #define ALLOWED_COMPILE_OPTIONS \
25 (PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
26 PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \
27 PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED|PCRE2_FIRSTLINE| \
28 PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
29 PCRE2_NO_AUTO_CAPTURE| \
30 PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
31 PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \
32 PCRE2_UTF)
33
34 #define ALLOWED_MATCH_OPTIONS \
35 (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
36 PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
37 PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT)
38
39 /* This is the callout function. Its only purpose is to halt matching if there
40 are more than 100 callouts, as one way of stopping too much time being spent on
41 fruitless matches. The callout data is a pointer to the counter. */
42
static int callout_function(pcre2_callout_block *cb, void *callout_data)
{
/* The callout data is the address of the caller's uint32_t callout counter. */
uint32_t *counter = (uint32_t *)callout_data;

(void)cb;  /* The callout block itself is not inspected */

/* Count this callout, then abandon the match once the count exceeds 100. */
*counter += 1;
if (*counter > 100) return PCRE2_ERROR_CALLOUT;
return 0;
}
49
50 /* Putting in this apparently unnecessary prototype prevents gcc from giving a
51 "no previous prototype" warning when compiling at high warning level. */
52
53 int LLVMFuzzerTestOneInput(const unsigned char *, size_t);
54
55 /* Here's the driving function. */
56
LLVMFuzzerTestOneInput(const unsigned char * data,size_t size)57 int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size)
58 {
59 uint32_t compile_options;
60 uint32_t match_options;
61 pcre2_match_data *match_data = NULL;
62 pcre2_match_context *match_context = NULL;
63 size_t match_size;
64 int dfa_workspace[DFA_WORKSPACE_COUNT];
65 int r1, r2;
66 int i;
67
68 if (size < 1) return 0;
69
70 /* Limiting the length of the subject for matching stops fruitless searches
71 in large trees taking too much time. */
72
73 match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size;
74
75 /* Figure out some options to use. Initialize the random number to ensure
76 repeatability. Ensure that we get a 32-bit unsigned random number for testing
77 options. (RAND_MAX is required to be at least 32767, but is commonly
78 2147483647, which excludes the top bit.) */
79
80 srand((unsigned int)(data[size/2]));
81 r1 = rand();
82 r2 = rand();
83
84 /* Ensure that all undefined option bits are zero (waste of time trying them)
85 and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the
86 input is UTF-8. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is no
87 reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set because
88 \C in random patterns is highly likely to cause a crash. */
89
90 compile_options =
91 ((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_COMPILE_OPTIONS) |
92 PCRE2_NEVER_BACKSLASH_C;
93
94 match_options =
95 ((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_MATCH_OPTIONS);
96
97 /* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not
98 allowed together and just give an immediate error return. */
99
100 if (((compile_options|match_options) & PCRE2_ENDANCHORED) != 0)
101 match_options &= ~(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT);
102
103 /* Do the compile with and without the options, and after a successful compile,
104 likewise do the match with and without the options. */
105
106 for (i = 0; i < 2; i++)
107 {
108 uint32_t callout_count;
109 int errorcode;
110 PCRE2_SIZE erroroffset;
111 pcre2_code *code;
112
113 #ifdef STANDALONE
114 printf("Compile options %.8x never_backslash_c", compile_options);
115 printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
116 ((compile_options & PCRE2_ALT_BSUX) != 0)? ",alt_bsux" : "",
117 ((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? ",alt_circumflex" : "",
118 ((compile_options & PCRE2_ALT_VERBNAMES) != 0)? ",alt_verbnames" : "",
119 ((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? ",allow_empty_class" : "",
120 ((compile_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
121 ((compile_options & PCRE2_AUTO_CALLOUT) != 0)? ",auto_callout" : "",
122 ((compile_options & PCRE2_CASELESS) != 0)? ",caseless" : "",
123 ((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? ",dollar_endonly" : "",
124 ((compile_options & PCRE2_DOTALL) != 0)? ",dotall" : "",
125 ((compile_options & PCRE2_DUPNAMES) != 0)? ",dupnames" : "",
126 ((compile_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
127 ((compile_options & PCRE2_EXTENDED) != 0)? ",extended" : "",
128 ((compile_options & PCRE2_FIRSTLINE) != 0)? ",firstline" : "",
129 ((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? ",match_unset_backref" : "",
130 ((compile_options & PCRE2_MULTILINE) != 0)? ",multiline" : "",
131 ((compile_options & PCRE2_NEVER_UCP) != 0)? ",never_ucp" : "",
132 ((compile_options & PCRE2_NEVER_UTF) != 0)? ",never_utf" : "",
133 ((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? ",no_auto_capture" : "",
134 ((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? ",no_auto_possess" : "",
135 ((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? ",no_dotstar_anchor" : "",
136 ((compile_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
137 ((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? ",no_start_optimize" : "",
138 ((compile_options & PCRE2_UCP) != 0)? ",ucp" : "",
139 ((compile_options & PCRE2_UNGREEDY) != 0)? ",ungreedy" : "",
140 ((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? ",use_offset_limit" : "",
141 ((compile_options & PCRE2_UTF) != 0)? ",utf" : "");
142 #endif
143
144 code = pcre2_compile((PCRE2_SPTR)data, (PCRE2_SIZE)size, compile_options,
145 &errorcode, &erroroffset, NULL);
146
147 /* Compilation succeeded */
148
149 if (code != NULL)
150 {
151 int j;
152 uint32_t save_match_options = match_options;
153
154 /* Create match data and context blocks only when we first need them. Set
155 low match and depth limits to avoid wasting too much searching large
156 pattern trees. Almost all matches are going to fail. */
157
158 if (match_data == NULL)
159 {
160 match_data = pcre2_match_data_create(32, NULL);
161 if (match_data == NULL)
162 {
163 #ifdef STANDALONE
164 printf("** Failed to create match data block\n");
165 #endif
166 return 0;
167 }
168 }
169
170 if (match_context == NULL)
171 {
172 match_context = pcre2_match_context_create(NULL);
173 if (match_context == NULL)
174 {
175 #ifdef STANDALONE
176 printf("** Failed to create match context block\n");
177 #endif
178 return 0;
179 }
180 (void)pcre2_set_match_limit(match_context, 100);
181 (void)pcre2_set_depth_limit(match_context, 100);
182 (void)pcre2_set_callout(match_context, callout_function, &callout_count);
183 }
184
185 /* Match twice, with and without options. */
186
187 for (j = 0; j < 2; j++)
188 {
189 #ifdef STANDALONE
190 printf("Match options %.8x", match_options);
191 printf("%s%s%s%s%s%s%s%s%s%s\n",
192 ((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
193 ((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
194 ((match_options & PCRE2_NO_JIT) != 0)? ",no_jit" : "",
195 ((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
196 ((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "",
197 ((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "",
198 ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "",
199 ((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "",
200 ((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "",
201 ((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
202 #endif
203
204 callout_count = 0;
205 errorcode = pcre2_match(code, (PCRE2_SPTR)data, (PCRE2_SIZE)match_size, 0,
206 match_options, match_data, match_context);
207
208 #ifdef STANDALONE
209 if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
210 {
211 unsigned char buffer[256];
212 pcre2_get_error_message(errorcode, buffer, 256);
213 printf("Match failed: error %d: %s\n", errorcode, buffer);
214 }
215 #endif
216
217 match_options = 0; /* For second time */
218 }
219
220 /* Match with DFA twice, with and without options. */
221
222 match_options = save_match_options & ~PCRE2_NO_JIT; /* Not valid for DFA */
223
224 for (j = 0; j < 2; j++)
225 {
226 #ifdef STANDALONE
227 printf("DFA match options %.8x", match_options);
228 printf("%s%s%s%s%s%s%s%s%s\n",
229 ((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
230 ((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
231 ((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
232 ((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "",
233 ((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "",
234 ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "",
235 ((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "",
236 ((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "",
237 ((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
238 #endif
239
240 callout_count = 0;
241 errorcode = pcre2_dfa_match(code, (PCRE2_SPTR)data,
242 (PCRE2_SIZE)match_size, 0, match_options, match_data, match_context,
243 dfa_workspace, DFA_WORKSPACE_COUNT);
244
245 #ifdef STANDALONE
246 if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
247 {
248 unsigned char buffer[256];
249 pcre2_get_error_message(errorcode, buffer, 256);
250 printf("Match failed: error %d: %s\n", errorcode, buffer);
251 }
252 #endif
253
254 match_options = 0; /* For second time */
255 }
256
257 match_options = save_match_options; /* Reset for the second compile */
258 pcre2_code_free(code);
259 }
260
261 /* Compilation failed */
262
263 else
264 {
265 unsigned char buffer[256];
266 pcre2_get_error_message(errorcode, buffer, 256);
267 #ifdef STANDALONE
268 printf("Error %d at offset %lu: %s\n", errorcode, erroroffset, buffer);
269 #else
270 if (strstr((const char *)buffer, "internal error") != NULL) abort();
271 #endif
272 }
273
274 compile_options = PCRE2_NEVER_BACKSLASH_C; /* For second time */
275 }
276
277 if (match_data != NULL) pcre2_match_data_free(match_data);
278 if (match_context != NULL) pcre2_match_context_free(match_context);
279
280 return 0;
281 }
282
283
284 /* Optional main program. */
285
286 #ifdef STANDALONE
/* Standalone test harness. Each argument is either a literal pattern, if it
starts with '=' (the rest of the argument is the input), or the name of a file
whose entire contents are the input. Each input is passed to
LLVMFuzzerTestOneInput() and progress is reported on stdout. A failure on one
argument is reported and the remaining arguments are still processed.

Returns: always 0 */

int main(int argc, char **argv)
{
int i;

if (argc < 2)
  {
  printf("** No arguments given\n");
  return 0;
  }

for (i = 1; i < argc; i++)
  {
  size_t filelen;
  size_t readsize;
  long filepos;
  unsigned char *buffer;
  FILE *f;

  /* Handle a literal string. Copy to an exact size buffer so that checks for
  overrunning work. size_t values are cast to unsigned long for printing so
  that the arguments match the %lu conversions. */

  if (argv[i][0] == '=')
    {
    readsize = strlen(argv[i]) - 1;
    printf("------ <Literal> ------\n");
    printf("Length = %lu\n", (unsigned long)readsize);
    printf("%.*s\n", (int)readsize, argv[i]+1);

    /* malloc(0) may legitimately return NULL, so ask for at least one byte to
    avoid reporting a bogus allocation failure for an empty literal. */

    buffer = (unsigned char *)malloc((readsize > 0)? readsize : 1);
    if (buffer == NULL)
      printf("** Failed to allocate %lu bytes of memory\n",
        (unsigned long)readsize);
    else
      {
      memcpy(buffer, argv[i]+1, readsize);
      LLVMFuzzerTestOneInput(buffer, readsize);
      free(buffer);
      }
    continue;
    }

  /* Handle a string given in a file */

  f = fopen(argv[i], "rb");
  if (f == NULL)
    {
    printf("** Failed to open %s: %s\n", argv[i], strerror(errno));
    continue;
    }

  printf("------ %s ------\n", argv[i]);

  /* Find the file length by seeking to the end. ftell() returns -1 on error,
  which must be caught before it is converted to an unsigned size. */

  if (fseek(f, 0, SEEK_END) != 0 || (filepos = ftell(f)) < 0 ||
      fseek(f, 0, SEEK_SET) != 0)
    {
    printf("** Failed to determine size of %s: %s\n", argv[i], strerror(errno));
    fclose(f);
    continue;
    }
  filelen = (size_t)filepos;

  /* As above, never request zero bytes. */

  buffer = (unsigned char *)malloc((filelen > 0)? filelen : 1);
  if (buffer == NULL)
    {
    printf("** Failed to allocate %lu bytes of memory\n",
      (unsigned long)filelen);
    fclose(f);
    continue;
    }

  readsize = fread(buffer, 1, filelen, f);
  fclose(f);

  if (readsize != filelen)
    printf("** File size is %lu but fread() returned %lu\n",
      (unsigned long)filelen, (unsigned long)readsize);
  else
    {
    printf("Length = %lu\n", (unsigned long)filelen);
    LLVMFuzzerTestOneInput(buffer, filelen);
    }
  free(buffer);
  }

return 0;
}
363 #endif /* STANDALONE */
364
365 /* End */
366