/***************************************************************************
Fuzzer driver for PCRE2. Given an arbitrary string of bytes and a length, it
tries to compile and match it, deriving options from the string itself. If
STANDALONE is defined, a main program that calls the driver with the contents
of specified files is compiled, and commentary on what is happening is output.
If an argument starts with '=' the rest of it is taken as a literal string
rather than a file name. This allows easy testing of short strings.

Written by Philip Hazel, October 2016
***************************************************************************/
11 
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PCRE2_CODE_UNIT_WIDTH 8
#include "pcre2.h"
19 
/* Upper bound on the number of subject bytes handed to the matchers; longer
inputs are truncated to this length for matching (but not for compiling). */

#define MAX_MATCH_SIZE 1000

/* Number of int elements in the workspace vector given to pcre2_dfa_match(). */

#define DFA_WORKSPACE_COUNT 100

/* The compile-time option bits that the pseudo-random option word is allowed
to switch on. Every other bit is masked off before pcre2_compile() is called. */

#define ALLOWED_COMPILE_OPTIONS \
  (PCRE2_ANCHORED|PCRE2_ALLOW_EMPTY_CLASS|PCRE2_ALT_BSUX|PCRE2_ALT_CIRCUMFLEX| \
   PCRE2_ALT_VERBNAMES|PCRE2_AUTO_CALLOUT|PCRE2_CASELESS|PCRE2_DOLLAR_ENDONLY| \
   PCRE2_DOTALL|PCRE2_DUPNAMES|PCRE2_ENDANCHORED|PCRE2_EXTENDED|PCRE2_FIRSTLINE| \
   PCRE2_MATCH_UNSET_BACKREF|PCRE2_MULTILINE|PCRE2_NEVER_BACKSLASH_C| \
   PCRE2_NO_AUTO_CAPTURE| \
   PCRE2_NO_AUTO_POSSESS|PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_NO_START_OPTIMIZE| \
   PCRE2_UCP|PCRE2_UNGREEDY|PCRE2_USE_OFFSET_LIMIT| \
   PCRE2_UTF)

/* The match-time option bits that the pseudo-random option word is allowed to
switch on. PCRE2_NO_JIT is removed again by the driver before DFA matching. */

#define ALLOWED_MATCH_OPTIONS \
  (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
   PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
   PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT)
38 
39 /* This is the callout function. Its only purpose is to halt matching if there
40 are more than 100 callouts, as one way of stopping too much time being spent on
41 fruitless matches. The callout data is a pointer to the counter. */
42 
callout_function(pcre2_callout_block * cb,void * callout_data)43 static int callout_function(pcre2_callout_block *cb, void *callout_data)
44 {
45 (void)cb;  /* Avoid unused parameter warning */
46 *((uint32_t *)callout_data) += 1;
47 return (*((uint32_t *)callout_data) > 100)? PCRE2_ERROR_CALLOUT : 0;
48 }
49 
50 /* Putting in this apparently unnecessary prototype prevents gcc from giving a
51 "no previous prototype" warning when compiling at high warning level. */
52 
53 int LLVMFuzzerTestOneInput(const unsigned char *, size_t);
54 
55 /* Here's the driving function. */
56 
LLVMFuzzerTestOneInput(const unsigned char * data,size_t size)57 int LLVMFuzzerTestOneInput(const unsigned char *data, size_t size)
58 {
59 uint32_t compile_options;
60 uint32_t match_options;
61 pcre2_match_data *match_data = NULL;
62 pcre2_match_context *match_context = NULL;
63 size_t match_size;
64 int dfa_workspace[DFA_WORKSPACE_COUNT];
65 int r1, r2;
66 int i;
67 
68 if (size < 1) return 0;
69 
70 /* Limiting the length of the subject for matching stops fruitless searches
71 in large trees taking too much time. */
72 
73 match_size = (size > MAX_MATCH_SIZE)? MAX_MATCH_SIZE : size;
74 
75 /* Figure out some options to use. Initialize the random number to ensure
76 repeatability. Ensure that we get a 32-bit unsigned random number for testing
77 options. (RAND_MAX is required to be at least 32767, but is commonly
78 2147483647, which excludes the top bit.) */
79 
80 srand((unsigned int)(data[size/2]));
81 r1 = rand();
82 r2 = rand();
83 
84 /* Ensure that all undefined option bits are zero (waste of time trying them)
85 and also that PCRE2_NO_UTF_CHECK is unset, as there is no guarantee that the
86 input is UTF-8. Also unset PCRE2_NEVER_UTF and PCRE2_NEVER_UCP as there is no
87 reason to disallow UTF and UCP. Force PCRE2_NEVER_BACKSLASH_C to be set because
88 \C in random patterns is highly likely to cause a crash. */
89 
90 compile_options =
91   ((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_COMPILE_OPTIONS) |
92   PCRE2_NEVER_BACKSLASH_C;
93 
94 match_options =
95   ((((uint32_t)r1 << 16) | ((uint32_t)r2 & 0xffff)) & ALLOWED_MATCH_OPTIONS);
96 
97 /* Discard partial matching if PCRE2_ENDANCHORED is set, because they are not
98 allowed together and just give an immediate error return. */
99 
100 if (((compile_options|match_options) & PCRE2_ENDANCHORED) != 0)
101   match_options &= ~(PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT);
102 
103 /* Do the compile with and without the options, and after a successful compile,
104 likewise do the match with and without the options. */
105 
106 for (i = 0; i < 2; i++)
107   {
108   uint32_t callout_count;
109   int errorcode;
110   PCRE2_SIZE erroroffset;
111   pcre2_code *code;
112 
113 #ifdef STANDALONE
114   printf("Compile options %.8x never_backslash_c", compile_options);
115   printf("%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
116     ((compile_options & PCRE2_ALT_BSUX) != 0)? ",alt_bsux" : "",
117     ((compile_options & PCRE2_ALT_CIRCUMFLEX) != 0)? ",alt_circumflex" : "",
118     ((compile_options & PCRE2_ALT_VERBNAMES) != 0)? ",alt_verbnames" : "",
119     ((compile_options & PCRE2_ALLOW_EMPTY_CLASS) != 0)? ",allow_empty_class" : "",
120     ((compile_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
121     ((compile_options & PCRE2_AUTO_CALLOUT) != 0)? ",auto_callout" : "",
122     ((compile_options & PCRE2_CASELESS) != 0)? ",caseless" : "",
123     ((compile_options & PCRE2_DOLLAR_ENDONLY) != 0)? ",dollar_endonly" : "",
124     ((compile_options & PCRE2_DOTALL) != 0)? ",dotall" : "",
125     ((compile_options & PCRE2_DUPNAMES) != 0)? ",dupnames" : "",
126     ((compile_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
127     ((compile_options & PCRE2_EXTENDED) != 0)? ",extended" : "",
128     ((compile_options & PCRE2_FIRSTLINE) != 0)? ",firstline" : "",
129     ((compile_options & PCRE2_MATCH_UNSET_BACKREF) != 0)? ",match_unset_backref" : "",
130     ((compile_options & PCRE2_MULTILINE) != 0)? ",multiline" : "",
131     ((compile_options & PCRE2_NEVER_UCP) != 0)? ",never_ucp" : "",
132     ((compile_options & PCRE2_NEVER_UTF) != 0)? ",never_utf" : "",
133     ((compile_options & PCRE2_NO_AUTO_CAPTURE) != 0)? ",no_auto_capture" : "",
134     ((compile_options & PCRE2_NO_AUTO_POSSESS) != 0)? ",no_auto_possess" : "",
135     ((compile_options & PCRE2_NO_DOTSTAR_ANCHOR) != 0)? ",no_dotstar_anchor" : "",
136     ((compile_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
137     ((compile_options & PCRE2_NO_START_OPTIMIZE) != 0)? ",no_start_optimize" : "",
138     ((compile_options & PCRE2_UCP) != 0)? ",ucp" : "",
139     ((compile_options & PCRE2_UNGREEDY) != 0)? ",ungreedy" : "",
140     ((compile_options & PCRE2_USE_OFFSET_LIMIT) != 0)? ",use_offset_limit" : "",
141     ((compile_options & PCRE2_UTF) != 0)? ",utf" : "");
142 #endif
143 
144   code = pcre2_compile((PCRE2_SPTR)data, (PCRE2_SIZE)size, compile_options,
145     &errorcode, &erroroffset, NULL);
146 
147   /* Compilation succeeded */
148 
149   if (code != NULL)
150     {
151     int j;
152     uint32_t save_match_options = match_options;
153 
154 #ifdef SUPPORT_JIT
155     pcre2_jit_compile(code, PCRE2_JIT_COMPLETE);
156 #endif
157 
158     /* Create match data and context blocks only when we first need them. Set
159     low match and depth limits to avoid wasting too much searching large
160     pattern trees. Almost all matches are going to fail. */
161 
162     if (match_data == NULL)
163       {
164       match_data = pcre2_match_data_create(32, NULL);
165       if (match_data == NULL)
166         {
167 #ifdef STANDALONE
168         printf("** Failed to create match data block\n");
169 #endif
170         return 0;
171         }
172       }
173 
174     if (match_context == NULL)
175       {
176       match_context = pcre2_match_context_create(NULL);
177       if (match_context == NULL)
178         {
179 #ifdef STANDALONE
180         printf("** Failed to create match context block\n");
181 #endif
182         return 0;
183         }
184       (void)pcre2_set_match_limit(match_context, 100);
185       (void)pcre2_set_depth_limit(match_context, 100);
186       (void)pcre2_set_callout(match_context, callout_function, &callout_count);
187       }
188 
189     /* Match twice, with and without options. */
190 
191     for (j = 0; j < 2; j++)
192       {
193 #ifdef STANDALONE
194       printf("Match options %.8x", match_options);
195       printf("%s%s%s%s%s%s%s%s%s%s\n",
196         ((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
197         ((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
198         ((match_options & PCRE2_NO_JIT) != 0)? ",no_jit" : "",
199         ((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
200         ((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "",
201         ((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "",
202         ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "",
203         ((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "",
204         ((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "",
205         ((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
206 #endif
207 
208       callout_count = 0;
209       errorcode = pcre2_match(code, (PCRE2_SPTR)data, (PCRE2_SIZE)match_size, 0,
210         match_options, match_data, match_context);
211 
212 #ifdef STANDALONE
213       if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
214         {
215         unsigned char buffer[256];
216         pcre2_get_error_message(errorcode, buffer, 256);
217         printf("Match failed: error %d: %s\n", errorcode, buffer);
218         }
219 #endif
220 
221       match_options = 0;  /* For second time */
222       }
223 
224     /* Match with DFA twice, with and without options. */
225 
226     match_options = save_match_options & ~PCRE2_NO_JIT;  /* Not valid for DFA */
227 
228     for (j = 0; j < 2; j++)
229       {
230 #ifdef STANDALONE
231       printf("DFA match options %.8x", match_options);
232       printf("%s%s%s%s%s%s%s%s%s\n",
233         ((match_options & PCRE2_ANCHORED) != 0)? ",anchored" : "",
234         ((match_options & PCRE2_ENDANCHORED) != 0)? ",endanchored" : "",
235         ((match_options & PCRE2_NO_UTF_CHECK) != 0)? ",no_utf_check" : "",
236         ((match_options & PCRE2_NOTBOL) != 0)? ",notbol" : "",
237         ((match_options & PCRE2_NOTEMPTY) != 0)? ",notempty" : "",
238         ((match_options & PCRE2_NOTEMPTY_ATSTART) != 0)? ",notempty_atstart" : "",
239         ((match_options & PCRE2_NOTEOL) != 0)? ",noteol" : "",
240         ((match_options & PCRE2_PARTIAL_HARD) != 0)? ",partial_hard" : "",
241         ((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
242 #endif
243 
244       callout_count = 0;
245       errorcode = pcre2_dfa_match(code, (PCRE2_SPTR)data,
246         (PCRE2_SIZE)match_size, 0, match_options, match_data, match_context,
247         dfa_workspace, DFA_WORKSPACE_COUNT);
248 
249 #ifdef STANDALONE
250       if (errorcode >= 0) printf("Match returned %d\n", errorcode); else
251         {
252         unsigned char buffer[256];
253         pcre2_get_error_message(errorcode, buffer, 256);
254         printf("Match failed: error %d: %s\n", errorcode, buffer);
255         }
256 #endif
257 
258       match_options = 0;  /* For second time */
259       }
260 
261     match_options = save_match_options;  /* Reset for the second compile */
262     pcre2_code_free(code);
263     }
264 
265   /* Compilation failed */
266 
267   else
268     {
269     unsigned char buffer[256];
270     pcre2_get_error_message(errorcode, buffer, 256);
271 #ifdef STANDALONE
272     printf("Error %d at offset %lu: %s\n", errorcode, erroroffset, buffer);
273 #else
274     if (strstr((const char *)buffer, "internal error") != NULL) abort();
275 #endif
276     }
277 
278   compile_options = PCRE2_NEVER_BACKSLASH_C;  /* For second time */
279   }
280 
281 if (match_data != NULL) pcre2_match_data_free(match_data);
282 if (match_context != NULL) pcre2_match_context_free(match_context);
283 
284 return 0;
285 }
286 
287 
/* Optional main program. Each command line argument is either the name of a
file whose contents are passed to the fuzzer driver, or, if it starts with '=',
a literal string (the text after the '=') that is passed to the driver. Problems
with an individual argument are reported and the remaining arguments are still
processed.

Arguments:
  argc    the argument count
  argv    the argument vector

Returns:  always 0
*/

#ifdef STANDALONE
int main(int argc, char **argv)
{
int i;

if (argc < 2)
  {
  printf("** No arguments given\n");
  return 0;
  }

for (i = 1; i < argc; i++)
  {
  size_t filelen;
  size_t readsize;
  long filepos;
  unsigned char *buffer;
  FILE *f;

  /* Handle a literal string. Copy to an exact size buffer so that checks for
  overrunning work. For an empty literal, one byte is requested because
  malloc(0) is allowed to return NULL, which would be misreported as an
  allocation failure; the driver does not look at the data when the length is
  zero. The size_t values are converted to unsigned long for printing because
  the two types need not have the same width. */

  if (argv[i][0] == '=')
    {
    readsize = strlen(argv[i]) - 1;
    printf("------ <Literal> ------\n");
    printf("Length = %lu\n", (unsigned long)readsize);
    printf("%.*s\n", (int)readsize, argv[i]+1);
    buffer = (unsigned char *)malloc((readsize > 0)? readsize : 1);
    if (buffer == NULL)
      printf("** Failed to allocate %lu bytes of memory\n",
        (unsigned long)readsize);
    else
      {
      memcpy(buffer, argv[i]+1, readsize);
      LLVMFuzzerTestOneInput(buffer, readsize);
      free(buffer);
      }
    continue;
    }

  /* Handle a string given in a file */

  f = fopen(argv[i], "rb");
  if (f == NULL)
    {
    printf("** Failed to open %s: %s\n", argv[i], strerror(errno));
    continue;
    }

  printf("------ %s ------\n", argv[i]);

  /* Find the file's length. ftell() returns -1 on failure, which must not be
  converted to a (huge) size_t, so check each step. */

  filepos = -1;
  if (fseek(f, 0, SEEK_END) == 0) filepos = ftell(f);
  if (filepos < 0 || fseek(f, 0, SEEK_SET) != 0)
    {
    printf("** Failed to find the size of %s: %s\n", argv[i], strerror(errno));
    fclose(f);
    continue;
    }
  filelen = (size_t)filepos;

  /* As for a literal, ask for one byte when the file is empty. */

  buffer = (unsigned char *)malloc((filelen > 0)? filelen : 1);
  if (buffer == NULL)
    {
    printf("** Failed to allocate %lu bytes of memory\n",
      (unsigned long)filelen);
    fclose(f);
    continue;
    }

  readsize = fread(buffer, 1, filelen, f);
  fclose(f);

  if (readsize != filelen)
    printf("** File size is %lu but fread() returned %lu\n",
      (unsigned long)filelen, (unsigned long)readsize);
  else
    {
    printf("Length = %lu\n", (unsigned long)filelen);
    LLVMFuzzerTestOneInput(buffer, filelen);
    }
  free(buffer);
  }

return 0;
}
#endif  /* STANDALONE */
368 
/* End */