1 /*
2 american fuzzy lop++ - extras relates routines
3 ----------------------------------------------
4
5 Originally written by Michal Zalewski
6
7 Now maintained by Marc Heuse <mh@mh-sec.de>,
8 Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
9 Andrea Fioraldi <andreafioraldi@gmail.com>
10
11 Copyright 2016, 2017 Google Inc. All rights reserved.
12 Copyright 2019-2022 AFLplusplus Project. All rights reserved.
13
14 Licensed under the Apache License, Version 2.0 (the "License");
15 you may not use this file except in compliance with the License.
16 You may obtain a copy of the License at:
17
18 https://www.apache.org/licenses/LICENSE-2.0
19
20 This is the real deal: the program takes an instrumented binary and
21 attempts a variety of basic fuzzing tricks, paying close attention to
22 how they affect the execution path.
23
24 */
25
26 #include "afl-fuzz.h"
27
28 /* helper function for auto_extras qsort */
compare_auto_extras_len(const void * ae1,const void * ae2)29 static int compare_auto_extras_len(const void *ae1, const void *ae2) {
30
31 return ((struct auto_extra_data *)ae1)->len -
32 ((struct auto_extra_data *)ae2)->len;
33
34 }
35
36 /* descending order */
37
compare_auto_extras_use_d(const void * ae1,const void * ae2)38 static int compare_auto_extras_use_d(const void *ae1, const void *ae2) {
39
40 return ((struct auto_extra_data *)ae2)->hit_cnt -
41 ((struct auto_extra_data *)ae1)->hit_cnt;
42
43 }
44
45 /* Helper function for load_extras. */
46
compare_extras_len(const void * e1,const void * e2)47 static int compare_extras_len(const void *e1, const void *e2) {
48
49 return ((struct extra_data *)e1)->len - ((struct extra_data *)e2)->len;
50
51 }
52
53 /* Read extras from a file, sort by size. */
54
load_extras_file(afl_state_t * afl,u8 * fname,u32 * min_len,u32 * max_len,u32 dict_level)55 void load_extras_file(afl_state_t *afl, u8 *fname, u32 *min_len, u32 *max_len,
56 u32 dict_level) {
57
58 FILE *f;
59 u8 buf[MAX_LINE];
60 u8 * lptr;
61 u32 cur_line = 0;
62
63 u8 val_bufs[2][STRINGIFY_VAL_SIZE_MAX];
64
65 f = fopen(fname, "r");
66
67 if (!f) { PFATAL("Unable to open '%s'", fname); }
68
69 while ((lptr = fgets(buf, MAX_LINE, f))) {
70
71 u8 *rptr, *wptr;
72 u32 klen = 0;
73
74 ++cur_line;
75
76 /* Trim on left and right. */
77
78 while (isspace(*lptr)) {
79
80 ++lptr;
81
82 }
83
84 rptr = lptr + strlen(lptr) - 1;
85 while (rptr >= lptr && isspace(*rptr)) {
86
87 --rptr;
88
89 }
90
91 ++rptr;
92 *rptr = 0;
93
94 /* Skip empty lines and comments. */
95
96 if (!*lptr || *lptr == '#') { continue; }
97
98 /* All other lines must end with '"', which we can consume. */
99
100 --rptr;
101
102 if (rptr < lptr || *rptr != '"') {
103
104 WARNF("Malformed name=\"value\" pair in line %u.", cur_line);
105 continue;
106
107 }
108
109 *rptr = 0;
110
111 /* Skip alphanumerics and dashes (label). */
112
113 while (isalnum(*lptr) || *lptr == '_') {
114
115 ++lptr;
116
117 }
118
119 /* If @number follows, parse that. */
120
121 if (*lptr == '@') {
122
123 ++lptr;
124 if (atoi(lptr) > (s32)dict_level) { continue; }
125 while (isdigit(*lptr)) {
126
127 ++lptr;
128
129 }
130
131 }
132
133 /* Skip [number] */
134
135 if (*lptr == '[') {
136
137 do {
138
139 ++lptr;
140
141 } while (*lptr >= '0' && *lptr <= '9');
142
143 if (*lptr == ']') { ++lptr; }
144
145 }
146
147 /* Skip whitespace and = signs. */
148
149 while (isspace(*lptr) || *lptr == '=') {
150
151 ++lptr;
152
153 }
154
155 /* Consume opening '"'. */
156
157 if (*lptr != '"') {
158
159 WARNF("Malformed name=\"keyword\" pair in line %u.", cur_line);
160 continue;
161
162 }
163
164 ++lptr;
165
166 if (!*lptr) {
167
168 WARNF("Empty keyword in line %u.", cur_line);
169 continue;
170
171 }
172
173 /* Okay, let's allocate memory and copy data between "...", handling
174 \xNN escaping, \\, and \". */
175
176 afl->extras =
177 afl_realloc((void **)&afl->extras,
178 (afl->extras_cnt + 1) * sizeof(struct extra_data));
179 if (unlikely(!afl->extras)) { PFATAL("alloc"); }
180
181 wptr = afl->extras[afl->extras_cnt].data = ck_alloc(rptr - lptr);
182
183 if (!wptr) { PFATAL("no mem for data"); }
184
185 while (*lptr) {
186
187 char *hexdigits = "0123456789abcdef";
188
189 switch (*lptr) {
190
191 case 1 ... 31:
192 case 128 ... 255:
193 WARNF("Non-printable characters in line %u.", cur_line);
194 continue;
195 break;
196
197 case '\\':
198
199 ++lptr;
200
201 if (*lptr == '\\' || *lptr == '"') {
202
203 *(wptr++) = *(lptr++);
204 klen++;
205 break;
206
207 }
208
209 if (*lptr != 'x' || !isxdigit(lptr[1]) || !isxdigit(lptr[2])) {
210
211 WARNF("Invalid escaping (not \\xNN) in line %u.", cur_line);
212 continue;
213
214 }
215
216 *(wptr++) = ((strchr(hexdigits, tolower(lptr[1])) - hexdigits) << 4) |
217 (strchr(hexdigits, tolower(lptr[2])) - hexdigits);
218
219 lptr += 3;
220 ++klen;
221
222 break;
223
224 default:
225 *(wptr++) = *(lptr++);
226 ++klen;
227
228 }
229
230 }
231
232 afl->extras[afl->extras_cnt].len = klen;
233
234 if (afl->extras[afl->extras_cnt].len > MAX_DICT_FILE) {
235
236 WARNF(
237 "Keyword too big in line %u (%s, limit is %s)", cur_line,
238 stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), klen),
239 stringify_mem_size(val_bufs[1], sizeof(val_bufs[1]), MAX_DICT_FILE));
240 continue;
241
242 }
243
244 if (*min_len > klen) { *min_len = klen; }
245 if (*max_len < klen) { *max_len = klen; }
246
247 ++afl->extras_cnt;
248
249 }
250
251 fclose(f);
252
253 }
254
extras_check_and_sort(afl_state_t * afl,u32 min_len,u32 max_len,u8 * dir)255 static void extras_check_and_sort(afl_state_t *afl, u32 min_len, u32 max_len,
256 u8 *dir) {
257
258 u8 val_bufs[2][STRINGIFY_VAL_SIZE_MAX];
259
260 if (!afl->extras_cnt) {
261
262 WARNF("No usable data in '%s'", dir);
263 return;
264
265 }
266
267 qsort(afl->extras, afl->extras_cnt, sizeof(struct extra_data),
268 compare_extras_len);
269
270 ACTF("Loaded %u extra tokens, size range %s to %s.", afl->extras_cnt,
271 stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), min_len),
272 stringify_mem_size(val_bufs[1], sizeof(val_bufs[1]), max_len));
273
274 if (max_len > 32) {
275
276 WARNF("Some tokens are relatively large (%s) - consider trimming.",
277 stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), max_len));
278
279 }
280
281 if (afl->extras_cnt > afl->max_det_extras) {
282
283 WARNF("More than %u tokens - will use them probabilistically.",
284 afl->max_det_extras);
285
286 }
287
288 }
289
290 /* Read extras from the extras directory and sort them by size. */
291
load_extras(afl_state_t * afl,u8 * dir)292 void load_extras(afl_state_t *afl, u8 *dir) {
293
294 DIR * d;
295 struct dirent *de;
296 u32 min_len = MAX_DICT_FILE, max_len = 0, dict_level = 0;
297 u8 * x;
298
299 u8 val_bufs[2][STRINGIFY_VAL_SIZE_MAX];
300
301 /* If the name ends with @, extract level and continue. */
302
303 if ((x = strchr(dir, '@'))) {
304
305 *x = 0;
306 dict_level = atoi(x + 1);
307
308 }
309
310 ACTF("Loading extra dictionary from '%s' (level %u)...", dir, dict_level);
311
312 d = opendir(dir);
313
314 if (!d) {
315
316 if (errno == ENOTDIR) {
317
318 load_extras_file(afl, dir, &min_len, &max_len, dict_level);
319 extras_check_and_sort(afl, min_len, max_len, dir);
320 return;
321
322 }
323
324 PFATAL("Unable to open '%s'", dir);
325
326 }
327
328 if (x) { FATAL("Dictionary levels not supported for directories."); }
329
330 while ((de = readdir(d))) {
331
332 struct stat st;
333 u8 * fn = alloc_printf("%s/%s", dir, de->d_name);
334 s32 fd;
335
336 if (lstat(fn, &st) || access(fn, R_OK)) {
337
338 PFATAL("Unable to access '%s'", fn);
339
340 }
341
342 /* This also takes care of . and .. */
343 if (!S_ISREG(st.st_mode) || !st.st_size) {
344
345 ck_free(fn);
346 continue;
347
348 }
349
350 if (st.st_size > MAX_DICT_FILE) {
351
352 WARNF(
353 "Extra '%s' is too big (%s, limit is %s)", fn,
354 stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), st.st_size),
355 stringify_mem_size(val_bufs[1], sizeof(val_bufs[1]), MAX_DICT_FILE));
356 continue;
357
358 }
359
360 if (min_len > st.st_size) { min_len = st.st_size; }
361 if (max_len < st.st_size) { max_len = st.st_size; }
362
363 afl->extras =
364 afl_realloc((void **)&afl->extras,
365 (afl->extras_cnt + 1) * sizeof(struct extra_data));
366 if (unlikely(!afl->extras)) { PFATAL("alloc"); }
367
368 afl->extras[afl->extras_cnt].data = ck_alloc(st.st_size);
369 afl->extras[afl->extras_cnt].len = st.st_size;
370
371 fd = open(fn, O_RDONLY);
372
373 if (fd < 0) { PFATAL("Unable to open '%s'", fn); }
374
375 ck_read(fd, afl->extras[afl->extras_cnt].data, st.st_size, fn);
376
377 close(fd);
378 ck_free(fn);
379
380 ++afl->extras_cnt;
381
382 }
383
384 closedir(d);
385
386 extras_check_and_sort(afl, min_len, max_len, dir);
387
388 }
389
390 /* Helper function for maybe_add_auto(afl, ) */
391
memcmp_nocase(u8 * m1,u8 * m2,u32 len)392 static inline u8 memcmp_nocase(u8 *m1, u8 *m2, u32 len) {
393
394 while (len--) {
395
396 if (tolower(*(m1++)) ^ tolower(*(m2++))) { return 1; }
397
398 }
399
400 return 0;
401
402 }
403
404 /* add an extra/dict/token - no checks performed, no sorting */
405
add_extra_nocheck(afl_state_t * afl,u8 * mem,u32 len)406 static void add_extra_nocheck(afl_state_t *afl, u8 *mem, u32 len) {
407
408 afl->extras = afl_realloc((void **)&afl->extras,
409 (afl->extras_cnt + 1) * sizeof(struct extra_data));
410
411 if (unlikely(!afl->extras)) { PFATAL("alloc"); }
412
413 afl->extras[afl->extras_cnt].data = ck_alloc(len);
414 afl->extras[afl->extras_cnt].len = len;
415 memcpy(afl->extras[afl->extras_cnt].data, mem, len);
416 afl->extras_cnt++;
417
418 /* We only want to print this once */
419
420 if (afl->extras_cnt == afl->max_det_extras + 1) {
421
422 WARNF("More than %u tokens - will use them probabilistically.",
423 afl->max_det_extras);
424
425 }
426
427 }
428
429 /* Sometimes strings in input is transformed to unicode internally, so for
430 fuzzing we should attempt to de-unicode if it looks like simple unicode */
431
deunicode_extras(afl_state_t * afl)432 void deunicode_extras(afl_state_t *afl) {
433
434 if (!afl->extras_cnt) return;
435
436 u32 i, j, orig_cnt = afl->extras_cnt;
437 u8 buf[64];
438
439 for (i = 0; i < orig_cnt; ++i) {
440
441 if (afl->extras[i].len < 6 || afl->extras[i].len > 64 ||
442 afl->extras[i].len % 2) {
443
444 continue;
445
446 }
447
448 u32 k = 0, z1 = 0, z2 = 0, z3 = 0, z4 = 0, half = afl->extras[i].len >> 1;
449 u32 quarter = half >> 1;
450
451 for (j = 0; j < afl->extras[i].len; ++j) {
452
453 switch (j % 4) {
454
455 case 2:
456 if (!afl->extras[i].data[j]) { ++z3; }
457 // fall through
458 case 0:
459 if (!afl->extras[i].data[j]) { ++z1; }
460 break;
461 case 3:
462 if (!afl->extras[i].data[j]) { ++z4; }
463 // fall through
464 case 1:
465 if (!afl->extras[i].data[j]) { ++z2; }
466 break;
467
468 }
469
470 }
471
472 if ((z1 < half && z2 < half) || z1 + z2 == afl->extras[i].len) { continue; }
473
474 // also maybe 32 bit unicode?
475 if (afl->extras[i].len % 4 == 0 && afl->extras[i].len >= 12 &&
476 (z3 == quarter || z4 == quarter) && z1 + z2 == quarter * 3) {
477
478 for (j = 0; j < afl->extras[i].len; ++j) {
479
480 if (z4 < quarter) {
481
482 if (j % 4 == 3) { buf[k++] = afl->extras[i].data[j]; }
483
484 } else if (z3 < quarter) {
485
486 if (j % 4 == 2) { buf[k++] = afl->extras[i].data[j]; }
487
488 } else if (z2 < half) {
489
490 if (j % 4 == 1) { buf[k++] = afl->extras[i].data[j]; }
491
492 } else {
493
494 if (j % 4 == 0) { buf[k++] = afl->extras[i].data[j]; }
495
496 }
497
498 }
499
500 add_extra_nocheck(afl, buf, k);
501 k = 0;
502
503 }
504
505 for (j = 0; j < afl->extras[i].len; ++j) {
506
507 if (z1 < half) {
508
509 if (j % 2 == 0) { buf[k++] = afl->extras[i].data[j]; }
510
511 } else {
512
513 if (j % 2 == 1) { buf[k++] = afl->extras[i].data[j]; }
514
515 }
516
517 }
518
519 add_extra_nocheck(afl, buf, k);
520
521 }
522
523 qsort(afl->extras, afl->extras_cnt, sizeof(struct extra_data),
524 compare_extras_len);
525
526 }
527
528 /* Removes duplicates from the loaded extras. This can happen if multiple files
529 are loaded */
530
dedup_extras(afl_state_t * afl)531 void dedup_extras(afl_state_t *afl) {
532
533 if (afl->extras_cnt < 2) return;
534
535 u32 i, j, orig_cnt = afl->extras_cnt;
536
537 for (i = 0; i < afl->extras_cnt - 1; ++i) {
538
539 for (j = i + 1; j < afl->extras_cnt; ++j) {
540
541 restart_dedup:
542
543 // if the goto was used we could be at the end of the list
544 if (j >= afl->extras_cnt || afl->extras[i].len != afl->extras[j].len)
545 break;
546
547 if (memcmp(afl->extras[i].data, afl->extras[j].data,
548 afl->extras[i].len) == 0) {
549
550 ck_free(afl->extras[j].data);
551 if (j + 1 < afl->extras_cnt) // not at the end of the list?
552 memmove((char *)&afl->extras[j], (char *)&afl->extras[j + 1],
553 (afl->extras_cnt - j - 1) * sizeof(struct extra_data));
554 --afl->extras_cnt;
555 goto restart_dedup; // restart if several duplicates are in a row
556
557 }
558
559 }
560
561 }
562
563 if (afl->extras_cnt != orig_cnt)
564 afl->extras = afl_realloc_exact(
565 (void **)&afl->extras, afl->extras_cnt * sizeof(struct extra_data));
566
567 }
568
569 /* Adds a new extra / dict entry. */
add_extra(afl_state_t * afl,u8 * mem,u32 len)570 void add_extra(afl_state_t *afl, u8 *mem, u32 len) {
571
572 u32 i, found = 0;
573
574 for (i = 0; i < afl->extras_cnt; i++) {
575
576 if (afl->extras[i].len == len) {
577
578 if (memcmp(afl->extras[i].data, mem, len) == 0) return;
579 found = 1;
580
581 } else {
582
583 if (found) break;
584
585 }
586
587 }
588
589 if (len > MAX_DICT_FILE) {
590
591 u8 val_bufs[2][STRINGIFY_VAL_SIZE_MAX];
592 WARNF("Extra '%.*s' is too big (%s, limit is %s), skipping file!", (int)len,
593 mem, stringify_mem_size(val_bufs[0], sizeof(val_bufs[0]), len),
594 stringify_mem_size(val_bufs[1], sizeof(val_bufs[1]), MAX_DICT_FILE));
595 return;
596
597 } else if (len > 32) {
598
599 WARNF("Extra '%.*s' is pretty large, consider trimming.", (int)len, mem);
600
601 }
602
603 add_extra_nocheck(afl, mem, len);
604
605 qsort(afl->extras, afl->extras_cnt, sizeof(struct extra_data),
606 compare_extras_len);
607
608 }
609
610 /* Maybe add automatic extra. */
611
maybe_add_auto(afl_state_t * afl,u8 * mem,u32 len)612 void maybe_add_auto(afl_state_t *afl, u8 *mem, u32 len) {
613
614 u32 i;
615
616 /* Allow users to specify that they don't want auto dictionaries. */
617
618 if (!MAX_AUTO_EXTRAS || !USE_AUTO_EXTRAS) { return; }
619
620 /* Skip runs of identical bytes. */
621
622 for (i = 1; i < len; ++i) {
623
624 if (mem[0] ^ mem[i]) { break; }
625
626 }
627
628 if (i == len || unlikely(len > MAX_AUTO_EXTRA)) { return; }
629
630 /* Reject builtin interesting values. */
631
632 if (len == 2) {
633
634 i = sizeof(interesting_16) >> 1;
635
636 while (i--) {
637
638 if (*((u16 *)mem) == interesting_16[i] ||
639 *((u16 *)mem) == SWAP16(interesting_16[i])) {
640
641 return;
642
643 }
644
645 }
646
647 }
648
649 if (len == 4) {
650
651 i = sizeof(interesting_32) >> 2;
652
653 while (i--) {
654
655 if (*((u32 *)mem) == (u32)interesting_32[i] ||
656 *((u32 *)mem) == SWAP32(interesting_32[i])) {
657
658 return;
659
660 }
661
662 }
663
664 }
665
666 /* Reject anything that matches existing extras. Do a case-insensitive
667 match. We optimize by exploiting the fact that extras[] are sorted
668 by size. */
669
670 for (i = 0; i < afl->extras_cnt; ++i) {
671
672 if (afl->extras[i].len >= len) { break; }
673
674 }
675
676 for (; i < afl->extras_cnt && afl->extras[i].len == len; ++i) {
677
678 if (!memcmp_nocase(afl->extras[i].data, mem, len)) { return; }
679
680 }
681
682 /* Last but not least, check afl->a_extras[] for matches. There are no
683 guarantees of a particular sort order. */
684
685 afl->auto_changed = 1;
686
687 for (i = 0; i < afl->a_extras_cnt; ++i) {
688
689 if (afl->a_extras[i].len == len &&
690 !memcmp_nocase(afl->a_extras[i].data, mem, len)) {
691
692 afl->a_extras[i].hit_cnt++;
693 goto sort_a_extras;
694
695 }
696
697 }
698
699 /* At this point, looks like we're dealing with a new entry. So, let's
700 append it if we have room. Otherwise, let's randomly evict some other
701 entry from the bottom half of the list. */
702
703 if (afl->a_extras_cnt < MAX_AUTO_EXTRAS) {
704
705 memcpy(afl->a_extras[afl->a_extras_cnt].data, mem, len);
706 afl->a_extras[afl->a_extras_cnt].len = len;
707 ++afl->a_extras_cnt;
708
709 } else {
710
711 i = MAX_AUTO_EXTRAS / 2 + rand_below(afl, (MAX_AUTO_EXTRAS + 1) / 2);
712
713 memcpy(afl->a_extras[i].data, mem, len);
714 afl->a_extras[i].len = len;
715 afl->a_extras[i].hit_cnt = 0;
716
717 }
718
719 sort_a_extras:
720
721 /* First, sort all auto extras by use count, descending order. */
722
723 qsort(afl->a_extras, afl->a_extras_cnt, sizeof(struct auto_extra_data),
724 compare_auto_extras_use_d);
725
726 /* Then, sort the top USE_AUTO_EXTRAS entries by size. */
727
728 qsort(afl->a_extras, MIN((u32)USE_AUTO_EXTRAS, afl->a_extras_cnt),
729 sizeof(struct auto_extra_data), compare_auto_extras_len);
730
731 }
732
733 /* Save automatically generated extras. */
734
save_auto(afl_state_t * afl)735 void save_auto(afl_state_t *afl) {
736
737 u32 i;
738
739 if (!afl->auto_changed) { return; }
740 afl->auto_changed = 0;
741
742 for (i = 0; i < MIN((u32)USE_AUTO_EXTRAS, afl->a_extras_cnt); ++i) {
743
744 u8 *fn =
745 alloc_printf("%s/queue/.state/auto_extras/auto_%06u", afl->out_dir, i);
746 s32 fd;
747
748 fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, DEFAULT_PERMISSION);
749
750 if (fd < 0) { PFATAL("Unable to create '%s'", fn); }
751
752 ck_write(fd, afl->a_extras[i].data, afl->a_extras[i].len, fn);
753
754 close(fd);
755 ck_free(fn);
756
757 }
758
759 }
760
761 /* Load automatically generated extras. */
762
load_auto(afl_state_t * afl)763 void load_auto(afl_state_t *afl) {
764
765 u32 i;
766
767 for (i = 0; i < USE_AUTO_EXTRAS; ++i) {
768
769 u8 tmp[MAX_AUTO_EXTRA + 1];
770 u8 *fn = alloc_printf("%s/.state/auto_extras/auto_%06u", afl->in_dir, i);
771 s32 fd, len;
772
773 fd = open(fn, O_RDONLY);
774
775 if (fd < 0) {
776
777 if (errno != ENOENT) { PFATAL("Unable to open '%s'", fn); }
778 ck_free(fn);
779 break;
780
781 }
782
783 /* We read one byte more to cheaply detect tokens that are too
784 long (and skip them). */
785
786 len = read(fd, tmp, MAX_AUTO_EXTRA + 1);
787
788 if (len < 0) { PFATAL("Unable to read from '%s'", fn); }
789
790 if (len >= MIN_AUTO_EXTRA && len <= MAX_AUTO_EXTRA) {
791
792 maybe_add_auto(afl, tmp, len);
793
794 }
795
796 close(fd);
797 ck_free(fn);
798
799 }
800
801 if (i) {
802
803 OKF("Loaded %u auto-discovered dictionary tokens.", i);
804
805 } else {
806
807 ACTF("No auto-generated dictionary tokens to reuse.");
808
809 }
810
811 }
812
813 /* Destroy extras. */
814
destroy_extras(afl_state_t * afl)815 void destroy_extras(afl_state_t *afl) {
816
817 u32 i;
818
819 for (i = 0; i < afl->extras_cnt; ++i) {
820
821 ck_free(afl->extras[i].data);
822
823 }
824
825 afl_free(afl->extras);
826
827 }
828
829