1 /*
2 american fuzzy lop++ - wrapper for GNU as
3 -----------------------------------------
4
5 Originally written by Michal Zalewski
6
7 Now maintained by Marc Heuse <mh@mh-sec.de>,
8 Heiko Eißfeldt <heiko.eissfeldt@hexco.de> and
9 Andrea Fioraldi <andreafioraldi@gmail.com>
10
11 Copyright 2016, 2017 Google Inc. All rights reserved.
12 Copyright 2019-2022 AFLplusplus Project. All rights reserved.
13
14 Licensed under the Apache License, Version 2.0 (the "License");
15 you may not use this file except in compliance with the License.
16 You may obtain a copy of the License at:
17
18 https://www.apache.org/licenses/LICENSE-2.0
19
20 The sole purpose of this wrapper is to preprocess assembly files generated
21 by GCC / clang and inject the instrumentation bits included from afl-as.h. It
22 is automatically invoked by the toolchain when compiling programs using
23 afl-gcc / afl-clang.
24
25 Note that it's an explicit non-goal to instrument hand-written assembly,
26 be it in separate .s files or in __asm__ blocks. The only aspiration this
27 utility has right now is to be able to skip them gracefully and allow the
28 compilation process to continue.
29
30 That said, see utils/clang_asm_normalize/ for a solution that may
31 allow clang users to make things work even with hand-crafted assembly. Just
32 note that there is no equivalent for GCC.
33
34 */
35
36 #define AFL_MAIN
37
38 #include "config.h"
39 #include "types.h"
40 #include "debug.h"
41 #include "alloc-inl.h"
42
43 #include "afl-as.h"
44
45 #include <stdio.h>
46 #include <unistd.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <time.h>
50 #include <limits.h>
51 #include <ctype.h>
52 #include <fcntl.h>
53
54 #include <sys/wait.h>
55 #include <sys/time.h>
56
57 static u8 **as_params; /* Parameters passed to the real 'as' */
58
59 static u8 *input_file; /* Originally specified input file */
60 static u8 *modified_file; /* Instrumented file for the real 'as' */
61
62 static u8 be_quiet, /* Quiet mode (no stderr output) */
63 clang_mode, /* Running in clang mode? */
64 pass_thru, /* Just pass data through? */
65 just_version, /* Just show version? */
66 sanitizer; /* Using ASAN / MSAN */
67
68 static u32 inst_ratio = 100, /* Instrumentation probability (%) */
69 as_par_cnt = 1; /* Number of params to 'as' */
70
71 /* If we don't find --32 or --64 in the command line, default to
72 instrumentation for whichever mode we were compiled with. This is not
73 perfect, but should do the trick for almost all use cases. */
74
75 #ifdef WORD_SIZE_64
76
77 static u8 use_64bit = 1;
78
79 #else
80
81 static u8 use_64bit = 0;
82
83 #ifdef __APPLE__
84 #error "Sorry, 32-bit Apple platforms are not supported."
85 #endif /* __APPLE__ */
86
87 #endif /* ^WORD_SIZE_64 */
88
89 /* Examine and modify parameters to pass to 'as'. Note that the file name
90 is always the last parameter passed by GCC, so we exploit this property
91 to keep the code simple. */
92
edit_params(int argc,char ** argv)93 static void edit_params(int argc, char **argv) {
94
95 u8 *tmp_dir = getenv("TMPDIR"), *afl_as = getenv("AFL_AS");
96 u32 i;
97
98 #ifdef __APPLE__
99
100 u8 use_clang_as = 0;
101
102 /* On MacOS X, the Xcode cctool 'as' driver is a bit stale and does not work
103 with the code generated by newer versions of clang that are hand-built
104 by the user. See the thread here: https://goo.gl/HBWDtn.
105
106 To work around this, when using clang and running without AFL_AS
107 specified, we will actually call 'clang -c' instead of 'as -q' to
108 compile the assembly file.
109
110 The tools aren't cmdline-compatible, but at least for now, we can
111 seemingly get away with this by making only very minor tweaks. Thanks
112 to Nico Weber for the idea. */
113
114 if (clang_mode && !afl_as) {
115
116 use_clang_as = 1;
117
118 afl_as = getenv("AFL_CC");
119 if (!afl_as) afl_as = getenv("AFL_CXX");
120 if (!afl_as) afl_as = "clang";
121
122 }
123
124 #endif /* __APPLE__ */
125
126 /* Although this is not documented, GCC also uses TEMP and TMP when TMPDIR
127 is not set. We need to check these non-standard variables to properly
128 handle the pass_thru logic later on. */
129
130 if (!tmp_dir) { tmp_dir = getenv("TEMP"); }
131 if (!tmp_dir) { tmp_dir = getenv("TMP"); }
132 if (!tmp_dir) { tmp_dir = "/tmp"; }
133
134 as_params = ck_alloc((argc + 32) * sizeof(u8 *));
135 if (unlikely((INT_MAX - 32) < argc || !as_params)) {
136
137 FATAL("Too many parameters passed to as");
138
139 }
140
141 as_params[0] = afl_as ? afl_as : (u8 *)"as";
142
143 as_params[argc] = 0;
144
145 for (i = 1; (s32)i < argc - 1; i++) {
146
147 if (!strcmp(argv[i], "--64")) {
148
149 use_64bit = 1;
150
151 } else if (!strcmp(argv[i], "--32")) {
152
153 use_64bit = 0;
154
155 }
156
157 #ifdef __APPLE__
158
159 /* The Apple case is a bit different... */
160
161 if (!strcmp(argv[i], "-arch") && i + 1 < (u32)argc) {
162
163 if (!strcmp(argv[i + 1], "x86_64"))
164 use_64bit = 1;
165 else if (!strcmp(argv[i + 1], "i386"))
166 FATAL("Sorry, 32-bit Apple platforms are not supported.");
167
168 }
169
170 /* Strip options that set the preference for a particular upstream
171 assembler in Xcode. */
172
173 if (clang_mode && (!strcmp(argv[i], "-q") || !strcmp(argv[i], "-Q")))
174 continue;
175
176 #endif /* __APPLE__ */
177
178 as_params[as_par_cnt++] = argv[i];
179
180 }
181
182 #ifdef __APPLE__
183
184 /* When calling clang as the upstream assembler, append -c -x assembler
185 and hope for the best. */
186
187 if (use_clang_as) {
188
189 as_params[as_par_cnt++] = "-c";
190 as_params[as_par_cnt++] = "-x";
191 as_params[as_par_cnt++] = "assembler";
192
193 }
194
195 #endif /* __APPLE__ */
196
197 input_file = argv[argc - 1];
198
199 if (input_file[0] == '-') {
200
201 if (!strcmp(input_file + 1, "-version")) {
202
203 just_version = 1;
204 modified_file = input_file;
205 goto wrap_things_up;
206
207 }
208
209 if (input_file[1]) {
210
211 FATAL("Incorrect use (not called through afl-gcc?)");
212
213 } else {
214
215 input_file = NULL;
216
217 }
218
219 } else {
220
221 /* Check if this looks like a standard invocation as a part of an attempt
222 to compile a program, rather than using gcc on an ad-hoc .s file in
223 a format we may not understand. This works around an issue compiling
224 NSS. */
225
226 if (strncmp(input_file, tmp_dir, strlen(tmp_dir)) &&
227 strncmp(input_file, "/var/tmp/", 9) &&
228 strncmp(input_file, "/tmp/", 5) &&
229 getenv("AFL_AS_FORCE_INSTRUMENT") == NULL) {
230
231 pass_thru = 1;
232
233 } else if (getenv("AFL_AS_FORCE_INSTRUMENT")) {
234
235 unsetenv("AFL_AS_FORCE_INSTRUMENT");
236
237 }
238
239 }
240
241 modified_file = alloc_printf("%s/.afl-%u-%u-%u.s", tmp_dir, (u32)getpid(),
242 (u32)time(NULL), (u32)random());
243
244 wrap_things_up:
245
246 as_params[as_par_cnt++] = modified_file;
247 as_params[as_par_cnt] = NULL;
248
249 }
250
251 /* Process input file, generate modified_file. Insert instrumentation in all
252 the appropriate places. */
253
add_instrumentation(void)254 static void add_instrumentation(void) {
255
256 static u8 line[MAX_LINE];
257
258 FILE *inf;
259 FILE *outf;
260 s32 outfd;
261 u32 ins_lines = 0;
262
263 u8 instr_ok = 0, skip_csect = 0, skip_next_label = 0, skip_intel = 0,
264 skip_app = 0, instrument_next = 0;
265
266 #ifdef __APPLE__
267
268 u8 *colon_pos;
269
270 #endif /* __APPLE__ */
271
272 if (input_file) {
273
274 inf = fopen(input_file, "r");
275 if (!inf) { PFATAL("Unable to read '%s'", input_file); }
276
277 } else {
278
279 inf = stdin;
280
281 }
282
283 outfd = open(modified_file, O_WRONLY | O_EXCL | O_CREAT, DEFAULT_PERMISSION);
284
285 if (outfd < 0) { PFATAL("Unable to write to '%s'", modified_file); }
286
287 outf = fdopen(outfd, "w");
288
289 if (!outf) { PFATAL("fdopen() failed"); }
290
291 while (fgets(line, MAX_LINE, inf)) {
292
293 /* In some cases, we want to defer writing the instrumentation trampoline
294 until after all the labels, macros, comments, etc. If we're in this
295 mode, and if the line starts with a tab followed by a character, dump
296 the trampoline now. */
297
298 if (!pass_thru && !skip_intel && !skip_app && !skip_csect && instr_ok &&
299 instrument_next && line[0] == '\t' && isalpha(line[1])) {
300
301 fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
302 R(MAP_SIZE));
303
304 instrument_next = 0;
305 ins_lines++;
306
307 }
308
309 /* Output the actual line, call it a day in pass-thru mode. */
310
311 fputs(line, outf);
312
313 if (pass_thru) { continue; }
314
315 /* All right, this is where the actual fun begins. For one, we only want to
316 instrument the .text section. So, let's keep track of that in processed
317 files - and let's set instr_ok accordingly. */
318
319 if (line[0] == '\t' && line[1] == '.') {
320
321 /* OpenBSD puts jump tables directly inline with the code, which is
322 a bit annoying. They use a specific format of p2align directives
323 around them, so we use that as a signal. */
324
325 if (!clang_mode && instr_ok && !strncmp(line + 2, "p2align ", 8) &&
326 isdigit(line[10]) && line[11] == '\n') {
327
328 skip_next_label = 1;
329
330 }
331
332 if (!strncmp(line + 2, "text\n", 5) ||
333 !strncmp(line + 2, "section\t.text", 13) ||
334 !strncmp(line + 2, "section\t__TEXT,__text", 21) ||
335 !strncmp(line + 2, "section __TEXT,__text", 21)) {
336
337 instr_ok = 1;
338 continue;
339
340 }
341
342 if (!strncmp(line + 2, "section\t", 8) ||
343 !strncmp(line + 2, "section ", 8) || !strncmp(line + 2, "bss\n", 4) ||
344 !strncmp(line + 2, "data\n", 5)) {
345
346 instr_ok = 0;
347 continue;
348
349 }
350
351 }
352
353 /* Detect off-flavor assembly (rare, happens in gdb). When this is
354 encountered, we set skip_csect until the opposite directive is
355 seen, and we do not instrument. */
356
357 if (strstr(line, ".code")) {
358
359 if (strstr(line, ".code32")) { skip_csect = use_64bit; }
360 if (strstr(line, ".code64")) { skip_csect = !use_64bit; }
361
362 }
363
364 /* Detect syntax changes, as could happen with hand-written assembly.
365 Skip Intel blocks, resume instrumentation when back to AT&T. */
366
367 if (strstr(line, ".intel_syntax")) { skip_intel = 1; }
368 if (strstr(line, ".att_syntax")) { skip_intel = 0; }
369
370 /* Detect and skip ad-hoc __asm__ blocks, likewise skipping them. */
371
372 if (line[0] == '#' || line[1] == '#') {
373
374 if (strstr(line, "#APP")) { skip_app = 1; }
375 if (strstr(line, "#NO_APP")) { skip_app = 0; }
376
377 }
378
379 /* If we're in the right mood for instrumenting, check for function
380 names or conditional labels. This is a bit messy, but in essence,
381 we want to catch:
382
383 ^main: - function entry point (always instrumented)
384 ^.L0: - GCC branch label
385 ^.LBB0_0: - clang branch label (but only in clang mode)
386 ^\tjnz foo - conditional branches
387
388 ...but not:
389
390 ^# BB#0: - clang comments
391 ^ # BB#0: - ditto
392 ^.Ltmp0: - clang non-branch labels
393 ^.LC0 - GCC non-branch labels
394 ^.LBB0_0: - ditto (when in GCC mode)
395 ^\tjmp foo - non-conditional jumps
396
397 Additionally, clang and GCC on MacOS X follow a different convention
398 with no leading dots on labels, hence the weird maze of #ifdefs
399 later on.
400
401 */
402
403 if (skip_intel || skip_app || skip_csect || !instr_ok || line[0] == '#' ||
404 line[0] == ' ') {
405
406 continue;
407
408 }
409
410 /* Conditional branch instruction (jnz, etc). We append the instrumentation
411 right after the branch (to instrument the not-taken path) and at the
412 branch destination label (handled later on). */
413
414 if (line[0] == '\t') {
415
416 if (line[1] == 'j' && line[2] != 'm' && R(100) < (long)inst_ratio) {
417
418 fprintf(outf, use_64bit ? trampoline_fmt_64 : trampoline_fmt_32,
419 R(MAP_SIZE));
420
421 ins_lines++;
422
423 }
424
425 continue;
426
427 }
428
429 /* Label of some sort. This may be a branch destination, but we need to
430 read carefully and account for several different formatting
431 conventions. */
432
433 #ifdef __APPLE__
434
435 /* Apple: L<whatever><digit>: */
436
437 if ((colon_pos = strstr(line, ":"))) {
438
439 if (line[0] == 'L' && isdigit(*(colon_pos - 1))) {
440
441 #else
442
443 /* Everybody else: .L<whatever>: */
444
445 if (strstr(line, ":")) {
446
447 if (line[0] == '.') {
448
449 #endif /* __APPLE__ */
450
451 /* .L0: or LBB0_0: style jump destination */
452
453 #ifdef __APPLE__
454
455 /* Apple: L<num> / LBB<num> */
456
457 if ((isdigit(line[1]) || (clang_mode && !strncmp(line, "LBB", 3))) &&
458 R(100) < (long)inst_ratio) {
459
460 #else
461
462 /* Apple: .L<num> / .LBB<num> */
463
464 if ((isdigit(line[2]) ||
465 (clang_mode && !strncmp(line + 1, "LBB", 3))) &&
466 R(100) < (long)inst_ratio) {
467
468 #endif /* __APPLE__ */
469
470 /* An optimization is possible here by adding the code only if the
471 label is mentioned in the code in contexts other than call / jmp.
472 That said, this complicates the code by requiring two-pass
473 processing (messy with stdin), and results in a speed gain
474 typically under 10%, because compilers are generally pretty good
475 about not generating spurious intra-function jumps.
476
477 We use deferred output chiefly to avoid disrupting
478 .Lfunc_begin0-style exception handling calculations (a problem on
479 MacOS X). */
480
481 if (!skip_next_label) {
482
483 instrument_next = 1;
484
485 } else {
486
487 skip_next_label = 0;
488
489 }
490
491 }
492
493 } else {
494
495 /* Function label (always instrumented, deferred mode). */
496
497 instrument_next = 1;
498
499 }
500
501 }
502
503 }
504
505 if (ins_lines) { fputs(use_64bit ? main_payload_64 : main_payload_32, outf); }
506
507 if (input_file) { fclose(inf); }
508 fclose(outf);
509
510 if (!be_quiet) {
511
512 if (!ins_lines) {
513
514 WARNF("No instrumentation targets found%s.",
515 pass_thru ? " (pass-thru mode)" : "");
516
517 } else {
518
519 char modeline[100];
520 snprintf(modeline, sizeof(modeline), "%s%s%s%s%s%s",
521 getenv("AFL_HARDEN") ? "hardened" : "non-hardened",
522 getenv("AFL_USE_ASAN") ? ", ASAN" : "",
523 getenv("AFL_USE_MSAN") ? ", MSAN" : "",
524 getenv("AFL_USE_TSAN") ? ", TSAN" : "",
525 getenv("AFL_USE_UBSAN") ? ", UBSAN" : "",
526 getenv("AFL_USE_LSAN") ? ", LSAN" : "");
527
528 OKF("Instrumented %u locations (%s-bit, %s mode, ratio %u%%).", ins_lines,
529 use_64bit ? "64" : "32", modeline, inst_ratio);
530
531 }
532
533 }
534
535 }
536
537 /* Main entry point */
538
539 int main(int argc, char **argv) {
540
541 s32 pid;
542 u32 rand_seed, i, j;
543 int status;
544 u8 *inst_ratio_str = getenv("AFL_INST_RATIO");
545
546 struct timeval tv;
547 struct timezone tz;
548
549 clang_mode = !!getenv(CLANG_ENV_VAR);
550
551 if ((isatty(2) && !getenv("AFL_QUIET")) || getenv("AFL_DEBUG") != NULL) {
552
553 SAYF(cCYA "afl-as" VERSION cRST " by Michal Zalewski\n");
554
555 } else {
556
557 be_quiet = 1;
558
559 }
560
561 if (argc < 2 || (argc == 2 && strcmp(argv[1], "-h") == 0)) {
562
563 fprintf(
564 stdout,
565 "afl-as" VERSION
566 " by Michal Zalewski\n"
567 "\n%s [-h]\n\n"
568 "This is a helper application for afl-fuzz. It is a wrapper around GNU "
569 "'as',\n"
570 "executed by the toolchain whenever using afl-gcc or afl-clang. You "
571 "probably\n"
572 "don't want to run this program directly.\n\n"
573
574 "Rarely, when dealing with extremely complex projects, it may be "
575 "advisable\n"
576 "to set AFL_INST_RATIO to a value less than 100 in order to reduce "
577 "the\n"
578 "odds of instrumenting every discovered branch.\n\n"
579 "Environment variables used:\n"
580 "AFL_AS: path to assembler to use for instrumented files\n"
581 "AFL_CC: fall back path to assembler\n"
582 "AFL_CXX: fall back path to assembler\n"
583 "TMPDIR: directory to use for temporary files\n"
584 "TEMP: fall back path to directory for temporary files\n"
585 "TMP: fall back path to directory for temporary files\n"
586 "AFL_INST_RATIO: user specified instrumentation ratio\n"
587 "AFL_QUIET: suppress verbose output\n"
588 "AFL_KEEP_ASSEMBLY: leave instrumented assembly files\n"
589 "AFL_AS_FORCE_INSTRUMENT: force instrumentation for asm sources\n"
590 "AFL_HARDEN, AFL_USE_ASAN, AFL_USE_MSAN, AFL_USE_UBSAN, AFL_USE_LSAN:\n"
591 " used in the instrumentation summary message\n",
592 argv[0]);
593
594 exit(1);
595
596 }
597
598 gettimeofday(&tv, &tz);
599
600 rand_seed = tv.tv_sec ^ tv.tv_usec ^ getpid();
601 // in fast systems where pids can repeat in the same seconds we need this
602 for (i = 1; (s32)i < argc; i++)
603 for (j = 0; j < strlen(argv[i]); j++)
604 rand_seed += argv[i][j];
605
606 srandom(rand_seed);
607
608 edit_params(argc, argv);
609
610 if (inst_ratio_str) {
611
612 if (sscanf(inst_ratio_str, "%u", &inst_ratio) != 1 || inst_ratio > 100) {
613
614 FATAL("Bad value of AFL_INST_RATIO (must be between 0 and 100)");
615
616 }
617
618 }
619
620 if (getenv(AS_LOOP_ENV_VAR)) {
621
622 FATAL("Endless loop when calling 'as' (remove '.' from your PATH)");
623
624 }
625
626 setenv(AS_LOOP_ENV_VAR, "1", 1);
627
628 /* When compiling with ASAN, we don't have a particularly elegant way to skip
629 ASAN-specific branches. But we can probabilistically compensate for
630 that... */
631
632 if (getenv("AFL_USE_ASAN") || getenv("AFL_USE_MSAN")) {
633
634 sanitizer = 1;
635 if (!getenv("AFL_INST_RATIO")) { inst_ratio /= 3; }
636
637 }
638
639 if (!just_version) { add_instrumentation(); }
640
641 if (!(pid = fork())) {
642
643 execvp(as_params[0], (char **)as_params);
644 FATAL("Oops, failed to execute '%s' - check your PATH", as_params[0]);
645
646 }
647
648 if (pid < 0) { PFATAL("fork() failed"); }
649
650 if (waitpid(pid, &status, 0) <= 0) { PFATAL("waitpid() failed"); }
651
652 if (!getenv("AFL_KEEP_ASSEMBLY")) { unlink(modified_file); }
653
654 exit(WEXITSTATUS(status));
655
656 }
657
658