1 // Copyright 2017 syzkaller project authors. All rights reserved.
2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
3
4 // +build
5
6 #include <algorithm>
7 #include <errno.h>
8 #include <signal.h>
9 #include <stdarg.h>
10 #include <stddef.h>
11 #include <stdint.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include <time.h>
16 #include <unistd.h>
17
18 #include "defs.h"
19
// Compiler-specific spellings of the attributes used by the declarations below.
#if defined(__GNUC__)
// Expands to nothing here: syscall_t needs no explicit calling convention.
#define SYSCALLAPI
#define NORETURN __attribute__((noreturn))
#define ALIGNED(N) __attribute__((aligned(N)))
// The format string is argument 1 and the variadic arguments start at argument 2.
#define PRINTF __attribute__((format(printf, 1, 2)))
#else
// Assuming windows/cl.
#define SYSCALLAPI WINAPI
#define NORETURN __declspec(noreturn)
#define ALIGNED(N) __declspec(align(N))
// Expands to nothing on this branch.
#define PRINTF
#endif
32
// Fallback for the revision string printed by the "version" command
// when the build does not define GIT_REVISION.
#ifndef GIT_REVISION
#define GIT_REVISION "unknown"
#endif

// Number of elements in x; x must be a real array, not a pointer.
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
38
// uint64_t is impossible to printf without using the clumsy and verbose "%" PRId64.
// So we define and use uint64 (unsigned long long), which can be printed with %llu/%llx.
// Note: pkg/csource presumably does s/uint64/uint64_t/ (the substitution target was
// garbled in the previous wording of this comment; confirm against pkg/csource).
// Also define uint32/16/8 for consistency.
typedef unsigned long long uint64;
typedef unsigned int uint32;
typedef unsigned short uint16;
typedef unsigned char uint8;
46
// exit/_exit do not necessarily work (e.g. if fuzzer sets a seccomp filter that prohibits exit_group).
// Use doexit instead. We must redefine exit to something that exists in stdlib,
// because some standard libraries contain "using ::exit;", but with a different signature.
#define exit vsnprintf
51
// Note: zircon max fd is 256.
// Some common_OS.h files know about this constant for RLIMIT_NOFILE.
const int kMaxFd = 250;
// Number of slots in the threads[] pool.
const int kMaxThreads = 16;
const int kInPipeFd = kMaxFd - 1; // remapped from stdin
const int kOutPipeFd = kMaxFd - 2; // remapped from stdout
// First coverage fd; main() assigns kCoverFd + i to thread i.
const int kCoverFd = kOutPipeFd - kMaxThreads;
// Capacity of thread_t::args and upper bound on the number of arguments per call.
const int kMaxArgs = 9;
// execute_call() fails when a thread's collected cover size reaches this value.
const int kCoverSize = 256 << 10;
// Statuses that main() distinguishes after the sandbox function returns.
const int kFailStatus = 67;
const int kRetryStatus = 69;
const int kErrorStatus = 68;
64
// Logical error (e.g. invalid input program), use as an assert() alternative.
NORETURN PRINTF void fail(const char* msg, ...);
// Kernel error (e.g. wrong syscall return value).
NORETURN PRINTF void error(const char* msg, ...);
// Just exit (e.g. due to temporal ENOMEM error).
NORETURN PRINTF void exitf(const char* msg, ...);
// Print debug output, does not add \n at the end of msg as opposed to the previous functions.
PRINTF void debug(const char* msg, ...);
// Presumably dumps length bytes starting at data as debug output (defined elsewhere; confirm).
void debug_dump_data(const char* data, int length);
// Terminates the process with the given status; use it instead of exit/_exit.
NORETURN void doexit(int status);
75
// Control pipe protocol: read an execute request / write the final reply.
static void receive_execute();
static void reply_execute(int status);

#if GOOS_akaros
// Writes the last received execute request and the program to fd.
static void resend_execute(int fd);
#endif

#if SYZ_EXECUTOR_USES_FORK_SERVER
// Initial handshake over the control pipes.
static void receive_handshake();
static void reply_handshake();
#endif

#if SYZ_EXECUTOR_USES_SHMEM
// Size of the output region mapped by main().
const int kMaxOutput = 16 << 20;
// Fds of the input and output files; main() maps both and then closes them.
const int kInFd = 3;
const int kOutFd = 4;
// Start of the mapped output region.
uint32* output_data;
// Current write position in the output region; execute_one() resets it to output_data.
uint32* output_pos;
static uint32* write_output(uint32 v);
static void write_completed(uint32 completed);
static uint32 hash(uint32 a);
static bool dedup(uint32 sig);
#endif
99
// Sandbox kind selected by the env flags; main() dispatches to the
// matching do_sandbox_*() function.
enum sandbox_type {
	sandbox_none,
	sandbox_setuid,
	sandbox_namespace,
};
105
// Environment flags, decoded from the request by parse_env_flags().
bool flag_debug;
bool flag_cover;
// Note: not assigned anywhere in the visible part of this file.
bool flag_sandbox_privs;
sandbox_type flag_sandbox;
bool flag_enable_tun;
bool flag_enable_net_dev;
bool flag_enable_fault_injection;

// Per-execution flags, decoded from exec_flags by receive_execute().
bool flag_collect_cover;
bool flag_dedup_cover;
bool flag_threaded;
// Forced to false by receive_execute() when flag_threaded is not set.
bool flag_collide;

// If true, then executor should write the comparisons data to fuzzer.
bool flag_collect_comps;

// Inject fault into flag_fault_nth-th operation in flag_fault_call-th syscall.
bool flag_inject_fault;
int flag_fault_call;
int flag_fault_nth;
126
127 #define SYZ_EXECUTOR 1
128 #include "common.h"
129
// Size of results[]; result/copyout indices must be below this value.
const int kMaxCommands = 1000;
// Size of input_data, the buffer that holds the serialized program.
const int kMaxInput = 2 << 20;

// Special instruction codes in the input stream;
// execute_one() treats any other value as an index into syscalls[].
const uint64 instr_eof = -1;
const uint64 instr_copyin = -2;
const uint64 instr_copyout = -3;

// Argument kinds that follow a copyin instruction (see execute_one()).
const uint64 arg_const = 0;
const uint64 arg_result = 1;
const uint64 arg_data = 2;
const uint64 arg_csum = 3;

// Binary formats accepted by copyin().
const uint64 binary_format_native = 0;
const uint64 binary_format_bigendian = 1;
const uint64 binary_format_strdec = 2;
const uint64 binary_format_strhex = 3;
const uint64 binary_format_stroct = 4;

// copyout_index value meaning that the call's return value is not stored in results[].
const uint64 no_copyout = -1;

// Number of scheduled calls whose completion has not been handled yet
// (incremented by schedule_call(), decremented by handle_completion()).
int running;
// Number of call outputs written so far (see write_call_output()).
uint32 completed;
// Set by execute_one() when it starts the collide pass.
bool collide;
// Selects 64-bit vs 32-bit coverage PCs in write_call_output().
bool is_kernel_64_bit = true;

// Serialized program; with shmem main() maps the input file over this buffer,
// otherwise receive_execute() reads the program into it.
ALIGNED(64 << 10)
char input_data[kMaxInput];

// Checksum kinds.
const uint64 arg_csum_inet = 0;

// Checksum chunk kinds.
const uint64 arg_csum_chunk_data = 0;
const uint64 arg_csum_chunk_const = 1;
164
// Signature of a syscall implementation taking kMaxArgs (9) long arguments.
typedef long(SYSCALLAPI* syscall_t)(long, long, long, long, long, long, long, long, long);

// Descriptor of one entry in the syscalls[] table.
struct call_t {
	const char* name; // printed in debug output
	int sys_nr; // presumably the OS syscall number; not used in the visible part of this file
	syscall_t call; // presumably the implementation used by execute_syscall(); confirm
};
172
// Per-thread coverage collection state; filled in by the cover_*() helpers defined elsewhere.
struct cover_t {
	int fd; // main() assigns kCoverFd + thread index
	uint32 size; // number of collected entries (PCs or comparisons), as iterated by the writers
	char* data; // coverage buffer; readers skip the first word of it
	char* data_end; // used as the upper bound when validating the comparison count
};
179
// One slot of the worker pool. schedule_call() stores a call here and
// execute_call() runs it, either on the worker thread or directly from execute_one().
struct thread_t {
	int id; // index in threads[]
	bool created; // set by thread_create()
	event_t ready; // set by schedule_call() when a call is waiting to be executed
	event_t done; // set after the call has been executed; also set right after creation
	uint64* copyout_pos; // input position right after the call's arguments
	uint64 copyout_index; // results[] slot for the return value, or no_copyout
	bool colliding; // the call was scheduled during the collide pass
	bool executing; // a call is scheduled and its completion is not handled yet
	int call_index; // sequence number of the call within the program
	int call_num; // index into syscalls[]
	int num_args;
	long args[kMaxArgs];
	long res; // return value of execute_syscall()
	uint32 reserrno; // errno observed after the call
	bool fault_injected; // whether the requested fault was injected into this call
	cover_t cov;
};

static thread_t threads[kMaxThreads];
// Thread of the most recently scheduled call; write_call_output() reports
// any other thread's call as blocked.
static thread_t* last_scheduled;
201
// A value produced by a call (its return value or a copied-out memory value).
struct res_t {
	bool executed; // set once val has been filled in
	uint64 val;
};

// Result slots, indexed by the result/copyout index from the program.
res_t results[kMaxCommands];
208
// Magic values that prefix incoming requests and outgoing replies on the control pipes.
const uint64 kInMagic = 0xbadc0ffeebadface;
const uint32 kOutMagic = 0xbadf00d;

// Read from kInPipeFd by receive_handshake().
struct handshake_req {
	uint64 magic; // must be kInMagic
	uint64 flags; // env flags
	uint64 pid; // stored into procid
};

// Written to kOutPipeFd by reply_handshake().
struct handshake_reply {
	uint32 magic; // kOutMagic
};

// Read from kInPipeFd by receive_execute().
struct execute_req {
	uint64 magic; // must be kInMagic
	uint64 env_flags; // decoded by parse_env_flags()
	uint64 exec_flags; // decoded into the per-execution flag_* variables
	uint64 pid; // stored into procid
	uint64 fault_call; // stored into flag_fault_call
	uint64 fault_nth; // stored into flag_fault_nth
	uint64 prog_size; // size of the program that follows on the pipe (0 in shmem mode)
};

// Written to kOutPipeFd by reply_execute(); also the header of call_reply.
struct execute_reply {
	uint32 magic; // kOutMagic
	uint32 done; // 1 in the final reply, 0 in per-call replies
	uint32 status;
};
237
// call_reply.flags
const uint32 call_flag_executed = 1 << 0; // always set by write_call_output()
const uint32 call_flag_finished = 1 << 1; // the call has completed
const uint32 call_flag_blocked = 1 << 2; // the call finished on a thread other than the last scheduled one
const uint32 call_flag_fault_injected = 1 << 3; // the requested fault was injected

// Per-call reply written to kOutPipeFd when shared memory is not used.
struct call_reply {
	execute_reply header;
	uint32 call_index;
	uint32 call_num;
	uint32 reserrno;
	uint32 flags;
	uint32 signal_size;
	uint32 cover_size;
	uint32 comps_size;
	// signal/cover/comps follow
};
255
// Bits of kcov_comparison_t::type.
// NOTE(review): presumably mirrors the kernel's KCOV comparison encoding; confirm.
enum {
	KCOV_CMP_CONST = 1,
	KCOV_CMP_SIZE1 = 0,
	KCOV_CMP_SIZE2 = 2,
	KCOV_CMP_SIZE4 = 4,
	KCOV_CMP_SIZE8 = 6,
	KCOV_CMP_SIZE_MASK = 6,
};

// One comparison record in the coverage buffer.
// write_call_output() sorts and deduplicates these records before writing them out.
struct kcov_comparison_t {
	// Note: comparisons are always 64-bits regardless of kernel bitness.
	uint64 type;
	uint64 arg1;
	uint64 arg2;
	uint64 pc;

	// Records for which ignore() returns true are not written out.
	bool ignore() const;
	void write();
	// Used by std::unique and std::sort respectively.
	bool operator==(const struct kcov_comparison_t& other) const;
	bool operator<(const struct kcov_comparison_t& other) const;
};
277
// Forward declarations of helpers that are used before their definitions.
static thread_t* schedule_call(int call_index, int call_num, bool colliding, uint64 copyout_index, uint64 num_args, uint64* args, uint64* pos);
static void handle_completion(thread_t* th);
static void copyout_call_results(thread_t* th);
static void write_call_output(thread_t* th, bool finished);
static void execute_call(thread_t* th);
static void thread_create(thread_t* th, int id);
static void* worker_thread(void* arg);
// Input stream readers; each takes the address of the current position.
static uint64 read_input(uint64** input_posp, bool peek = false);
static uint64 read_arg(uint64** input_posp);
static uint64 read_const_arg(uint64** input_posp, uint64* size_p, uint64* bf, uint64* bf_off_p, uint64* bf_len_p);
static uint64 read_result(uint64** input_posp);
// Stores to / loads from test program memory.
static void copyin(char* addr, uint64 val, uint64 size, uint64 bf, uint64 bf_off, uint64 bf_len);
static bool copyout(char* addr, uint64 size, uint64* res);
static void setup_control_pipes();
292
293 #include "syscalls.h"
294
295 #if GOOS_linux
296 #include "executor_linux.h"
297 #elif GOOS_fuchsia
298 #include "executor_fuchsia.h"
299 #elif GOOS_akaros
300 #include "executor_akaros.h"
301 #elif GOOS_freebsd || GOOS_netbsd
302 #include "executor_bsd.h"
303 #elif GOOS_windows
304 #include "executor_windows.h"
305 #elif GOOS_test
306 #include "executor_test.h"
307 #else
308 #error "unknown OS"
309 #endif
310
311 #include "test.h"
312
// Entry point.
// "version" prints build identification and "test" runs the built-in tests.
// Otherwise the executor initializes the OS layer, maps the shared regions (if used),
// remaps the control pipes, receives the first request and runs the selected sandbox.
int main(int argc, char** argv)
{
	if (argc == 2 && strcmp(argv[1], "version") == 0) {
		puts(GOOS " " GOARCH " " SYZ_REVISION " " GIT_REVISION);
		return 0;
	}
	if (argc == 2 && strcmp(argv[1], "test") == 0)
		return run_tests();

	os_init(argc, argv, (void*)SYZ_DATA_OFFSET, SYZ_NUM_PAGES * SYZ_PAGE_SIZE);

#if SYZ_EXECUTOR_USES_SHMEM
	// Map the input file read-only on top of input_data.
	if (mmap(&input_data[0], kMaxInput, PROT_READ, MAP_PRIVATE | MAP_FIXED, kInFd, 0) != &input_data[0])
		fail("mmap of input file failed");
	// The output region is the only thing in executor process for which consistency matters.
	// If it is corrupted ipc package will fail to parse its contents and panic.
	// But fuzzer constantly invents new ways of how to corrupt the region,
	// so we map the region at a (hopefully) hard to guess address with random offset,
	// surrounded by unmapped pages.
	// The address chosen must also work on 32-bit kernels with 1GB user address space.
	void* preferred = (void*)(0x1b2bc20000ull + (1 << 20) * (getpid() % 128));
	output_data = (uint32*)mmap(preferred, kMaxOutput,
				    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, kOutFd, 0);
	if (output_data != preferred)
		fail("mmap of output file failed");

	// Prevent test programs to mess with these fds.
	// Due to races in collider mode, a program can e.g. ftruncate one of these fds,
	// which will cause fuzzer to crash.
	close(kInFd);
	close(kOutFd);
#endif

	use_temporary_dir();
	install_segv_handler();
	setup_control_pipes();
	// The flags (including flag_cover and flag_sandbox used below) are only known
	// after the first request has been received.
#if SYZ_EXECUTOR_USES_FORK_SERVER
	receive_handshake();
#else
	receive_execute();
#endif
	if (flag_cover) {
		// Each thread gets its own coverage fd, starting at kCoverFd.
		for (int i = 0; i < kMaxThreads; i++) {
			threads[i].cov.fd = kCoverFd + i;
			cover_open(&threads[i].cov);
		}
	}

	int status = 0;
	switch (flag_sandbox) {
	case sandbox_none:
		status = do_sandbox_none();
		break;
	case sandbox_setuid:
		status = do_sandbox_setuid();
		break;
	case sandbox_namespace:
		status = do_sandbox_namespace();
		break;
	default:
		fail("unknown sandbox type");
	}
#if SYZ_EXECUTOR_USES_FORK_SERVER
	// Other statuses happen when fuzzer processes manages to kill loop.
	if (status != kFailStatus && status != kErrorStatus)
		status = kRetryStatus;
	// If an external sandbox process wraps executor, the out pipe will be closed
	// before the sandbox process exits this will make ipc package kill the sandbox.
	// As the result sandbox process will exit with exit status 9 instead of the executor
	// exit status (notably kRetryStatus). Consequently, ipc will treat it as hard
	// failure rather than a temporal failure. So we duplicate the exit status on the pipe.
	reply_execute(status);
	errno = 0;
	if (status == kFailStatus)
		fail("loop failed");
	if (status == kErrorStatus)
		error("loop errored");
	// Loop can be killed by a test process with e.g.:
	// ptrace(PTRACE_SEIZE, 1, 0, 0x100040)
	// This is unfortunate, but I don't have a better solution than ignoring it for now.
	exitf("loop exited with status %d", status);
	// Unreachable.
	return 1;
#else
	reply_execute(status);
	return status;
#endif
}
401
setup_control_pipes()402 void setup_control_pipes()
403 {
404 if (dup2(0, kInPipeFd) < 0)
405 fail("dup2(0, kInPipeFd) failed");
406 if (dup2(1, kOutPipeFd) < 0)
407 fail("dup2(1, kOutPipeFd) failed");
408 if (dup2(2, 1) < 0)
409 fail("dup2(2, 1) failed");
410 // We used to close(0), but now we dup stderr to stdin to keep fd numbers
411 // stable across executor and C programs generated by pkg/csource.
412 if (dup2(2, 0) < 0)
413 fail("dup2(2, 0) failed");
414 }
415
parse_env_flags(uint64 flags)416 void parse_env_flags(uint64 flags)
417 {
418 flag_debug = flags & (1 << 0);
419 flag_cover = flags & (1 << 1);
420 flag_sandbox = sandbox_none;
421 if (flags & (1 << 2))
422 flag_sandbox = sandbox_setuid;
423 else if (flags & (1 << 3))
424 flag_sandbox = sandbox_namespace;
425 flag_enable_tun = flags & (1 << 4);
426 flag_enable_net_dev = flags & (1 << 5);
427 flag_enable_fault_injection = flags & (1 << 6);
428 }
429
430 #if SYZ_EXECUTOR_USES_FORK_SERVER
receive_handshake()431 void receive_handshake()
432 {
433 handshake_req req = {};
434 int n = read(kInPipeFd, &req, sizeof(req));
435 if (n != sizeof(req))
436 fail("handshake read failed: %d", n);
437 if (req.magic != kInMagic)
438 fail("bad handshake magic 0x%llx", req.magic);
439 parse_env_flags(req.flags);
440 procid = req.pid;
441 }
442
reply_handshake()443 void reply_handshake()
444 {
445 handshake_reply reply = {};
446 reply.magic = kOutMagic;
447 if (write(kOutPipeFd, &reply, sizeof(reply)) != sizeof(reply))
448 fail("control pipe write failed");
449 }
450 #endif
451
452 static execute_req last_execute_req;
453
receive_execute()454 void receive_execute()
455 {
456 execute_req& req = last_execute_req;
457 if (read(kInPipeFd, &req, sizeof(req)) != (ssize_t)sizeof(req))
458 fail("control pipe read failed");
459 if (req.magic != kInMagic)
460 fail("bad execute request magic 0x%llx", req.magic);
461 if (req.prog_size > kMaxInput)
462 fail("bad execute prog size 0x%llx", req.prog_size);
463 parse_env_flags(req.env_flags);
464 procid = req.pid;
465 flag_collect_cover = req.exec_flags & (1 << 0);
466 flag_dedup_cover = req.exec_flags & (1 << 1);
467 flag_inject_fault = req.exec_flags & (1 << 2);
468 flag_collect_comps = req.exec_flags & (1 << 3);
469 flag_threaded = req.exec_flags & (1 << 4);
470 flag_collide = req.exec_flags & (1 << 5);
471 flag_fault_call = req.fault_call;
472 flag_fault_nth = req.fault_nth;
473 if (!flag_threaded)
474 flag_collide = false;
475 debug("exec opts: pid=%llu threaded=%d collide=%d cover=%d comps=%d dedup=%d fault=%d/%d/%d prog=%llu\n",
476 procid, flag_threaded, flag_collide, flag_collect_cover, flag_collect_comps,
477 flag_dedup_cover, flag_inject_fault, flag_fault_call, flag_fault_nth,
478 req.prog_size);
479 if (SYZ_EXECUTOR_USES_SHMEM) {
480 if (req.prog_size)
481 fail("need_prog: no program");
482 return;
483 }
484 if (req.prog_size == 0)
485 fail("need_prog: no program");
486 uint64 pos = 0;
487 for (;;) {
488 ssize_t rv = read(kInPipeFd, input_data + pos, sizeof(input_data) - pos);
489 if (rv < 0)
490 fail("read failed");
491 pos += rv;
492 if (rv == 0 || pos >= req.prog_size)
493 break;
494 }
495 if (pos != req.prog_size)
496 fail("bad input size %lld, want %lld", pos, req.prog_size);
497 }
498
499 #if GOOS_akaros
resend_execute(int fd)500 void resend_execute(int fd)
501 {
502 execute_req& req = last_execute_req;
503 if (write(fd, &req, sizeof(req)) != sizeof(req))
504 fail("child pipe header write failed");
505 if (write(fd, input_data, req.prog_size) != (ssize_t)req.prog_size)
506 fail("child pipe program write failed");
507 }
508 #endif
509
reply_execute(int status)510 void reply_execute(int status)
511 {
512 execute_reply reply = {};
513 reply.magic = kOutMagic;
514 reply.done = true;
515 reply.status = status;
516 if (write(kOutPipeFd, &reply, sizeof(reply)) != sizeof(reply))
517 fail("control pipe write failed");
518 }
519
// execute_one executes program stored in input_data.
// The program is a stream of uint64 words: copyin instructions are performed
// immediately, copyout instructions are skipped here (they are processed when the
// preceding call completes), and every other instruction is a syscall that is
// scheduled on a thread. If collide mode is enabled, the program is executed
// a second time with only every other call being waited for.
void execute_one()
{
	// Duplicate global collide variable on stack.
	// Fuzzer once come up with ioctl(fd, FIONREAD, 0x920000),
	// where 0x920000 was exactly collide address, so every iteration reset collide to 0.
	bool colliding = false;
#if SYZ_EXECUTOR_USES_SHMEM
	output_pos = output_data;
	write_output(0); // Number of executed syscalls (updated later).
#endif
	uint64 start = current_time_ms();

retry:
	// Both passes start from the beginning of the program.
	uint64* input_pos = (uint64*)input_data;

	if (flag_cover && !colliding && !flag_threaded)
		cover_enable(&threads[0].cov, flag_collect_comps);

	int call_index = 0;
	for (;;) {
		uint64 call_num = read_input(&input_pos);
		if (call_num == instr_eof)
			break;
		if (call_num == instr_copyin) {
			// Layout: address, argument kind, kind-specific payload.
			char* addr = (char*)read_input(&input_pos);
			uint64 typ = read_input(&input_pos);
			switch (typ) {
			case arg_const: {
				uint64 size, bf, bf_off, bf_len;
				uint64 arg = read_const_arg(&input_pos, &size, &bf, &bf_off, &bf_len);
				copyin(addr, arg, size, bf, bf_off, bf_len);
				break;
			}
			case arg_result: {
				// Low byte of meta is the size, the remaining bits are the binary format.
				uint64 meta = read_input(&input_pos);
				uint64 size = meta & 0xff;
				uint64 bf = meta >> 8;
				uint64 val = read_result(&input_pos);
				copyin(addr, val, size, bf, 0, 0);
				break;
			}
			case arg_data: {
				uint64 size = read_input(&input_pos);
				NONFAILING(memcpy(addr, input_pos, size));
				// Read out the data.
				// The data occupies size bytes rounded up to whole 8-byte words.
				for (uint64 i = 0; i < (size + 7) / 8; i++)
					read_input(&input_pos);
				break;
			}
			case arg_csum: {
				debug("checksum found at %p\n", addr);
				uint64 size = read_input(&input_pos);
				char* csum_addr = addr;
				uint64 csum_kind = read_input(&input_pos);
				switch (csum_kind) {
				case arg_csum_inet: {
					if (size != 2)
						fail("inet checksum must be 2 bytes, not %llu", size);
					debug("calculating checksum for %p\n", csum_addr);
					struct csum_inet csum;
					csum_inet_init(&csum);
					uint64 chunks_num = read_input(&input_pos);
					uint64 chunk;
					// Each chunk is a (kind, value, size) triple.
					for (chunk = 0; chunk < chunks_num; chunk++) {
						uint64 chunk_kind = read_input(&input_pos);
						uint64 chunk_value = read_input(&input_pos);
						uint64 chunk_size = read_input(&input_pos);
						switch (chunk_kind) {
						case arg_csum_chunk_data:
							// chunk_value is an address in test program memory.
							debug("#%lld: data chunk, addr: %llx, size: %llu\n", chunk, chunk_value, chunk_size);
							NONFAILING(csum_inet_update(&csum, (const uint8*)chunk_value, chunk_size));
							break;
						case arg_csum_chunk_const:
							if (chunk_size != 2 && chunk_size != 4 && chunk_size != 8) {
								fail("bad checksum const chunk size %lld\n", chunk_size);
							}
							// Here we assume that const values come to us big endian.
							debug("#%lld: const chunk, value: %llx, size: %llu\n", chunk, chunk_value, chunk_size);
							csum_inet_update(&csum, (const uint8*)&chunk_value, chunk_size);
							break;
						default:
							fail("bad checksum chunk kind %llu", chunk_kind);
						}
					}
					uint16 csum_value = csum_inet_digest(&csum);
					debug("writing inet checksum %hx to %p\n", csum_value, csum_addr);
					copyin(csum_addr, csum_value, 2, binary_format_native, 0, 0);
					break;
				}
				default:
					fail("bad checksum kind %llu", csum_kind);
				}
				break;
			}
			default:
				fail("bad argument type %llu", typ);
			}
			continue;
		}
		if (call_num == instr_copyout) {
			read_input(&input_pos); // index
			read_input(&input_pos); // addr
			read_input(&input_pos); // size
			// The copyout will happen when/if the call completes.
			continue;
		}

		// Normal syscall.
		if (call_num >= ARRAY_SIZE(syscalls))
			fail("invalid command number %llu", call_num);
		uint64 copyout_index = read_input(&input_pos);
		uint64 num_args = read_input(&input_pos);
		if (num_args > kMaxArgs)
			fail("command has bad number of arguments %llu", num_args);
		uint64 args[kMaxArgs] = {};
		for (uint64 i = 0; i < num_args; i++)
			args[i] = read_arg(&input_pos);
		// Note: args is already zero-initialized above, so this loop does not change anything.
		for (uint64 i = num_args; i < 6; i++)
			args[i] = 0;
		// input_pos now points right after the call, where its copyout instructions (if any) start.
		thread_t* th = schedule_call(call_index++, call_num, colliding, copyout_index,
					     num_args, args, input_pos);

		if (colliding && (call_index % 2) == 0) {
			// Don't wait for every other call.
			// We already have results from the previous execution.
		} else if (flag_threaded) {
			// Wait for call completion.
			// Note: sys knows about this 25ms timeout when it generates timespec/timeval values.
			// NOTE(review): the value used below is 45ms (1000ms with flag_debug), not 25ms;
			// confirm which value sys actually assumes.
			const uint64 timeout_ms = flag_debug ? 1000 : 45;
			if (event_timedwait(&th->done, timeout_ms))
				handle_completion(th);
			// Check if any of previous calls have completed.
			for (int i = 0; i < kMaxThreads; i++) {
				th = &threads[i];
				if (th->executing && event_isset(&th->done))
					handle_completion(th);
			}
		} else {
			// Execute directly.
			if (th != &threads[0])
				fail("using non-main thread in non-thread mode");
			event_reset(&th->ready);
			execute_call(th);
			event_set(&th->done);
			handle_completion(th);
		}
	}

	if (!colliding && !collide && running > 0) {
		// Give unfinished syscalls some additional time.
		// Clearing last_scheduled makes every call finishing from now on be reported as blocked.
		last_scheduled = 0;
		uint64 wait = 100;
		uint64 wait_start = current_time_ms();
		uint64 wait_end = wait_start + wait;
		// Wait at least until 800ms have passed since the start of the execution.
		if (wait_end < start + 800)
			wait_end = start + 800;
		while (running > 0 && current_time_ms() <= wait_end) {
			sleep_ms(1);
			for (int i = 0; i < kMaxThreads; i++) {
				thread_t* th = &threads[i];
				if (th->executing && event_isset(&th->done))
					handle_completion(th);
			}
		}
		// Write output coverage for unfinished calls.
		if (running > 0) {
			for (int i = 0; i < kMaxThreads; i++) {
				thread_t* th = &threads[i];
				if (th->executing) {
					if (flag_cover)
						cover_collect(&th->cov);
					write_call_output(th, false);
				}
			}
		}
	}

	if (flag_collide && !flag_inject_fault && !colliding && !collide) {
		// Second pass over the same program; both the global and the stack copy are set.
		debug("enabling collider\n");
		collide = colliding = true;
		goto retry;
	}
}
704
schedule_call(int call_index,int call_num,bool colliding,uint64 copyout_index,uint64 num_args,uint64 * args,uint64 * pos)705 thread_t* schedule_call(int call_index, int call_num, bool colliding, uint64 copyout_index, uint64 num_args, uint64* args, uint64* pos)
706 {
707 // Find a spare thread to execute the call.
708 int i;
709 for (i = 0; i < kMaxThreads; i++) {
710 thread_t* th = &threads[i];
711 if (!th->created)
712 thread_create(th, i);
713 if (event_isset(&th->done)) {
714 if (th->executing)
715 handle_completion(th);
716 break;
717 }
718 }
719 if (i == kMaxThreads)
720 exitf("out of threads");
721 thread_t* th = &threads[i];
722 debug("scheduling call %d [%s] on thread %d\n", call_index, syscalls[call_num].name, th->id);
723 if (event_isset(&th->ready) || !event_isset(&th->done) || th->executing)
724 fail("bad thread state in schedule: ready=%d done=%d executing=%d",
725 event_isset(&th->ready), event_isset(&th->done), th->executing);
726 last_scheduled = th;
727 th->colliding = colliding;
728 th->copyout_pos = pos;
729 th->copyout_index = copyout_index;
730 event_reset(&th->done);
731 th->executing = true;
732 th->call_index = call_index;
733 th->call_num = call_num;
734 th->num_args = num_args;
735 for (int i = 0; i < kMaxArgs; i++)
736 th->args[i] = args[i];
737 event_set(&th->ready);
738 running++;
739 return th;
740 }
741
742 #if SYZ_EXECUTOR_USES_SHMEM
// Writes the feedback signal and, if requested, the raw coverage of th to the output region.
// cover_t is the type of one PC in the coverage buffer (uint64 or uint32, chosen by the caller).
// The number of written signals is stored to *signal_count_pos and, when
// flag_collect_cover is set, the number of written PCs to *cover_count_pos.
template <typename cover_t>
void write_coverage_signal(thread_t* th, uint32* signal_count_pos, uint32* cover_count_pos)
{
	// Write out feedback signals.
	// Currently it is code edges computed as xor of two subsequent basic block PCs.
	// The PCs start at the second element of the coverage buffer.
	cover_t* cover_data = ((cover_t*)th->cov.data) + 1;
	uint32 nsig = 0;
	cover_t prev = 0;
	for (uint32 i = 0; i < th->cov.size; i++) {
		cover_t pc = cover_data[i];
		if (!cover_check(pc)) {
			debug("got bad pc: 0x%llx\n", (uint64)pc);
			doexit(0);
		}
		// The previous PC is hashed before being xor-ed with the current one.
		cover_t sig = pc ^ prev;
		prev = hash(pc);
		// Skip signals that dedup() reports as already seen.
		if (dedup(sig))
			continue;
		write_output(sig);
		nsig++;
	}
	// Write out number of signals.
	*signal_count_pos = nsig;

	if (!flag_collect_cover)
		return;
	// Write out real coverage (basic block PCs).
	uint32 cover_size = th->cov.size;
	if (flag_dedup_cover) {
		// Sorts the coverage buffer in place and drops duplicate PCs.
		cover_t* end = cover_data + cover_size;
		std::sort(cover_data, end);
		cover_size = std::unique(cover_data, end) - cover_data;
	}
	// Truncate PCs to uint32 assuming that they fit into 32-bits.
	// True for x86_64 and arm64 without KASLR.
	for (uint32 i = 0; i < cover_size; i++)
		write_output(cover_data[i]);
	*cover_count_pos = cover_size;
}
782 #endif
783
handle_completion(thread_t * th)784 void handle_completion(thread_t* th)
785 {
786 debug("completion of call %d [%s] on thread %d\n", th->call_index, syscalls[th->call_num].name, th->id);
787 if (event_isset(&th->ready) || !event_isset(&th->done) || !th->executing)
788 fail("bad thread state in completion: ready=%d done=%d executing=%d",
789 event_isset(&th->ready), event_isset(&th->done), th->executing);
790 if (th->res != (long)-1)
791 copyout_call_results(th);
792 if (!collide && !th->colliding)
793 write_call_output(th, true);
794 th->executing = false;
795 running--;
796 if (running < 0)
797 fail("running = %d", running);
798 }
799
copyout_call_results(thread_t * th)800 void copyout_call_results(thread_t* th)
801 {
802 if (th->copyout_index != no_copyout) {
803 if (th->copyout_index >= kMaxCommands)
804 fail("result idx %lld overflows kMaxCommands", th->copyout_index);
805 results[th->copyout_index].executed = true;
806 results[th->copyout_index].val = th->res;
807 }
808 for (bool done = false; !done;) {
809 uint64 instr = read_input(&th->copyout_pos);
810 switch (instr) {
811 case instr_copyout: {
812 uint64 index = read_input(&th->copyout_pos);
813 if (index >= kMaxCommands)
814 fail("result idx %lld overflows kMaxCommands", index);
815 char* addr = (char*)read_input(&th->copyout_pos);
816 uint64 size = read_input(&th->copyout_pos);
817 uint64 val = 0;
818 if (copyout(addr, size, &val)) {
819 results[index].executed = true;
820 results[index].val = val;
821 }
822 debug("copyout 0x%llx from %p\n", val, addr);
823 break;
824 }
825 default:
826 done = true;
827 break;
828 }
829 }
830 }
831
// Reports the outcome of the call on th to the fuzzer.
// finished tells whether the call has completed; for unfinished calls errno is
// reported as 999 and only the executed flag is set.
// With shared memory the record (plus signal/coverage or comparisons) is appended to
// the output region; otherwise a fixed-size call_reply is written to the control pipe.
void write_call_output(thread_t* th, bool finished)
{
	uint32 reserrno = 999;
	uint32 call_flags = call_flag_executed;
	// A call is considered blocked if another call was scheduled after it.
	const bool blocked = th != last_scheduled;
	if (finished) {
		reserrno = th->res != -1 ? 0 : th->reserrno;
		call_flags |= call_flag_finished |
			      (blocked ? call_flag_blocked : 0) |
			      (th->fault_injected ? call_flag_fault_injected : 0);
	}
#if SYZ_EXECUTOR_USES_SHMEM
	write_output(th->call_index);
	write_output(th->call_num);
	write_output(reserrno);
	write_output(call_flags);
	uint32* signal_count_pos = write_output(0); // filled in later
	uint32* cover_count_pos = write_output(0); // filled in later
	uint32* comps_count_pos = write_output(0); // filled in later

	if (flag_collect_comps) {
		// Collect only the comparisons
		uint32 ncomps = th->cov.size;
		// The comparison records start after the first 64-bit word of the buffer.
		kcov_comparison_t* start = (kcov_comparison_t*)(th->cov.data + sizeof(uint64));
		kcov_comparison_t* end = start + ncomps;
		if ((char*)end > th->cov.data_end)
			fail("too many comparisons %u", ncomps);
		// Sort and deduplicate the records in place.
		std::sort(start, end);
		ncomps = std::unique(start, end) - start;
		uint32 comps_size = 0;
		for (uint32 i = 0; i < ncomps; ++i) {
			if (start[i].ignore())
				continue;
			comps_size++;
			start[i].write();
		}
		// Write out number of comparisons.
		*comps_count_pos = comps_size;
	} else if (flag_cover) {
		// The PC width in the coverage buffer depends on the kernel bitness.
		if (is_kernel_64_bit)
			write_coverage_signal<uint64>(th, signal_count_pos, cover_count_pos);
		else
			write_coverage_signal<uint32>(th, signal_count_pos, cover_count_pos);
	}
	debug("out #%u: index=%u num=%u errno=%d finished=%d blocked=%d sig=%u cover=%u comps=%u\n",
	      completed, th->call_index, th->call_num, reserrno, finished, blocked,
	      *signal_count_pos, *cover_count_pos, *comps_count_pos);
	completed++;
	write_completed(completed);
#else
	// No coverage data is sent over the pipe, so all sizes are zero.
	call_reply reply;
	reply.header.magic = kOutMagic;
	reply.header.done = 0;
	reply.header.status = 0;
	reply.call_index = th->call_index;
	reply.call_num = th->call_num;
	reply.reserrno = reserrno;
	reply.flags = call_flags;
	reply.signal_size = 0;
	reply.cover_size = 0;
	reply.comps_size = 0;
	if (write(kOutPipeFd, &reply, sizeof(reply)) != sizeof(reply))
		fail("control pipe call write failed");
	debug("out: index=%u num=%u errno=%d finished=%d blocked=%d\n",
	      th->call_index, th->call_num, reserrno, finished, blocked);
#endif
}
899
thread_create(thread_t * th,int id)900 void thread_create(thread_t* th, int id)
901 {
902 th->created = true;
903 th->id = id;
904 th->executing = false;
905 event_init(&th->ready);
906 event_init(&th->done);
907 event_set(&th->done);
908 if (flag_threaded)
909 thread_start(worker_thread, th);
910 }
911
worker_thread(void * arg)912 void* worker_thread(void* arg)
913 {
914 thread_t* th = (thread_t*)arg;
915
916 if (flag_cover)
917 cover_enable(&th->cov, flag_collect_comps);
918 for (;;) {
919 event_wait(&th->ready);
920 event_reset(&th->ready);
921 execute_call(th);
922 event_set(&th->done);
923 }
924 return 0;
925 }
926
// Executes the call stored in th and records its outcome in th:
// res/reserrno, the collected coverage (if enabled) and whether the requested
// fault was injected.
void execute_call(thread_t* th)
{
	const call_t* call = &syscalls[th->call_num];
	// Print the call with its arguments as "name(0x.., 0x..)".
	debug("#%d: %s(", th->id, call->name);
	for (int i = 0; i < th->num_args; i++) {
		if (i != 0)
			debug(", ");
		debug("0x%lx", th->args[i]);
	}
	debug(")\n");

	int fail_fd = -1;
	// Arm fault injection only for the call selected by flag_fault_call.
	if (flag_inject_fault && th->call_index == flag_fault_call) {
		if (collide)
			fail("both collide and fault injection are enabled");
		debug("injecting fault into %d-th operation\n", flag_fault_nth);
		fail_fd = inject_fault(flag_fault_nth);
	}

	if (flag_cover)
		cover_reset(&th->cov);
	// Clear errno so that a stale value is not attributed to this call.
	errno = 0;
	th->res = execute_syscall(call, th->args);
	th->reserrno = errno;
	if (th->res == -1 && th->reserrno == 0)
		th->reserrno = EINVAL; // our syz syscalls may misbehave
	if (flag_cover) {
		cover_collect(&th->cov);
		debug("#%d: read cover size = %u\n", th->id, th->cov.size);
		if (th->cov.size >= kCoverSize)
			fail("#%d: too much cover %u", th->id, th->cov.size);
	}
	th->fault_injected = false;

	// Check whether the fault armed above was actually injected.
	if (flag_inject_fault && th->call_index == flag_fault_call) {
		th->fault_injected = fault_injected(fail_fd);
		debug("fault injected: %d\n", th->fault_injected);
	}

	if (th->res == -1)
		debug("#%d: %s = errno(%d)\n", th->id, call->name, th->reserrno);
	else
		debug("#%d: %s = 0x%lx\n", th->id, call->name, th->res);
}
971
972 #if SYZ_EXECUTOR_USES_SHMEM
hash(uint32 a)973 static uint32 hash(uint32 a)
974 {
975 a = (a ^ 61) ^ (a >> 16);
976 a = a + (a << 3);
977 a = a ^ (a >> 4);
978 a = a * 0x27d4eb2d;
979 a = a ^ (a >> 15);
980 return a;
981 }
982
983 const uint32 dedup_table_size = 8 << 10;
984 uint32 dedup_table[dedup_table_size];
985
986 // Poorman's best-effort hashmap-based deduplication.
987 // The hashmap is global which means that we deduplicate across different calls.
988 // This is OK because we are interested only in new signals.
dedup(uint32 sig)989 static bool dedup(uint32 sig)
990 {
991 for (uint32 i = 0; i < 4; i++) {
992 uint32 pos = (sig + i) % dedup_table_size;
993 if (dedup_table[pos] == sig)
994 return true;
995 if (dedup_table[pos] == 0) {
996 dedup_table[pos] = sig;
997 return false;
998 }
999 }
1000 dedup_table[sig % dedup_table_size] = sig;
1001 return false;
1002 }
1003 #endif
1004
// Stores val into test program memory at addr.
//   size           - size of the destination in bytes
//   bf             - one of the binary_format_* values
//   bf_off, bf_len - bitfield offset/length, only allowed with binary_format_native
// Native format stores a 1/2/4/8-byte integer through STORE_BY_BITMASK; the string
// formats print val as zero-padded decimal, hex or octal text of a fixed width.
// Stores are wrapped in NONFAILING since addr comes from the test program;
// inconsistent size/format combinations are reported via fail().
void copyin(char* addr, uint64 val, uint64 size, uint64 bf, uint64 bf_off, uint64 bf_len)
{
	if (bf != binary_format_native && (bf_off != 0 || bf_len != 0))
		fail("bitmask for string format %llu/%llu", bf_off, bf_len);
	switch (bf) {
	case binary_format_native:
		NONFAILING(switch (size) {
			case 1:
				STORE_BY_BITMASK(uint8, addr, val, bf_off, bf_len);
				break;
			case 2:
				STORE_BY_BITMASK(uint16, addr, val, bf_off, bf_len);
				break;
			case 4:
				STORE_BY_BITMASK(uint32, addr, val, bf_off, bf_len);
				break;
			case 8:
				STORE_BY_BITMASK(uint64, addr, val, bf_off, bf_len);
				break;
			default:
				fail("copyin: bad argument size %llu", size);
		});
		break;
	// Note for the string formats below: sprintf also writes a terminating NUL,
	// i.e. one byte more than size.
	case binary_format_strdec:
		// 20 decimal digits.
		if (size != 20)
			fail("bad strdec size %llu", size);
		NONFAILING(sprintf((char*)addr, "%020llu", val));
		break;
	case binary_format_strhex:
		// "0x" followed by 16 hex digits.
		if (size != 18)
			fail("bad strhex size %llu", size);
		NONFAILING(sprintf((char*)addr, "0x%016llx", val));
		break;
	case binary_format_stroct:
		// 23 octal digits.
		if (size != 23)
			fail("bad stroct size %llu", size);
		NONFAILING(sprintf((char*)addr, "%023llo", val));
		break;
	default:
		// Note: binary_format_bigendian is not handled by this switch and ends up here as well.
		fail("unknown binary format %llu", bf);
	}
}
1047
copyout(char * addr,uint64 size,uint64 * res)1048 bool copyout(char* addr, uint64 size, uint64* res)
1049 {
1050 bool ok = false;
1051 NONFAILING(
1052 switch (size) {
1053 case 1:
1054 *res = *(uint8*)addr;
1055 break;
1056 case 2:
1057 *res = *(uint16*)addr;
1058 break;
1059 case 4:
1060 *res = *(uint32*)addr;
1061 break;
1062 case 8:
1063 *res = *(uint64*)addr;
1064 break;
1065 default:
1066 fail("copyout: bad argument size %llu", size);
1067 } __atomic_store_n(&ok, true, __ATOMIC_RELEASE););
1068 return ok;
1069 }
1070
read_arg(uint64 ** input_posp)1071 uint64 read_arg(uint64** input_posp)
1072 {
1073 uint64 typ = read_input(input_posp);
1074 switch (typ) {
1075 case arg_const: {
1076 uint64 size, bf, bf_off, bf_len;
1077 uint64 val = read_const_arg(input_posp, &size, &bf, &bf_off, &bf_len);
1078 if (bf != binary_format_native)
1079 fail("bad argument binary format %llu", bf);
1080 if (bf_off != 0 || bf_len != 0)
1081 fail("bad argument bitfield %llu/%llu", bf_off, bf_len);
1082 return val;
1083 }
1084 case arg_result: {
1085 uint64 meta = read_input(input_posp);
1086 uint64 bf = meta >> 8;
1087 if (bf != binary_format_native)
1088 fail("bad result argument format %llu", bf);
1089 return read_result(input_posp);
1090 }
1091 default:
1092 fail("bad argument type %llu", typ);
1093 }
1094 }
1095
read_const_arg(uint64 ** input_posp,uint64 * size_p,uint64 * bf_p,uint64 * bf_off_p,uint64 * bf_len_p)1096 uint64 read_const_arg(uint64** input_posp, uint64* size_p, uint64* bf_p, uint64* bf_off_p, uint64* bf_len_p)
1097 {
1098 uint64 meta = read_input(input_posp);
1099 uint64 val = read_input(input_posp);
1100 *size_p = meta & 0xff;
1101 uint64 bf = (meta >> 8) & 0xff;
1102 *bf_off_p = (meta >> 16) & 0xff;
1103 *bf_len_p = (meta >> 24) & 0xff;
1104 uint64 pid_stride = meta >> 32;
1105 val += pid_stride * procid;
1106 if (bf == binary_format_bigendian) {
1107 bf = binary_format_native;
1108 switch (*size_p) {
1109 case 2:
1110 val = htobe16(val);
1111 break;
1112 case 4:
1113 val = htobe32(val);
1114 break;
1115 case 8:
1116 val = htobe64(val);
1117 break;
1118 default:
1119 fail("bad big-endian int size %llu", *size_p);
1120 }
1121 }
1122 *bf_p = bf;
1123 return val;
1124 }
1125
read_result(uint64 ** input_posp)1126 uint64 read_result(uint64** input_posp)
1127 {
1128 uint64 idx = read_input(input_posp);
1129 uint64 op_div = read_input(input_posp);
1130 uint64 op_add = read_input(input_posp);
1131 uint64 arg = read_input(input_posp);
1132 if (idx >= kMaxCommands)
1133 fail("command refers to bad result %lld", idx);
1134 if (results[idx].executed) {
1135 arg = results[idx].val;
1136 if (op_div != 0)
1137 arg = arg / op_div;
1138 arg += op_add;
1139 }
1140 return arg;
1141 }
1142
read_input(uint64 ** input_posp,bool peek)1143 uint64 read_input(uint64** input_posp, bool peek)
1144 {
1145 uint64* input_pos = *input_posp;
1146 if ((char*)input_pos >= input_data + kMaxInput)
1147 fail("input command overflows input %p: [%p:%p)", input_pos, input_data, input_data + kMaxInput);
1148 if (!peek)
1149 *input_posp = input_pos + 1;
1150 return *input_pos;
1151 }
1152
1153 #if SYZ_EXECUTOR_USES_SHMEM
write_output(uint32 v)1154 uint32* write_output(uint32 v)
1155 {
1156 if (output_pos < output_data || (char*)output_pos >= (char*)output_data + kMaxOutput)
1157 fail("output overflow: pos=%p region=[%p:%p]",
1158 output_pos, output_data, (char*)output_data + kMaxOutput);
1159 *output_pos = v;
1160 return output_pos++;
1161 }
1162
write_completed(uint32 completed)1163 void write_completed(uint32 completed)
1164 {
1165 __atomic_store_n(output_data, completed, __ATOMIC_RELEASE);
1166 }
1167 #endif
1168
1169 #if SYZ_EXECUTOR_USES_SHMEM
write()1170 void kcov_comparison_t::write()
1171 {
1172 // Write order: type arg1 arg2 pc.
1173 write_output((uint32)type);
1174
1175 // KCOV converts all arguments of size x first to uintx_t and then to
1176 // uint64. We want to properly extend signed values, e.g we want
1177 // int8 c = 0xfe to be represented as 0xfffffffffffffffe.
1178 // Note that uint8 c = 0xfe will be represented the same way.
1179 // This is ok because during hints processing we will anyways try
1180 // the value 0x00000000000000fe.
1181 switch (type & KCOV_CMP_SIZE_MASK) {
1182 case KCOV_CMP_SIZE1:
1183 arg1 = (uint64)(long long)(signed char)arg1;
1184 arg2 = (uint64)(long long)(signed char)arg2;
1185 break;
1186 case KCOV_CMP_SIZE2:
1187 arg1 = (uint64)(long long)(short)arg1;
1188 arg2 = (uint64)(long long)(short)arg2;
1189 break;
1190 case KCOV_CMP_SIZE4:
1191 arg1 = (uint64)(long long)(int)arg1;
1192 arg2 = (uint64)(long long)(int)arg2;
1193 break;
1194 }
1195 bool is_size_8 = (type & KCOV_CMP_SIZE_MASK) == KCOV_CMP_SIZE8;
1196 if (!is_size_8) {
1197 write_output((uint32)arg1);
1198 write_output((uint32)arg2);
1199 return;
1200 }
1201 // If we have 64 bits arguments then write them in Little-endian.
1202 write_output((uint32)(arg1 & 0xFFFFFFFF));
1203 write_output((uint32)(arg1 >> 32));
1204 write_output((uint32)(arg2 & 0xFFFFFFFF));
1205 write_output((uint32)(arg2 >> 32));
1206 }
1207
ignore() const1208 bool kcov_comparison_t::ignore() const
1209 {
1210 // Comparisons with 0 are not interesting, fuzzer should be able to guess 0's without help.
1211 if (arg1 == 0 && (arg2 == 0 || (type & KCOV_CMP_CONST)))
1212 return true;
1213 if ((type & KCOV_CMP_SIZE_MASK) == KCOV_CMP_SIZE8) {
1214 // This can be a pointer (assuming 64-bit kernel).
1215 // First of all, we want avert fuzzer from our output region.
1216 // Without this fuzzer manages to discover and corrupt it.
1217 uint64 out_start = (uint64)output_data;
1218 uint64 out_end = out_start + kMaxOutput;
1219 if (arg1 >= out_start && arg1 <= out_end)
1220 return true;
1221 if (arg2 >= out_start && arg2 <= out_end)
1222 return true;
1223 #if defined(GOOS_linux)
1224 // Filter out kernel physical memory addresses.
1225 // These are internal kernel comparisons and should not be interesting.
1226 // The range covers first 1TB of physical mapping.
1227 uint64 kmem_start = (uint64)0xffff880000000000ull;
1228 uint64 kmem_end = (uint64)0xffff890000000000ull;
1229 bool kptr1 = arg1 >= kmem_start && arg1 <= kmem_end;
1230 bool kptr2 = arg2 >= kmem_start && arg2 <= kmem_end;
1231 if (kptr1 && kptr2)
1232 return true;
1233 if (kptr1 && arg2 == 0)
1234 return true;
1235 if (kptr2 && arg1 == 0)
1236 return true;
1237 #endif
1238 }
1239 return false;
1240 }
1241
operator ==(const struct kcov_comparison_t & other) const1242 bool kcov_comparison_t::operator==(const struct kcov_comparison_t& other) const
1243 {
1244 // We don't check for PC equality now, because it is not used.
1245 return type == other.type && arg1 == other.arg1 && arg2 == other.arg2;
1246 }
1247
operator <(const struct kcov_comparison_t & other) const1248 bool kcov_comparison_t::operator<(const struct kcov_comparison_t& other) const
1249 {
1250 if (type != other.type)
1251 return type < other.type;
1252 if (arg1 != other.arg1)
1253 return arg1 < other.arg1;
1254 // We don't check for PC equality now, because it is not used.
1255 return arg2 < other.arg2;
1256 }
1257 #endif
1258
fail(const char * msg,...)1259 void fail(const char* msg, ...)
1260 {
1261 int e = errno;
1262 va_list args;
1263 va_start(args, msg);
1264 vfprintf(stderr, msg, args);
1265 va_end(args);
1266 fprintf(stderr, " (errno %d)\n", e);
1267 // ENOMEM/EAGAIN is frequent cause of failures in fuzzing context,
1268 // so handle it here as non-fatal error.
1269 doexit((e == ENOMEM || e == EAGAIN) ? kRetryStatus : kFailStatus);
1270 }
1271
error(const char * msg,...)1272 void error(const char* msg, ...)
1273 {
1274 va_list args;
1275 va_start(args, msg);
1276 vfprintf(stderr, msg, args);
1277 va_end(args);
1278 fprintf(stderr, "\n");
1279 doexit(kErrorStatus);
1280 }
1281
exitf(const char * msg,...)1282 void exitf(const char* msg, ...)
1283 {
1284 int e = errno;
1285 va_list args;
1286 va_start(args, msg);
1287 vfprintf(stderr, msg, args);
1288 va_end(args);
1289 fprintf(stderr, " (errno %d)\n", e);
1290 doexit(kRetryStatus);
1291 }
1292
debug(const char * msg,...)1293 void debug(const char* msg, ...)
1294 {
1295 if (!flag_debug)
1296 return;
1297 va_list args;
1298 va_start(args, msg);
1299 vfprintf(stderr, msg, args);
1300 va_end(args);
1301 fflush(stderr);
1302 }
1303
debug_dump_data(const char * data,int length)1304 void debug_dump_data(const char* data, int length)
1305 {
1306 if (!flag_debug)
1307 return;
1308 for (int i = 0; i < length; i++) {
1309 debug("%02x ", data[i] & 0xff);
1310 if (i % 16 == 15)
1311 debug("\n");
1312 }
1313 debug("\n");
1314 }
1315