1 // Copyright 2016 syzkaller project authors. All rights reserved.
2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
3
4 // This file is shared between executor and csource package.
5
6 #include <stdlib.h>
7 #include <sys/syscall.h>
8 #include <sys/types.h>
9 #include <unistd.h>
10
11 #if SYZ_EXECUTOR
// Coverage state type; only forward-declared here (its definition is not in this part of the file).
struct cover_t;
// Forward declaration. In this file it is called as cover_reset(0) right before
// the loop-device/mount operations in syz_read_part_table and syz_mount_image.
static void cover_reset(cover_t* cov);
14 #endif
15
16 #if SYZ_EXECUTOR || SYZ_THREADED
17 #include <linux/futex.h>
18 #include <pthread.h>
19
// Simple futex-backed event.
// state is 0 while the event is unset and 1 after event_set();
// its address is also the futex word that waiters sleep on.
typedef struct {
	int state;
} event_t;
23
// Puts the event into the unset state. Must run before the event is first used.
static void event_init(event_t* ev)
{
	(*ev).state = 0;
}
28
// Returns the event to the unset state so that it can be set again.
static void event_reset(event_t* ev)
{
	(*ev).state = 0;
}
33
// Marks the event as set and wakes threads sleeping on it.
// Setting an event that is already set is reported via fail().
static void event_set(event_t* ev)
{
	if (ev->state)
		fail("event already set");
	// Release store: pairs with acquire loads of state done by waiters.
	__atomic_store_n(&ev->state, 1, __ATOMIC_RELEASE);
	// FUTEX_WAKE takes the maximum number of waiters to wake as its next argument.
	// Leaving it out passes an indeterminate value through the variadic syscall(),
	// which can be 0 and leave waiters asleep; ask to wake all of them.
	syscall(SYS_futex, &ev->state, FUTEX_WAKE, 1000000);
}
41
// Blocks until the event is set.
// Sleeps on the futex word while it still reads 0 and re-checks after every wakeup.
static void event_wait(event_t* ev)
{
	for (;;) {
		if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
			return;
		syscall(SYS_futex, &ev->state, FUTEX_WAIT, 0, 0);
	}
}
47
// Non-blocking check: returns the current state (non-zero once the event is set).
static int event_isset(event_t* ev)
{
	int current = __atomic_load_n(&ev->state, __ATOMIC_ACQUIRE);
	return current;
}
52
// Waits for the event for at most timeout milliseconds.
// Returns 1 if the event is set, 0 if the timeout expired first.
static int event_timedwait(event_t* ev, uint64 timeout)
{
	uint64 start = current_time_ms();
	uint64 now = start;
	for (;;) {
		// now - start <= timeout holds here (checked at the bottom of the loop),
		// so the subtraction cannot wrap.
		uint64 remain = timeout - (now - start);
		struct timespec ts;
		ts.tv_sec = remain / 1000;
		ts.tv_nsec = (remain % 1000) * 1000 * 1000;
		syscall(SYS_futex, &ev->state, FUTEX_WAIT, 0, &ts);
		// Acquire (not relaxed) so that a caller seeing 1 also sees everything
		// published before the release store in event_set, as in event_wait.
		if (__atomic_load_n(&ev->state, __ATOMIC_ACQUIRE))
			return 1;
		now = current_time_ms();
		if (now - start > timeout)
			return 0;
	}
}
70 #endif
71
72 #if SYZ_EXECUTOR || SYZ_TUN_ENABLE || SYZ_ENABLE_NETDEV
73 #include <stdarg.h>
74 #include <stdbool.h>
75 #include <string.h>
76
// vsnprintf wrapper that treats both a formatting error and truncation as fatal.
// str/size describe the destination buffer; format/args are passed to vsnprintf unchanged.
static void vsnprintf_check(char* str, size_t size, const char* format, va_list args)
{
	int written = vsnprintf(str, size, format, args);

	if (written < 0)
		fail("tun: snprintf failed");
	// A result of size or more means the output (plus NUL) did not fit.
	if (size <= (size_t)written)
		fail("tun: string '%s...' doesn't fit into buffer", str);
}
87
// Space reserved for the formatted command in execute_command, including the terminating NUL.
#define COMMAND_MAX_LEN 128
// Shell variable assignment that execute_command puts in front of every command.
#define PATH_PREFIX "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin "
// Length of PATH_PREFIX without its terminating NUL.
#define PATH_PREFIX_LEN (sizeof(PATH_PREFIX) - 1)
91
execute_command(bool panic,const char * format,...)92 static void execute_command(bool panic, const char* format, ...)
93 {
94 va_list args;
95 char command[PATH_PREFIX_LEN + COMMAND_MAX_LEN];
96 int rv;
97
98 va_start(args, format);
99 // Executor process does not have any env, including PATH.
100 // On some distributions, system/shell adds a minimal PATH, on some it does not.
101 // Set own standard PATH to make it work across distributions.
102 memcpy(command, PATH_PREFIX, PATH_PREFIX_LEN);
103 vsnprintf_check(command + PATH_PREFIX_LEN, COMMAND_MAX_LEN, format, args);
104 va_end(args);
105 rv = system(command);
106 if (rv) {
107 if (panic)
108 fail("command '%s' failed: %d", &command[0], rv);
109 debug("command '%s': %d\n", &command[0], rv);
110 }
111 }
112 #endif
113
114 #if SYZ_EXECUTOR || SYZ_TUN_ENABLE
115 #include <arpa/inet.h>
116 #include <errno.h>
117 #include <fcntl.h>
118 #include <linux/if.h>
119 #include <linux/if_ether.h>
120 #include <linux/if_tun.h>
121 #include <linux/ip.h>
122 #include <linux/tcp.h>
123 #include <net/if_arp.h>
124 #include <stdarg.h>
125 #include <stdbool.h>
126 #include <sys/ioctl.h>
127 #include <sys/stat.h>
128
// File descriptor of the tun device; stays -1 until initialize_tun() opens it.
static int tunfd = -1;
// Set by initialize_tun(): non-zero when IFF_NAPI_FRAGS is in effect on the tun device.
static int tun_frags_enabled;
131
// Size of the buffers used to read packets from tun.
// It only needs to be large enough to hold the headers that we parse (ethernet/ip/tcp).
// The rest of the packet (if any) is silently truncated, which is fine.
#define SYZ_TUN_MAX_PACKET_SIZE 1000

// Name of the interface created by initialize_tun().
#define TUN_IFACE "syz_tun"

// LOCAL_* values are assigned to the tun interface itself;
// REMOTE_* values are installed as permanent neighbour entries on it.
#define LOCAL_MAC "aa:aa:aa:aa:aa:aa"
#define REMOTE_MAC "aa:aa:aa:aa:aa:bb"

#define LOCAL_IPV4 "172.20.20.170"
#define REMOTE_IPV4 "172.20.20.187"

#define LOCAL_IPV6 "fe80::aa"
#define REMOTE_IPV6 "fe80::bb"

// Fallback values for headers that do not define these TUNSETIFF flags.
#ifndef IFF_NAPI
#define IFF_NAPI 0x0010
#endif
#ifndef IFF_NAPI_FRAGS
#define IFF_NAPI_FRAGS 0x0020
#endif
153
initialize_tun(void)154 static void initialize_tun(void)
155 {
156 #if SYZ_EXECUTOR
157 if (!flag_enable_tun)
158 return;
159 #endif
160 tunfd = open("/dev/net/tun", O_RDWR | O_NONBLOCK);
161 if (tunfd == -1) {
162 #if SYZ_EXECUTOR
163 fail("tun: can't open /dev/net/tun\n");
164 #else
165 printf("tun: can't open /dev/net/tun: please enable CONFIG_TUN=y\n");
166 printf("otherwise fuzzing or reproducing might not work as intended\n");
167 return;
168 #endif
169 }
170 // Remap tun onto higher fd number to hide it from fuzzer and to keep
171 // fd numbers stable regardless of whether tun is opened or not (also see kMaxFd).
172 const int kTunFd = 240;
173 if (dup2(tunfd, kTunFd) < 0)
174 fail("dup2(tunfd, kTunFd) failed");
175 close(tunfd);
176 tunfd = kTunFd;
177
178 struct ifreq ifr;
179 memset(&ifr, 0, sizeof(ifr));
180 strncpy(ifr.ifr_name, TUN_IFACE, IFNAMSIZ);
181 ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_NAPI | IFF_NAPI_FRAGS;
182 if (ioctl(tunfd, TUNSETIFF, (void*)&ifr) < 0) {
183 // IFF_NAPI_FRAGS requires root, so try without it.
184 ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
185 if (ioctl(tunfd, TUNSETIFF, (void*)&ifr) < 0)
186 fail("tun: ioctl(TUNSETIFF) failed");
187 }
188 // If IFF_NAPI_FRAGS is not supported it will be silently dropped,
189 // so query the effective flags.
190 if (ioctl(tunfd, TUNGETIFF, (void*)&ifr) < 0)
191 fail("tun: ioctl(TUNGETIFF) failed");
192 tun_frags_enabled = (ifr.ifr_flags & IFF_NAPI_FRAGS) != 0;
193 debug("tun_frags_enabled=%d\n", tun_frags_enabled);
194
195 // Disable IPv6 DAD, otherwise the address remains unusable until DAD completes.
196 // Don't panic because this is an optional config.
197 execute_command(0, "sysctl -w net.ipv6.conf.%s.accept_dad=0", TUN_IFACE);
198
199 // Disable IPv6 router solicitation to prevent IPv6 spam.
200 // Don't panic because this is an optional config.
201 execute_command(0, "sysctl -w net.ipv6.conf.%s.router_solicitations=0", TUN_IFACE);
202 // There seems to be no way to disable IPv6 MTD to prevent more IPv6 spam.
203
204 execute_command(1, "ip link set dev %s address %s", TUN_IFACE, LOCAL_MAC);
205 execute_command(1, "ip addr add %s/24 dev %s", LOCAL_IPV4, TUN_IFACE);
206 execute_command(1, "ip neigh add %s lladdr %s dev %s nud permanent",
207 REMOTE_IPV4, REMOTE_MAC, TUN_IFACE);
208 // Don't panic because ipv6 may be not enabled in kernel.
209 execute_command(0, "ip -6 addr add %s/120 dev %s", LOCAL_IPV6, TUN_IFACE);
210 execute_command(0, "ip -6 neigh add %s lladdr %s dev %s nud permanent",
211 REMOTE_IPV6, REMOTE_MAC, TUN_IFACE);
212 execute_command(1, "ip link set dev %s up", TUN_IFACE);
213 }
214 #endif
215
216 #if SYZ_EXECUTOR || SYZ_ENABLE_NETDEV
217 #include <arpa/inet.h>
218 #include <errno.h>
219 #include <fcntl.h>
220 #include <linux/if.h>
221 #include <linux/if_ether.h>
222 #include <linux/if_tun.h>
223 #include <linux/ip.h>
224 #include <linux/tcp.h>
225 #include <net/if_arp.h>
226 #include <stdarg.h>
227 #include <stdbool.h>
228 #include <sys/ioctl.h>
229 #include <sys/stat.h>
230 #include <sys/uio.h>
231
// printf-style address templates for the devices set up in initialize_netdevices,
// which formats each of them with (device index + 10).
// Addresses are chosen to be in the same subnet as tun addresses.
#define DEV_IPV4 "172.20.20.%d"
#define DEV_IPV6 "fe80::%02hx"
#define DEV_MAC "aa:aa:aa:aa:aa:%02hx"
236
// Variadic front end for vsnprintf_check: formats into str (size bytes),
// with errors and truncation handled by vsnprintf_check.
static void snprintf_check(char* str, size_t size, const char* format, ...)
{
	va_list ap;

	va_start(ap, format);
	vsnprintf_check(str, size, format, ap);
	va_end(ap);
}
245
246 // We test in a separate namespace, which does not have any network devices initially (even lo).
247 // Create/up as many as we can.
initialize_netdevices(void)248 static void initialize_netdevices(void)
249 {
250 #if SYZ_EXECUTOR
251 if (!flag_enable_net_dev)
252 return;
253 #endif
254 unsigned i;
255 const char* devtypes[] = {"ip6gretap", "bridge", "vcan", "bond", "team"};
256 // If you extend this array, also update netdev_addr_id in vnet.txt.
257 const char* devnames[] = {"lo", "sit0", "bridge0", "vcan0", "tunl0",
258 "gre0", "gretap0", "ip_vti0", "ip6_vti0",
259 "ip6tnl0", "ip6gre0", "ip6gretap0",
260 "erspan0", "bond0", "veth0", "veth1", "team0",
261 "veth0_to_bridge", "veth1_to_bridge",
262 "veth0_to_bond", "veth1_to_bond",
263 "veth0_to_team", "veth1_to_team"};
264 const char* devmasters[] = {"bridge", "bond", "team"};
265
266 for (i = 0; i < sizeof(devtypes) / (sizeof(devtypes[0])); i++)
267 execute_command(0, "ip link add dev %s0 type %s", devtypes[i], devtypes[i]);
268 // This adds connected veth0 and veth1 devices.
269 execute_command(0, "ip link add type veth");
270
271 // This creates connected bridge/bond/team_slave devices of type veth,
272 // and makes them slaves of bridge/bond/team devices, respectively.
273 // Note: slave devices don't need MAC/IP addresses, only master devices.
274 // veth0_to_* is not slave devices, which still need ip addresses.
275 for (i = 0; i < sizeof(devmasters) / (sizeof(devmasters[0])); i++) {
276 execute_command(0, "ip link add name %s_slave_0 type veth peer name veth0_to_%s", devmasters[i], devmasters[i]);
277 execute_command(0, "ip link add name %s_slave_1 type veth peer name veth1_to_%s", devmasters[i], devmasters[i]);
278 execute_command(0, "ip link set %s_slave_0 master %s0", devmasters[i], devmasters[i]);
279 execute_command(0, "ip link set %s_slave_1 master %s0", devmasters[i], devmasters[i]);
280 execute_command(0, "ip link set veth0_to_%s up", devmasters[i]);
281 execute_command(0, "ip link set veth1_to_%s up", devmasters[i]);
282 }
283 // bond/team_slave_* will set up automatically when set their master.
284 // But bridge_slave_* need to set up manually.
285 execute_command(0, "ip link set bridge_slave_0 up");
286 execute_command(0, "ip link set bridge_slave_1 up");
287
288 for (i = 0; i < sizeof(devnames) / (sizeof(devnames[0])); i++) {
289 char addr[32];
290 // Assign some unique address to devices. Some devices won't up without this.
291 // Devices that don't need these addresses will simply ignore them.
292 // Shift addresses by 10 because 0 subnet address can mean special things.
293 snprintf_check(addr, sizeof(addr), DEV_IPV4, i + 10);
294 execute_command(0, "ip -4 addr add %s/24 dev %s", addr, devnames[i]);
295 snprintf_check(addr, sizeof(addr), DEV_IPV6, i + 10);
296 execute_command(0, "ip -6 addr add %s/120 dev %s", addr, devnames[i]);
297 snprintf_check(addr, sizeof(addr), DEV_MAC, i + 10);
298 execute_command(0, "ip link set dev %s address %s", devnames[i], addr);
299 execute_command(0, "ip link set dev %s up", devnames[i]);
300 }
301 }
302 #endif
303
304 #if SYZ_EXECUTOR || SYZ_TUN_ENABLE && (__NR_syz_extract_tcp_res || SYZ_REPEAT)
305 #include <errno.h>
306
read_tun(char * data,int size)307 static int read_tun(char* data, int size)
308 {
309 if (tunfd < 0)
310 return -1;
311
312 int rv = read(tunfd, data, size);
313 if (rv < 0) {
314 if (errno == EAGAIN)
315 return -1;
316 // Tun sometimes returns this, unclear if it's a kernel bug or not.
317 if (errno == EBADFD)
318 return -1;
319 fail("tun: read failed with %d", rv);
320 }
321 return rv;
322 }
323 #endif
324
325 #if SYZ_EXECUTOR || __NR_syz_emit_ethernet && SYZ_TUN_ENABLE
326 #include <stdbool.h>
327 #include <sys/uio.h>
328
// Upper bound on the number of fragment sizes syz_emit_ethernet takes from the fuzzer.
#define MAX_FRAGS 4
// Fragmentation request read by syz_emit_ethernet from fuzzer memory.
struct vnet_fragmentation {
	// Non-zero: any data left after the listed fragments is sent as one more fragment.
	uint32 full;
	// Number of entries of frags[] to use (clamped to MAX_FRAGS by the reader).
	uint32 count;
	// Requested size in bytes of each fragment (clamped to the remaining packet length).
	uint32 frags[MAX_FRAGS];
};
335
syz_emit_ethernet(long a0,long a1,long a2)336 static long syz_emit_ethernet(long a0, long a1, long a2)
337 {
338 // syz_emit_ethernet(len len[packet], packet ptr[in, eth_packet], frags ptr[in, vnet_fragmentation, opt])
339 // vnet_fragmentation {
340 // full int32[0:1]
341 // count int32[1:4]
342 // frags array[int32[0:4096], 4]
343 // }
344 if (tunfd < 0)
345 return (uintptr_t)-1;
346
347 uint32 length = a0;
348 char* data = (char*)a1;
349 debug_dump_data(data, length);
350
351 struct vnet_fragmentation* frags = (struct vnet_fragmentation*)a2;
352 struct iovec vecs[MAX_FRAGS + 1];
353 uint32 nfrags = 0;
354 if (!tun_frags_enabled || frags == NULL) {
355 vecs[nfrags].iov_base = data;
356 vecs[nfrags].iov_len = length;
357 nfrags++;
358 } else {
359 bool full = true;
360 uint32 i, count = 0;
361 NONFAILING(full = frags->full);
362 NONFAILING(count = frags->count);
363 if (count > MAX_FRAGS)
364 count = MAX_FRAGS;
365 for (i = 0; i < count && length != 0; i++) {
366 uint32 size = 0;
367 NONFAILING(size = frags->frags[i]);
368 if (size > length)
369 size = length;
370 vecs[nfrags].iov_base = data;
371 vecs[nfrags].iov_len = size;
372 nfrags++;
373 data += size;
374 length -= size;
375 }
376 if (length != 0 && (full || nfrags == 0)) {
377 vecs[nfrags].iov_base = data;
378 vecs[nfrags].iov_len = length;
379 nfrags++;
380 }
381 }
382 return writev(tunfd, vecs, nfrags);
383 }
384 #endif
385
386 #if SYZ_EXECUTOR || SYZ_REPEAT && SYZ_TUN_ENABLE
flush_tun()387 static void flush_tun()
388 {
389 #if SYZ_EXECUTOR
390 if (!flag_enable_tun)
391 return;
392 #endif
393 char data[SYZ_TUN_MAX_PACKET_SIZE];
394 while (read_tun(&data[0], sizeof(data)) != -1) {
395 }
396 }
397 #endif
398
399 #if SYZ_EXECUTOR || __NR_syz_extract_tcp_res && SYZ_TUN_ENABLE
400 #ifndef __ANDROID__
// Local definition of the IPv6 fixed header, used by syz_extract_tcp_res.
// Can't include <linux/ipv6.h>, since it causes
// conflicts due to some structs redefinition.
// NOTE(review): the bit-field order (priority before version) looks like the
// little-endian layout — confirm before relying on these two fields on big-endian targets.
struct ipv6hdr {
	__u8 priority : 4,
	    version : 4;
	__u8 flow_lbl[3];

	__be16 payload_len;
	__u8 nexthdr;
	__u8 hop_limit;

	struct in6_addr saddr;
	struct in6_addr daddr;
};
415 #endif
416
// Output of syz_extract_tcp_res: sequence and acknowledgement numbers taken from
// a received TCP header with the caller's increments added, stored in network byte order.
struct tcp_resources {
	uint32 seq;
	uint32 ack;
};
421
syz_extract_tcp_res(long a0,long a1,long a2)422 static long syz_extract_tcp_res(long a0, long a1, long a2)
423 {
424 // syz_extract_tcp_res(res ptr[out, tcp_resources], seq_inc int32, ack_inc int32)
425
426 if (tunfd < 0)
427 return (uintptr_t)-1;
428
429 char data[SYZ_TUN_MAX_PACKET_SIZE];
430 int rv = read_tun(&data[0], sizeof(data));
431 if (rv == -1)
432 return (uintptr_t)-1;
433 size_t length = rv;
434 debug_dump_data(data, length);
435
436 struct tcphdr* tcphdr;
437
438 if (length < sizeof(struct ethhdr))
439 return (uintptr_t)-1;
440 struct ethhdr* ethhdr = (struct ethhdr*)&data[0];
441
442 if (ethhdr->h_proto == htons(ETH_P_IP)) {
443 if (length < sizeof(struct ethhdr) + sizeof(struct iphdr))
444 return (uintptr_t)-1;
445 struct iphdr* iphdr = (struct iphdr*)&data[sizeof(struct ethhdr)];
446 if (iphdr->protocol != IPPROTO_TCP)
447 return (uintptr_t)-1;
448 if (length < sizeof(struct ethhdr) + iphdr->ihl * 4 + sizeof(struct tcphdr))
449 return (uintptr_t)-1;
450 tcphdr = (struct tcphdr*)&data[sizeof(struct ethhdr) + iphdr->ihl * 4];
451 } else {
452 if (length < sizeof(struct ethhdr) + sizeof(struct ipv6hdr))
453 return (uintptr_t)-1;
454 struct ipv6hdr* ipv6hdr = (struct ipv6hdr*)&data[sizeof(struct ethhdr)];
455 // TODO: parse and skip extension headers.
456 if (ipv6hdr->nexthdr != IPPROTO_TCP)
457 return (uintptr_t)-1;
458 if (length < sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
459 return (uintptr_t)-1;
460 tcphdr = (struct tcphdr*)&data[sizeof(struct ethhdr) + sizeof(struct ipv6hdr)];
461 }
462
463 struct tcp_resources* res = (struct tcp_resources*)a0;
464 NONFAILING(res->seq = htonl((ntohl(tcphdr->seq) + (uint32)a1)));
465 NONFAILING(res->ack = htonl((ntohl(tcphdr->ack_seq) + (uint32)a2)));
466
467 debug("extracted seq: %08x\n", res->seq);
468 debug("extracted ack: %08x\n", res->ack);
469
470 return 0;
471 }
472 #endif
473
474 #if SYZ_EXECUTOR || __NR_syz_open_dev
475 #include <fcntl.h>
476 #include <string.h>
477 #include <sys/stat.h>
478 #include <sys/types.h>
479
syz_open_dev(long a0,long a1,long a2)480 static long syz_open_dev(long a0, long a1, long a2)
481 {
482 if (a0 == 0xc || a0 == 0xb) {
483 // syz_open_dev$char(dev const[0xc], major intptr, minor intptr) fd
484 // syz_open_dev$block(dev const[0xb], major intptr, minor intptr) fd
485 char buf[128];
486 sprintf(buf, "/dev/%s/%d:%d", a0 == 0xc ? "char" : "block", (uint8)a1, (uint8)a2);
487 return open(buf, O_RDWR, 0);
488 } else {
489 // syz_open_dev(dev strconst, id intptr, flags flags[open_flags]) fd
490 char buf[1024];
491 char* hash;
492 NONFAILING(strncpy(buf, (char*)a0, sizeof(buf) - 1));
493 buf[sizeof(buf) - 1] = 0;
494 while ((hash = strchr(buf, '#'))) {
495 *hash = '0' + (char)(a1 % 10); // 10 devices should be enough for everyone.
496 a1 /= 10;
497 }
498 return open(buf, a2, 0);
499 }
500 }
501 #endif
502
503 #if SYZ_EXECUTOR || __NR_syz_open_procfs
504 #include <fcntl.h>
505 #include <string.h>
506 #include <sys/stat.h>
507 #include <sys/types.h>
508
// syz_open_procfs(pid pid, file ptr[in, string[procfs_file]]) fd
// Builds a procfs path for file a1 and opens it read-write, falling back to read-only.
//   a0 == 0  -> /proc/self/<file>
//   a0 == -1 -> /proc/thread-self/<file>
//   else     -> /proc/self/task/<a0>/<file>
static long syz_open_procfs(long a0, long a1)
{
	char path[128];
	memset(path, 0, sizeof(path));
	// a1 points into fuzzer memory, hence the guarded formatting.
	if (a0 == -1) {
		NONFAILING(snprintf(path, sizeof(path), "/proc/thread-self/%s", (char*)a1));
	} else if (a0 == 0) {
		NONFAILING(snprintf(path, sizeof(path), "/proc/self/%s", (char*)a1));
	} else {
		NONFAILING(snprintf(path, sizeof(path), "/proc/self/task/%d/%s", (int)a0, (char*)a1));
	}
	int fd = open(path, O_RDWR);
	return fd != -1 ? fd : open(path, O_RDONLY);
}
527 #endif
528
529 #if SYZ_EXECUTOR || __NR_syz_open_pts
530 #include <fcntl.h>
531 #include <sys/ioctl.h>
532 #include <sys/stat.h>
533 #include <sys/types.h>
534
// syz_openpts(fd fd[tty], flags flags[open_flags]) fd[tty]
// Queries the pty number of a0 with TIOCGPTN and opens /dev/pts/<number> with flags a1.
// Returns the new fd, or -1 if the ioctl or the open fails.
static long syz_open_pts(long a0, long a1)
{
	int ptyno = 0;
	if (ioctl(a0, TIOCGPTN, &ptyno) != 0)
		return -1;

	char path[128];
	sprintf(path, "/dev/pts/%d", ptyno);
	return open(path, a1, 0);
}
545 #endif
546
547 #if SYZ_EXECUTOR || __NR_syz_init_net_socket
548 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE
549 #include <fcntl.h>
550 #include <sched.h>
551 #include <sys/stat.h>
552 #include <sys/types.h>
553 #include <unistd.h>
554
const int kInitNetNsFd = 239; // see kMaxFd
// syz_init_net_socket opens a socket in init net namespace.
// Used for families that can only be created in init net namespace.
// It switches to the namespace referred to by kInitNetNsFd, creates the socket,
// then switches back to the original namespace.
// Returns the socket fd, or -1 with errno set by the failing call.
static long syz_init_net_socket(long domain, long type, long proto)
{
	int netns = open("/proc/self/ns/net", O_RDONLY);
	if (netns == -1)
		return netns;
	if (setns(kInitNetNsFd, 0)) {
		// Don't leak the namespace fd on this path; keep setns' errno for the caller.
		int saved = errno;
		close(netns);
		errno = saved;
		return -1;
	}
	int sock = syscall(__NR_socket, domain, type, proto);
	// Remember the socket() errno: the calls below may overwrite it.
	int err = errno;
	// Failing to get back to our own namespace is not recoverable.
	if (setns(netns, 0))
		fail("setns(netns) failed");
	close(netns);
	errno = err;
	return sock;
}
573 #else
// Variant used when none of the sandbox modes is enabled:
// a plain socket(domain, type, proto) call with no namespace switch.
static long syz_init_net_socket(long domain, long type, long proto)
{
	long sock = syscall(__NR_socket, domain, type, proto);
	return sock;
}
578 #endif
579 #endif
580
581 #if SYZ_EXECUTOR || __NR_syz_genetlink_get_family_id
582 #include <errno.h>
583 #include <linux/genetlink.h>
584 #include <linux/netlink.h>
585 #include <sys/socket.h>
586 #include <sys/types.h>
587
syz_genetlink_get_family_id(long name)588 static long syz_genetlink_get_family_id(long name)
589 {
590 char buf[512] = {0};
591 struct nlmsghdr* hdr = (struct nlmsghdr*)buf;
592 struct genlmsghdr* genlhdr = (struct genlmsghdr*)NLMSG_DATA(hdr);
593 struct nlattr* attr = (struct nlattr*)(genlhdr + 1);
594 hdr->nlmsg_len = sizeof(*hdr) + sizeof(*genlhdr) + sizeof(*attr) + GENL_NAMSIZ;
595 hdr->nlmsg_type = GENL_ID_CTRL;
596 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
597 genlhdr->cmd = CTRL_CMD_GETFAMILY;
598 attr->nla_type = CTRL_ATTR_FAMILY_NAME;
599 attr->nla_len = sizeof(*attr) + GENL_NAMSIZ;
600 NONFAILING(strncpy((char*)(attr + 1), (char*)name, GENL_NAMSIZ));
601 struct iovec iov = {hdr, hdr->nlmsg_len};
602 struct sockaddr_nl addr = {0};
603 addr.nl_family = AF_NETLINK;
604 debug("syz_genetlink_get_family_id(%s)\n", (char*)(attr + 1));
605 int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
606 if (fd == -1) {
607 debug("syz_genetlink_get_family_id: socket failed: %d\n", errno);
608 return -1;
609 }
610 struct msghdr msg = {&addr, sizeof(addr), &iov, 1, NULL, 0, 0};
611 if (sendmsg(fd, &msg, 0) == -1) {
612 debug("syz_genetlink_get_family_id: sendmsg failed: %d\n", errno);
613 close(fd);
614 return -1;
615 }
616 ssize_t n = recv(fd, buf, sizeof(buf), 0);
617 close(fd);
618 if (n <= 0) {
619 debug("syz_genetlink_get_family_id: recv failed: %d\n", errno);
620 return -1;
621 }
622 if (hdr->nlmsg_type != GENL_ID_CTRL) {
623 debug("syz_genetlink_get_family_id: wrong reply type: %d\n", hdr->nlmsg_type);
624 return -1;
625 }
626 for (; (char*)attr < buf + n; attr = (struct nlattr*)((char*)attr + NLMSG_ALIGN(attr->nla_len))) {
627 if (attr->nla_type == CTRL_ATTR_FAMILY_ID)
628 return *(uint16*)(attr + 1);
629 }
630 debug("syz_genetlink_get_family_id: no CTRL_ATTR_FAMILY_ID attr\n");
631 return -1;
632 }
633 #endif
634
635 #if SYZ_EXECUTOR || __NR_syz_mount_image || __NR_syz_read_part_table
636 #include <errno.h>
637 #include <fcntl.h>
638 #include <linux/loop.h>
639 #include <sys/ioctl.h>
640 #include <sys/stat.h>
641 #include <sys/types.h>
642
// One piece of an image: size bytes from data are written at byte offset
// offset of the in-memory image file (see syz_mount_image / syz_read_part_table).
struct fs_image_segment {
	void* data;
	uintptr_t size;
	uintptr_t offset;
};

// Limits applied to fuzzer-provided values: maximum number of segments,
// and maximum image size in bytes (also the bound for segment size/offset).
#define IMAGE_MAX_SEGMENTS 4096
#define IMAGE_MAX_SIZE (129 << 20)

// memfd_create syscall number for each supported GOARCH_* target
// (invoked via syscall(SYZ_memfd_create, ...)).
#if GOARCH_386
#define SYZ_memfd_create 356
#elif GOARCH_amd64
#define SYZ_memfd_create 319
#elif GOARCH_arm
#define SYZ_memfd_create 385
#elif GOARCH_arm64
#define SYZ_memfd_create 279
#elif GOARCH_ppc64le
#define SYZ_memfd_create 360
#endif
663 #endif
664
665 #if SYZ_EXECUTOR || __NR_syz_read_part_table
666 // syz_read_part_table(size intptr, nsegs len[segments], segments ptr[in, array[fs_image_segment]])
syz_read_part_table(unsigned long size,unsigned long nsegs,long segments)667 static long syz_read_part_table(unsigned long size, unsigned long nsegs, long segments)
668 {
669 char loopname[64], linkname[64];
670 int loopfd, err = 0, res = -1;
671 unsigned long i, j;
672 // See the comment in syz_mount_image.
673 struct fs_image_segment* segs = (struct fs_image_segment*)segments;
674
675 if (nsegs > IMAGE_MAX_SEGMENTS)
676 nsegs = IMAGE_MAX_SEGMENTS;
677 for (i = 0; i < nsegs; i++) {
678 if (segs[i].size > IMAGE_MAX_SIZE)
679 segs[i].size = IMAGE_MAX_SIZE;
680 segs[i].offset %= IMAGE_MAX_SIZE;
681 if (segs[i].offset > IMAGE_MAX_SIZE - segs[i].size)
682 segs[i].offset = IMAGE_MAX_SIZE - segs[i].size;
683 if (size < segs[i].offset + segs[i].offset)
684 size = segs[i].offset + segs[i].offset;
685 }
686 if (size > IMAGE_MAX_SIZE)
687 size = IMAGE_MAX_SIZE;
688 int memfd = syscall(SYZ_memfd_create, "syz_read_part_table", 0);
689 if (memfd == -1) {
690 err = errno;
691 goto error;
692 }
693 if (ftruncate(memfd, size)) {
694 err = errno;
695 goto error_close_memfd;
696 }
697 for (i = 0; i < nsegs; i++) {
698 if (pwrite(memfd, segs[i].data, segs[i].size, segs[i].offset) < 0) {
699 debug("syz_read_part_table: pwrite[%u] failed: %d\n", (int)i, errno);
700 }
701 }
702 snprintf(loopname, sizeof(loopname), "/dev/loop%llu", procid);
703 loopfd = open(loopname, O_RDWR);
704 if (loopfd == -1) {
705 err = errno;
706 goto error_close_memfd;
707 }
708 if (ioctl(loopfd, LOOP_SET_FD, memfd)) {
709 if (errno != EBUSY) {
710 err = errno;
711 goto error_close_loop;
712 }
713 ioctl(loopfd, LOOP_CLR_FD, 0);
714 usleep(1000);
715 if (ioctl(loopfd, LOOP_SET_FD, memfd)) {
716 err = errno;
717 goto error_close_loop;
718 }
719 }
720 struct loop_info64 info;
721 if (ioctl(loopfd, LOOP_GET_STATUS64, &info)) {
722 err = errno;
723 goto error_clear_loop;
724 }
725 #if SYZ_EXECUTOR
726 cover_reset(0);
727 #endif
728 info.lo_flags |= LO_FLAGS_PARTSCAN;
729 if (ioctl(loopfd, LOOP_SET_STATUS64, &info)) {
730 err = errno;
731 goto error_clear_loop;
732 }
733 res = 0;
734 // If we managed to parse some partitions, symlink them into our work dir.
735 for (i = 1, j = 0; i < 8; i++) {
736 snprintf(loopname, sizeof(loopname), "/dev/loop%llup%d", procid, (int)i);
737 struct stat statbuf;
738 if (stat(loopname, &statbuf) == 0) {
739 snprintf(linkname, sizeof(linkname), "./file%d", (int)j++);
740 if (symlink(loopname, linkname)) {
741 debug("syz_read_part_table: symlink(%s, %s) failed: %d\n", loopname, linkname, errno);
742 }
743 }
744 }
745 error_clear_loop:
746 ioctl(loopfd, LOOP_CLR_FD, 0);
747 error_close_loop:
748 close(loopfd);
749 error_close_memfd:
750 close(memfd);
751 error:
752 errno = err;
753 return res;
754 }
755 #endif
756
757 #if SYZ_EXECUTOR || __NR_syz_mount_image
758 #include <string.h>
759 #include <sys/mount.h>
760
761 //syz_mount_image(fs ptr[in, string[disk_filesystems]], dir ptr[in, filename], size intptr, nsegs len[segments], segments ptr[in, array[fs_image_segment]], flags flags[mount_flags], opts ptr[in, fs_options[vfat_options]])
762 //fs_image_segment {
763 // data ptr[in, array[int8]]
764 // size len[data, intptr]
765 // offset intptr
766 //}
syz_mount_image(long fsarg,long dir,unsigned long size,unsigned long nsegs,long segments,long flags,long optsarg)767 static long syz_mount_image(long fsarg, long dir, unsigned long size, unsigned long nsegs, long segments, long flags, long optsarg)
768 {
769 char loopname[64], fs[32], opts[256];
770 int loopfd, err = 0, res = -1;
771 unsigned long i;
772 // Strictly saying we ought to do a nonfailing copyout of segments into a local var.
773 // But some filesystems have large number of segments (2000+),
774 // we can't allocate that much on stack and allocating elsewhere is problematic,
775 // so we just use the memory allocated by fuzzer.
776 struct fs_image_segment* segs = (struct fs_image_segment*)segments;
777
778 if (nsegs > IMAGE_MAX_SEGMENTS)
779 nsegs = IMAGE_MAX_SEGMENTS;
780 for (i = 0; i < nsegs; i++) {
781 if (segs[i].size > IMAGE_MAX_SIZE)
782 segs[i].size = IMAGE_MAX_SIZE;
783 segs[i].offset %= IMAGE_MAX_SIZE;
784 if (segs[i].offset > IMAGE_MAX_SIZE - segs[i].size)
785 segs[i].offset = IMAGE_MAX_SIZE - segs[i].size;
786 if (size < segs[i].offset + segs[i].offset)
787 size = segs[i].offset + segs[i].offset;
788 }
789 if (size > IMAGE_MAX_SIZE)
790 size = IMAGE_MAX_SIZE;
791 int memfd = syscall(SYZ_memfd_create, "syz_mount_image", 0);
792 if (memfd == -1) {
793 err = errno;
794 goto error;
795 }
796 if (ftruncate(memfd, size)) {
797 err = errno;
798 goto error_close_memfd;
799 }
800 for (i = 0; i < nsegs; i++) {
801 if (pwrite(memfd, segs[i].data, segs[i].size, segs[i].offset) < 0) {
802 debug("syz_mount_image: pwrite[%u] failed: %d\n", (int)i, errno);
803 }
804 }
805 snprintf(loopname, sizeof(loopname), "/dev/loop%llu", procid);
806 loopfd = open(loopname, O_RDWR);
807 if (loopfd == -1) {
808 err = errno;
809 goto error_close_memfd;
810 }
811 if (ioctl(loopfd, LOOP_SET_FD, memfd)) {
812 if (errno != EBUSY) {
813 err = errno;
814 goto error_close_loop;
815 }
816 ioctl(loopfd, LOOP_CLR_FD, 0);
817 usleep(1000);
818 if (ioctl(loopfd, LOOP_SET_FD, memfd)) {
819 err = errno;
820 goto error_close_loop;
821 }
822 }
823 mkdir((char*)dir, 0777);
824 memset(fs, 0, sizeof(fs));
825 NONFAILING(strncpy(fs, (char*)fsarg, sizeof(fs) - 1));
826 memset(opts, 0, sizeof(opts));
827 // Leave some space for the additional options we append below.
828 NONFAILING(strncpy(opts, (char*)optsarg, sizeof(opts) - 32));
829 if (strcmp(fs, "iso9660") == 0) {
830 flags |= MS_RDONLY;
831 } else if (strncmp(fs, "ext", 3) == 0) {
832 // For ext2/3/4 we have to have errors=continue because the image
833 // can contain errors=panic flag and can legally crash kernel.
834 if (strstr(opts, "errors=panic") || strstr(opts, "errors=remount-ro") == 0)
835 strcat(opts, ",errors=continue");
836 } else if (strcmp(fs, "xfs") == 0) {
837 // For xfs we need nouuid because xfs has a global uuids table
838 // and if two parallel executors mounts fs with the same uuid, second mount fails.
839 strcat(opts, ",nouuid");
840 }
841 debug("syz_mount_image: size=%llu segs=%llu loop='%s' dir='%s' fs='%s' flags=%llu opts='%s'\n", (uint64)size, (uint64)nsegs, loopname, (char*)dir, fs, (uint64)flags, opts);
842 #if SYZ_EXECUTOR
843 cover_reset(0);
844 #endif
845 if (mount(loopname, (char*)dir, fs, flags, opts)) {
846 err = errno;
847 goto error_clear_loop;
848 }
849 res = 0;
850 error_clear_loop:
851 ioctl(loopfd, LOOP_CLR_FD, 0);
852 error_close_loop:
853 close(loopfd);
854 error_close_memfd:
855 close(memfd);
856 error:
857 errno = err;
858 return res;
859 }
860 #endif
861
862 #if SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu
863 #include <errno.h>
864 #include <fcntl.h>
865 #include <linux/kvm.h>
866 #include <stdarg.h>
867 #include <stddef.h>
868 #include <sys/ioctl.h>
869 #include <sys/stat.h>
870
871 #if defined(__x86_64__)
872 #include "common_kvm_amd64.h"
873 #elif defined(__aarch64__)
874 #include "common_kvm_arm64.h"
875 #else
// Fallback for architectures without a KVM setup implementation
// (neither x86_64 nor aarch64): ignores all arguments and reports success.
static long syz_kvm_setup_cpu(long a0, long a1, long a2, long a3, long a4, long a5, long a6, long a7)
{
	const long result = 0;
	return result;
}
880 #endif
881 #endif
882
883 #if SYZ_EXECUTOR || SYZ_FAULT_INJECTION || SYZ_SANDBOX_NAMESPACE || SYZ_ENABLE_CGROUPS
884 #include <errno.h>
885 #include <fcntl.h>
886 #include <stdarg.h>
887 #include <stdbool.h>
888 #include <string.h>
889 #include <sys/stat.h>
890 #include <sys/types.h>
891
write_file(const char * file,const char * what,...)892 static bool write_file(const char* file, const char* what, ...)
893 {
894 char buf[1024];
895 va_list args;
896 va_start(args, what);
897 vsnprintf(buf, sizeof(buf), what, args);
898 va_end(args);
899 buf[sizeof(buf) - 1] = 0;
900 int len = strlen(buf);
901
902 int fd = open(file, O_WRONLY | O_CLOEXEC);
903 if (fd == -1)
904 return false;
905 if (write(fd, buf, len) != len) {
906 int err = errno;
907 close(fd);
908 errno = err;
909 return false;
910 }
911 close(fd);
912 return true;
913 }
914 #endif
915
916 #if SYZ_EXECUTOR || SYZ_RESET_NET_NAMESPACE
917 #include <errno.h>
918 #include <linux/net.h>
919 #include <netinet/in.h>
920 #include <string.h>
921 #include <sys/socket.h>
922
923 // checkpoint/reset_net_namespace partially resets net namespace to initial state
924 // after each test. Currently it resets only ipv4 netfilter state.
925 // Ideally, we just create a new net namespace for each test,
926 // however it's too slow (1-1.5 seconds per namespace, not parallelizable).
927
// Linux headers do not compile for C++, so we have to define the structs manually.
// Size in bytes of the entrytable buffers in the *_get_entries and *_replace structs below.
#define XT_TABLE_SIZE 1536
// NOTE(review): not used in the part of the file visible here; presumably the
// largest number of table entries the checkpoint/reset code supports — confirm.
#define XT_MAX_ENTRIES 10
931
// Pair of 64-bit counters referenced by the counters pointer of ipt_replace/arpt_replace.
// NOTE(review): pcnt/bcnt are presumably packet and byte counts — confirm against the kernel header.
struct xt_counters {
	uint64 pcnt, bcnt;
};
935
// Buffer for getsockopt(IPT_SO_GET_INFO): checkpoint_iptables fills in name
// with the table name before the call. Hook arrays have 5 entries.
struct ipt_getinfo {
	char name[32];
	unsigned int valid_hooks;
	unsigned int hook_entry[5];
	unsigned int underflow[5];
	unsigned int num_entries;
	unsigned int size;
};
944
// Table name and size followed by an XT_TABLE_SIZE-byte entry buffer
// (declared as void* elements). checkpoint_iptables declares a local of this type;
// NOTE(review): presumably the IPT_SO_GET_ENTRIES buffer — the use is not visible here.
struct ipt_get_entries {
	char name[32];
	unsigned int size;
	void* entrytable[XT_TABLE_SIZE / sizeof(void*)];
};
950
// Saved table contents kept per table in ipt_table_desc; checkpoint_iptables
// fills in name with the table name.
// NOTE(review): presumably the IPT_SO_SET_REPLACE argument — the use is not visible here.
struct ipt_replace {
	char name[32];
	unsigned int valid_hooks;
	unsigned int num_entries;
	unsigned int size;
	unsigned int hook_entry[5];
	unsigned int underflow[5];
	unsigned int num_counters;
	struct xt_counters* counters;
	char entrytable[XT_TABLE_SIZE];
};
962
// Per-table state for the iptables checkpoint code:
// the table name plus the info and replace buffers belonging to it.
struct ipt_table_desc {
	const char* name;
	struct ipt_getinfo info;
	struct ipt_replace replace;
};
968
// IPv4 tables handled by the checkpoint code. Only the names are set here;
// the info/replace members start out zero-initialized.
static struct ipt_table_desc ipv4_tables[] = {
    {.name = "filter"},
    {.name = "nat"},
    {.name = "mangle"},
    {.name = "raw"},
    {.name = "security"},
};
976
// IPv6 tables handled by the checkpoint code; same table names and layout as ipv4_tables.
static struct ipt_table_desc ipv6_tables[] = {
    {.name = "filter"},
    {.name = "nat"},
    {.name = "mangle"},
    {.name = "raw"},
    {.name = "security"},
};
984
// Socket option numbers for the iptables requests used below.
// IPT_SO_SET_REPLACE and IPT_SO_GET_INFO share a value: the former is passed
// to setsockopt, the latter to getsockopt.
#define IPT_BASE_CTL 64
#define IPT_SO_SET_REPLACE (IPT_BASE_CTL)
#define IPT_SO_GET_INFO (IPT_BASE_CTL)
#define IPT_SO_GET_ENTRIES (IPT_BASE_CTL + 1)
989
// Table header exchanged via getsockopt(ARPT_SO_GET_INFO):
// the caller fills in name, the remaining fields are read back after the call.
// Same shape as ipt_getinfo, but with 3-element hook arrays.
struct arpt_getinfo {
	char name[32];
	unsigned int valid_hooks; // zero is treated by reset_arptables as "table was not checkpointed"
	unsigned int hook_entry[3];
	unsigned int underflow[3];
	unsigned int num_entries;
	unsigned int size; // size of the table's entries in bytes
};
998
// Argument of getsockopt(ARPT_SO_GET_ENTRIES): the caller sets name and size,
// entrytable receives the table's entries.
struct arpt_get_entries {
	char name[32];
	unsigned int size;
	// XT_TABLE_SIZE bytes in total; declared with void* elements, presumably for alignment.
	void* entrytable[XT_TABLE_SIZE / sizeof(void*)];
};
1004
// Argument of setsockopt(ARPT_SO_SET_REPLACE), which installs a complete table.
// checkpoint_arptables fills it from the captured state; reset_arptables sets
// num_counters/counters right before issuing the request.
struct arpt_replace {
	char name[32];
	unsigned int valid_hooks;
	unsigned int num_entries;
	unsigned int size; // number of bytes of entrytable that are in use
	unsigned int hook_entry[3];
	unsigned int underflow[3];
	unsigned int num_counters;
	struct xt_counters* counters;
	char entrytable[XT_TABLE_SIZE];
};
1016
// Checkpointed state of one arptables table.
struct arpt_table_desc {
	const char* name; // table name, copied into the request structs
	struct arpt_getinfo info; // header captured by checkpoint_arptables
	struct arpt_replace replace; // request that re-installs the captured contents
};
1022
// Tables handled by checkpoint_arptables/reset_arptables.
// Only the name is initialized here; the rest starts zeroed.
static struct arpt_table_desc arpt_tables[] = {
    {.name = "filter"},
};
1026
// Socket option numbers for the arptables requests used below.
// ARPT_SO_SET_REPLACE and ARPT_SO_GET_INFO share a value: the former is passed
// to setsockopt, the latter to getsockopt.
#define ARPT_BASE_CTL 96
#define ARPT_SO_SET_REPLACE (ARPT_BASE_CTL)
#define ARPT_SO_GET_INFO (ARPT_BASE_CTL)
#define ARPT_SO_GET_ENTRIES (ARPT_BASE_CTL + 1)
1031
checkpoint_iptables(struct ipt_table_desc * tables,int num_tables,int family,int level)1032 static void checkpoint_iptables(struct ipt_table_desc* tables, int num_tables, int family, int level)
1033 {
1034 struct ipt_get_entries entries;
1035 socklen_t optlen;
1036 int fd, i;
1037
1038 fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
1039 if (fd == -1) {
1040 switch (errno) {
1041 case EAFNOSUPPORT:
1042 case ENOPROTOOPT:
1043 return;
1044 }
1045 fail("iptable checkpoint %d: socket failed", family);
1046 }
1047 for (i = 0; i < num_tables; i++) {
1048 struct ipt_table_desc* table = &tables[i];
1049 strcpy(table->info.name, table->name);
1050 strcpy(table->replace.name, table->name);
1051 optlen = sizeof(table->info);
1052 if (getsockopt(fd, level, IPT_SO_GET_INFO, &table->info, &optlen)) {
1053 switch (errno) {
1054 case EPERM:
1055 case ENOENT:
1056 case ENOPROTOOPT:
1057 continue;
1058 }
1059 fail("iptable checkpoint %s/%d: getsockopt(IPT_SO_GET_INFO)", table->name, family);
1060 }
1061 debug("iptable checkpoint %s/%d: checkpoint entries=%d hooks=%x size=%d\n",
1062 table->name, family, table->info.num_entries,
1063 table->info.valid_hooks, table->info.size);
1064 if (table->info.size > sizeof(table->replace.entrytable))
1065 fail("iptable checkpoint %s/%d: table size is too large: %u",
1066 table->name, family, table->info.size);
1067 if (table->info.num_entries > XT_MAX_ENTRIES)
1068 fail("iptable checkpoint %s/%d: too many counters: %u",
1069 table->name, family, table->info.num_entries);
1070 memset(&entries, 0, sizeof(entries));
1071 strcpy(entries.name, table->name);
1072 entries.size = table->info.size;
1073 optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size;
1074 if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen))
1075 fail("iptable checkpoint %s/%d: getsockopt(IPT_SO_GET_ENTRIES)",
1076 table->name, family);
1077 table->replace.valid_hooks = table->info.valid_hooks;
1078 table->replace.num_entries = table->info.num_entries;
1079 table->replace.size = table->info.size;
1080 memcpy(table->replace.hook_entry, table->info.hook_entry, sizeof(table->replace.hook_entry));
1081 memcpy(table->replace.underflow, table->info.underflow, sizeof(table->replace.underflow));
1082 memcpy(table->replace.entrytable, entries.entrytable, table->info.size);
1083 }
1084 close(fd);
1085 }
1086
reset_iptables(struct ipt_table_desc * tables,int num_tables,int family,int level)1087 static void reset_iptables(struct ipt_table_desc* tables, int num_tables, int family, int level)
1088 {
1089 struct xt_counters counters[XT_MAX_ENTRIES];
1090 struct ipt_get_entries entries;
1091 struct ipt_getinfo info;
1092 socklen_t optlen;
1093 int fd, i;
1094
1095 fd = socket(family, SOCK_STREAM, IPPROTO_TCP);
1096 if (fd == -1) {
1097 switch (errno) {
1098 case EAFNOSUPPORT:
1099 case ENOPROTOOPT:
1100 return;
1101 }
1102 fail("iptable %d: socket failed", family);
1103 }
1104 for (i = 0; i < num_tables; i++) {
1105 struct ipt_table_desc* table = &tables[i];
1106 if (table->info.valid_hooks == 0)
1107 continue;
1108 memset(&info, 0, sizeof(info));
1109 strcpy(info.name, table->name);
1110 optlen = sizeof(info);
1111 if (getsockopt(fd, level, IPT_SO_GET_INFO, &info, &optlen))
1112 fail("iptable %s/%d: getsockopt(IPT_SO_GET_INFO)", table->name, family);
1113 if (memcmp(&table->info, &info, sizeof(table->info)) == 0) {
1114 memset(&entries, 0, sizeof(entries));
1115 strcpy(entries.name, table->name);
1116 entries.size = table->info.size;
1117 optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size;
1118 if (getsockopt(fd, level, IPT_SO_GET_ENTRIES, &entries, &optlen))
1119 fail("iptable %s/%d: getsockopt(IPT_SO_GET_ENTRIES)", table->name, family);
1120 if (memcmp(table->replace.entrytable, entries.entrytable, table->info.size) == 0)
1121 continue;
1122 }
1123 debug("iptable %s/%d: resetting\n", table->name, family);
1124 table->replace.num_counters = info.num_entries;
1125 table->replace.counters = counters;
1126 optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) + table->replace.size;
1127 if (setsockopt(fd, level, IPT_SO_SET_REPLACE, &table->replace, optlen))
1128 fail("iptable %s/%d: setsockopt(IPT_SO_SET_REPLACE)", table->name, family);
1129 }
1130 close(fd);
1131 }
1132
checkpoint_arptables(void)1133 static void checkpoint_arptables(void)
1134 {
1135 struct arpt_get_entries entries;
1136 socklen_t optlen;
1137 unsigned i;
1138 int fd;
1139
1140 fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
1141 if (fd == -1) {
1142 switch (errno) {
1143 case EAFNOSUPPORT:
1144 case ENOPROTOOPT:
1145 return;
1146 }
1147 fail("arptable checkpoint: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
1148 }
1149 for (i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) {
1150 struct arpt_table_desc* table = &arpt_tables[i];
1151 strcpy(table->info.name, table->name);
1152 strcpy(table->replace.name, table->name);
1153 optlen = sizeof(table->info);
1154 if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &table->info, &optlen)) {
1155 switch (errno) {
1156 case EPERM:
1157 case ENOENT:
1158 case ENOPROTOOPT:
1159 continue;
1160 }
1161 fail("arptable checkpoint %s: getsockopt(ARPT_SO_GET_INFO)", table->name);
1162 }
1163 debug("arptable checkpoint %s: entries=%d hooks=%x size=%d\n",
1164 table->name, table->info.num_entries, table->info.valid_hooks, table->info.size);
1165 if (table->info.size > sizeof(table->replace.entrytable))
1166 fail("arptable checkpoint %s: table size is too large: %u",
1167 table->name, table->info.size);
1168 if (table->info.num_entries > XT_MAX_ENTRIES)
1169 fail("arptable checkpoint %s: too many counters: %u",
1170 table->name, table->info.num_entries);
1171 memset(&entries, 0, sizeof(entries));
1172 strcpy(entries.name, table->name);
1173 entries.size = table->info.size;
1174 optlen = sizeof(entries) - sizeof(entries.entrytable) + table->info.size;
1175 if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen))
1176 fail("arptable checkpoint %s: getsockopt(ARPT_SO_GET_ENTRIES)", table->name);
1177 table->replace.valid_hooks = table->info.valid_hooks;
1178 table->replace.num_entries = table->info.num_entries;
1179 table->replace.size = table->info.size;
1180 memcpy(table->replace.hook_entry, table->info.hook_entry, sizeof(table->replace.hook_entry));
1181 memcpy(table->replace.underflow, table->info.underflow, sizeof(table->replace.underflow));
1182 memcpy(table->replace.entrytable, entries.entrytable, table->info.size);
1183 }
1184 close(fd);
1185 }
1186
reset_arptables()1187 static void reset_arptables()
1188 {
1189 struct xt_counters counters[XT_MAX_ENTRIES];
1190 struct arpt_get_entries entries;
1191 struct arpt_getinfo info;
1192 socklen_t optlen;
1193 unsigned i;
1194 int fd;
1195
1196 fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
1197 if (fd == -1) {
1198 switch (errno) {
1199 case EAFNOSUPPORT:
1200 case ENOPROTOOPT:
1201 return;
1202 }
1203 fail("arptable: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
1204 }
1205 for (i = 0; i < sizeof(arpt_tables) / sizeof(arpt_tables[0]); i++) {
1206 struct arpt_table_desc* table = &arpt_tables[i];
1207 if (table->info.valid_hooks == 0)
1208 continue;
1209 memset(&info, 0, sizeof(info));
1210 strcpy(info.name, table->name);
1211 optlen = sizeof(info);
1212 if (getsockopt(fd, SOL_IP, ARPT_SO_GET_INFO, &info, &optlen))
1213 fail("arptable %s:getsockopt(ARPT_SO_GET_INFO)", table->name);
1214 if (memcmp(&table->info, &info, sizeof(table->info)) == 0) {
1215 memset(&entries, 0, sizeof(entries));
1216 strcpy(entries.name, table->name);
1217 entries.size = table->info.size;
1218 optlen = sizeof(entries) - sizeof(entries.entrytable) + entries.size;
1219 if (getsockopt(fd, SOL_IP, ARPT_SO_GET_ENTRIES, &entries, &optlen))
1220 fail("arptable %s: getsockopt(ARPT_SO_GET_ENTRIES)", table->name);
1221 if (memcmp(table->replace.entrytable, entries.entrytable, table->info.size) == 0)
1222 continue;
1223 debug("arptable %s: data changed\n", table->name);
1224 } else {
1225 debug("arptable %s: header changed\n", table->name);
1226 }
1227 debug("arptable %s: resetting\n", table->name);
1228 table->replace.num_counters = info.num_entries;
1229 table->replace.counters = counters;
1230 optlen = sizeof(table->replace) - sizeof(table->replace.entrytable) + table->replace.size;
1231 if (setsockopt(fd, SOL_IP, ARPT_SO_SET_REPLACE, &table->replace, optlen))
1232 fail("arptable %s: setsockopt(ARPT_SO_SET_REPLACE)", table->name);
1233 }
1234 close(fd);
1235 }
1236
1237 #include <linux/if.h>
1238 #include <linux/netfilter_bridge/ebtables.h>
1239
// Checkpointed state of one ebtables table.
struct ebt_table_desc {
	const char* name; // table name, copied into the request structs
	struct ebt_replace replace; // table header filled in by checkpoint_ebtables
	char entrytable[XT_TABLE_SIZE]; // buffer that receives the checkpointed entries
};
1245
// Tables handled by checkpoint_ebtables/reset_ebtables.
// Only the names are initialized here; the rest starts zeroed.
static struct ebt_table_desc ebt_tables[] = {
    {.name = "filter"},
    {.name = "nat"},
    {.name = "broute"},
};
1251
checkpoint_ebtables(void)1252 static void checkpoint_ebtables(void)
1253 {
1254 socklen_t optlen;
1255 unsigned i;
1256 int fd;
1257
1258 fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
1259 if (fd == -1) {
1260 switch (errno) {
1261 case EAFNOSUPPORT:
1262 case ENOPROTOOPT:
1263 return;
1264 }
1265 fail("ebtable checkpoint: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
1266 }
1267 for (i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) {
1268 struct ebt_table_desc* table = &ebt_tables[i];
1269 strcpy(table->replace.name, table->name);
1270 optlen = sizeof(table->replace);
1271 if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_INFO, &table->replace, &optlen)) {
1272 switch (errno) {
1273 case EPERM:
1274 case ENOENT:
1275 case ENOPROTOOPT:
1276 continue;
1277 }
1278 fail("ebtable checkpoint %s: getsockopt(EBT_SO_GET_INIT_INFO)", table->name);
1279 }
1280 debug("ebtable checkpoint %s: entries=%d hooks=%x size=%d\n",
1281 table->name, table->replace.nentries, table->replace.valid_hooks,
1282 table->replace.entries_size);
1283 if (table->replace.entries_size > sizeof(table->entrytable))
1284 fail("ebtable checkpoint %s: table size is too large: %u",
1285 table->name, table->replace.entries_size);
1286 table->replace.num_counters = 0;
1287 table->replace.entries = table->entrytable;
1288 optlen = sizeof(table->replace) + table->replace.entries_size;
1289 if (getsockopt(fd, SOL_IP, EBT_SO_GET_INIT_ENTRIES, &table->replace, &optlen))
1290 fail("ebtable checkpoint %s: getsockopt(EBT_SO_GET_INIT_ENTRIES)", table->name);
1291 }
1292 close(fd);
1293 }
1294
reset_ebtables()1295 static void reset_ebtables()
1296 {
1297 struct ebt_replace replace;
1298 char entrytable[XT_TABLE_SIZE];
1299 socklen_t optlen;
1300 unsigned i, j, h;
1301 int fd;
1302
1303 fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
1304 if (fd == -1) {
1305 switch (errno) {
1306 case EAFNOSUPPORT:
1307 case ENOPROTOOPT:
1308 return;
1309 }
1310 fail("ebtable: socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)");
1311 }
1312 for (i = 0; i < sizeof(ebt_tables) / sizeof(ebt_tables[0]); i++) {
1313 struct ebt_table_desc* table = &ebt_tables[i];
1314 if (table->replace.valid_hooks == 0)
1315 continue;
1316 memset(&replace, 0, sizeof(replace));
1317 strcpy(replace.name, table->name);
1318 optlen = sizeof(replace);
1319 if (getsockopt(fd, SOL_IP, EBT_SO_GET_INFO, &replace, &optlen))
1320 fail("ebtable %s: getsockopt(EBT_SO_GET_INFO)", table->name);
1321 replace.num_counters = 0;
1322 table->replace.entries = 0;
1323 for (h = 0; h < NF_BR_NUMHOOKS; h++)
1324 table->replace.hook_entry[h] = 0;
1325 if (memcmp(&table->replace, &replace, sizeof(table->replace)) == 0) {
1326 memset(&entrytable, 0, sizeof(entrytable));
1327 replace.entries = entrytable;
1328 optlen = sizeof(replace) + replace.entries_size;
1329 if (getsockopt(fd, SOL_IP, EBT_SO_GET_ENTRIES, &replace, &optlen))
1330 fail("ebtable %s: getsockopt(EBT_SO_GET_ENTRIES)", table->name);
1331 if (memcmp(table->entrytable, entrytable, replace.entries_size) == 0)
1332 continue;
1333 }
1334 debug("ebtable %s: resetting\n", table->name);
1335 // Kernel does not seem to return actual entry points (wat?).
1336 for (j = 0, h = 0; h < NF_BR_NUMHOOKS; h++) {
1337 if (table->replace.valid_hooks & (1 << h)) {
1338 table->replace.hook_entry[h] = (struct ebt_entries*)table->entrytable + j;
1339 j++;
1340 }
1341 }
1342 table->replace.entries = table->entrytable;
1343 optlen = sizeof(table->replace) + table->replace.entries_size;
1344 if (setsockopt(fd, SOL_IP, EBT_SO_SET_ENTRIES, &table->replace, optlen))
1345 fail("ebtable %s: setsockopt(EBT_SO_SET_ENTRIES)", table->name);
1346 }
1347 close(fd);
1348 }
1349
checkpoint_net_namespace(void)1350 static void checkpoint_net_namespace(void)
1351 {
1352 #if SYZ_EXECUTOR
1353 if (flag_sandbox == sandbox_setuid)
1354 return;
1355 #endif
1356 checkpoint_ebtables();
1357 checkpoint_arptables();
1358 checkpoint_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]), AF_INET, SOL_IP);
1359 checkpoint_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]), AF_INET6, SOL_IPV6);
1360 }
1361
reset_net_namespace(void)1362 static void reset_net_namespace(void)
1363 {
1364 #if SYZ_EXECUTOR
1365 if (flag_sandbox == sandbox_setuid)
1366 return;
1367 #endif
1368 reset_ebtables();
1369 reset_arptables();
1370 reset_iptables(ipv4_tables, sizeof(ipv4_tables) / sizeof(ipv4_tables[0]), AF_INET, SOL_IP);
1371 reset_iptables(ipv6_tables, sizeof(ipv6_tables) / sizeof(ipv6_tables[0]), AF_INET6, SOL_IPV6);
1372 }
1373 #endif
1374
1375 #if SYZ_EXECUTOR || SYZ_ENABLE_CGROUPS
1376 #include <fcntl.h>
1377 #include <sys/mount.h>
1378 #include <sys/stat.h>
1379 #include <sys/types.h>
1380
// Creates /syzcgroup and mounts three cgroup hierarchies under it:
// unified (cgroup2), cpu and net (cgroup v1 with the listed controllers).
// Every step is best-effort: failures are only reported via debug().
// Since debug is stripped by pkg/csource, every debug call is kept inside {}.
static void setup_cgroups()
{
	if (mkdir("/syzcgroup", 0777)) {
		debug("mkdir(/syzcgroup) failed: %d\n", errno);
	}
	if (mkdir("/syzcgroup/unified", 0777)) {
		debug("mkdir(/syzcgroup/unified) failed: %d\n", errno);
	}
	if (mount("none", "/syzcgroup/unified", "cgroup2", 0, NULL)) {
		debug("mount(cgroup2) failed: %d\n", errno);
	}
	if (chmod("/syzcgroup/unified", 0777)) {
		debug("chmod(/syzcgroup/unified) failed: %d\n", errno);
	}
	// A write that names several controllers either succeeds for all of them
	// or fails for all of them. Enable the controllers one by one so that
	// a controller missing from this kernel does not prevent enabling the others.
	const char* unified_controllers[] = {"+cpu", "+memory", "+io", "+pids", "+rdma"};
	unsigned i;
	for (i = 0; i < sizeof(unified_controllers) / sizeof(unified_controllers[0]); i++) {
		if (!write_file("/syzcgroup/unified/cgroup.subtree_control", "%s", unified_controllers[i])) {
			debug("write(cgroup.subtree_control, %s) failed: %d\n", unified_controllers[i], errno);
		}
	}
	if (mkdir("/syzcgroup/cpu", 0777)) {
		debug("mkdir(/syzcgroup/cpu) failed: %d\n", errno);
	}
	if (mount("none", "/syzcgroup/cpu", "cgroup", 0, "cpuset,cpuacct,perf_event,hugetlb")) {
		debug("mount(cgroup cpu) failed: %d\n", errno);
	}
	if (!write_file("/syzcgroup/cpu/cgroup.clone_children", "1")) {
		debug("write(/syzcgroup/cpu/cgroup.clone_children) failed: %d\n", errno);
	}
	if (chmod("/syzcgroup/cpu", 0777)) {
		debug("chmod(/syzcgroup/cpu) failed: %d\n", errno);
	}
	if (mkdir("/syzcgroup/net", 0777)) {
		debug("mkdir(/syzcgroup/net) failed: %d\n", errno);
	}
	if (mount("none", "/syzcgroup/net", "cgroup", 0, "net_cls,net_prio,devices,freezer")) {
		debug("mount(cgroup net) failed: %d\n", errno);
	}
	if (chmod("/syzcgroup/net", 0777)) {
		debug("chmod(/syzcgroup/net) failed: %d\n", errno);
	}
}
1420
1421 // TODO(dvyukov): this should be under a separate define for separate minimization,
1422 // but for now we bundle this with cgroups.
// Mounts binfmt_misc and registers two handlers (syz0 and syz1) that both
// name ./file0 as the interpreter. All steps are best-effort: failures are
// only reported via debug().
static void setup_binfmt_misc()
{
	const char* register_file = "/proc/sys/fs/binfmt_misc/register";

	if (mount(NULL, "/proc/sys/fs/binfmt_misc", "binfmt_misc", 0, NULL) != 0) {
		debug("mount(binfmt_misc) failed: %d\n", errno);
	}
	if (!write_file(register_file, ":syz0:M:0:\x01::./file0:")) {
		debug("write(/proc/sys/fs/binfmt_misc/register, syz0) failed: %d\n", errno);
	}
	if (!write_file(register_file, ":syz1:M:1:\x02::./file0:POC")) {
		debug("write(/proc/sys/fs/binfmt_misc/register, syz1) failed: %d\n", errno);
	}
}
1435 #endif
1436
1437 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE
1438 #include <errno.h>
1439 #include <sys/mount.h>
1440
// Setup shared by all sandbox flavours: mounts fusectl (best-effort) and,
// when cgroups are enabled for this build, sets up cgroups and binfmt_misc.
static void setup_common()
{
	if (mount(NULL, "/sys/fs/fuse/connections", "fusectl", 0, NULL) != 0) {
		debug("mount(fusectl) failed: %d\n", errno);
	}
#if SYZ_EXECUTOR || SYZ_ENABLE_CGROUPS
	setup_cgroups();
	setup_binfmt_misc();
#endif
}
1451 #endif
1452
1453 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE || SYZ_SANDBOX_SETUID || SYZ_SANDBOX_NAMESPACE
1454 #include <sched.h>
1455 #include <sys/prctl.h>
1456 #include <sys/resource.h>
1457 #include <sys/time.h>
1458 #include <sys/wait.h>
1459
1460 static void loop();
1461
sandbox_common()1462 static void sandbox_common()
1463 {
1464 prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
1465 setpgrp();
1466 setsid();
1467
1468 #if SYZ_EXECUTOR || __NR_syz_init_net_socket
1469 int netns = open("/proc/self/ns/net", O_RDONLY);
1470 if (netns == -1)
1471 fail("open(/proc/self/ns/net) failed");
1472 if (dup2(netns, kInitNetNsFd) < 0)
1473 fail("dup2(netns, kInitNetNsFd) failed");
1474 close(netns);
1475 #endif
1476
1477 struct rlimit rlim;
1478 rlim.rlim_cur = rlim.rlim_max = 160 << 20;
1479 setrlimit(RLIMIT_AS, &rlim);
1480 rlim.rlim_cur = rlim.rlim_max = 8 << 20;
1481 setrlimit(RLIMIT_MEMLOCK, &rlim);
1482 rlim.rlim_cur = rlim.rlim_max = 136 << 20;
1483 setrlimit(RLIMIT_FSIZE, &rlim);
1484 rlim.rlim_cur = rlim.rlim_max = 1 << 20;
1485 setrlimit(RLIMIT_STACK, &rlim);
1486 rlim.rlim_cur = rlim.rlim_max = 0;
1487 setrlimit(RLIMIT_CORE, &rlim);
1488 rlim.rlim_cur = rlim.rlim_max = 256; // see kMaxFd
1489 setrlimit(RLIMIT_NOFILE, &rlim);
1490
1491 // CLONE_NEWNS/NEWCGROUP cause EINVAL on some systems,
1492 // so we do them separately of clone in do_sandbox_namespace.
1493 if (unshare(CLONE_NEWNS)) {
1494 debug("unshare(CLONE_NEWNS): %d\n", errno);
1495 }
1496 if (unshare(CLONE_NEWIPC)) {
1497 debug("unshare(CLONE_NEWIPC): %d\n", errno);
1498 }
1499 if (unshare(0x02000000)) {
1500 debug("unshare(CLONE_NEWCGROUP): %d\n", errno);
1501 }
1502 if (unshare(CLONE_NEWUTS)) {
1503 debug("unshare(CLONE_NEWUTS): %d\n", errno);
1504 }
1505 if (unshare(CLONE_SYSVSEM)) {
1506 debug("unshare(CLONE_SYSVSEM): %d\n", errno);
1507 }
1508 }
1509
// Waits until the child identified by pid terminates and returns
// WEXITSTATUS of its wait status. A negative pid (failed fork/clone) is fatal.
// Other children reaped in the meantime are ignored.
int wait_for_loop(int pid)
{
	if (pid < 0)
		fail("sandbox fork failed");
	debug("spawned loop pid %d\n", pid);
	int wstatus = 0;
	for (;;) {
		if (waitpid(-1, &wstatus, __WALL) == pid)
			break;
	}
	return WEXITSTATUS(wstatus);
}
1520 #endif
1521
1522 #if SYZ_EXECUTOR || SYZ_SANDBOX_NONE
1523 #include <sched.h>
1524 #include <sys/types.h>
1525
do_sandbox_none(void)1526 static int do_sandbox_none(void)
1527 {
1528 // CLONE_NEWPID takes effect for the first child of the current process,
1529 // so we do it before fork to make the loop "init" process of the namespace.
1530 // We ought to do fail here, but sandbox=none is used in pkg/ipc tests
1531 // and they are usually run under non-root.
1532 // Also since debug is stripped by pkg/csource, we need to do {}
1533 // even though we generally don't do {} around single statements.
1534 if (unshare(CLONE_NEWPID)) {
1535 debug("unshare(CLONE_NEWPID): %d\n", errno);
1536 }
1537 int pid = fork();
1538 if (pid != 0)
1539 return wait_for_loop(pid);
1540
1541 setup_common();
1542 sandbox_common();
1543 if (unshare(CLONE_NEWNET)) {
1544 debug("unshare(CLONE_NEWNET): %d\n", errno);
1545 }
1546 #if SYZ_EXECUTOR || SYZ_TUN_ENABLE
1547 initialize_tun();
1548 #endif
1549 #if SYZ_EXECUTOR || SYZ_ENABLE_NETDEV
1550 initialize_netdevices();
1551 #endif
1552 loop();
1553 doexit(1);
1554 }
1555 #endif
1556
1557 #if SYZ_EXECUTOR || SYZ_SANDBOX_SETUID
1558 #include <grp.h>
1559 #include <sched.h>
1560 #include <sys/prctl.h>
1561
do_sandbox_setuid(void)1562 static int do_sandbox_setuid(void)
1563 {
1564 if (unshare(CLONE_NEWPID)) {
1565 debug("unshare(CLONE_NEWPID): %d\n", errno);
1566 }
1567 int pid = fork();
1568 if (pid != 0)
1569 return wait_for_loop(pid);
1570
1571 setup_common();
1572 sandbox_common();
1573 if (unshare(CLONE_NEWNET)) {
1574 debug("unshare(CLONE_NEWNET): %d\n", errno);
1575 }
1576 #if SYZ_EXECUTOR || SYZ_TUN_ENABLE
1577 initialize_tun();
1578 #endif
1579 #if SYZ_EXECUTOR || SYZ_ENABLE_NETDEV
1580 initialize_netdevices();
1581 #endif
1582
1583 const int nobody = 65534;
1584 if (setgroups(0, NULL))
1585 fail("failed to setgroups");
1586 if (syscall(SYS_setresgid, nobody, nobody, nobody))
1587 fail("failed to setresgid");
1588 if (syscall(SYS_setresuid, nobody, nobody, nobody))
1589 fail("failed to setresuid");
1590
1591 // This is required to open /proc/self/* files.
1592 // Otherwise they are owned by root and we can't open them after setuid.
1593 // See task_dump_owner function in kernel.
1594 prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
1595
1596 loop();
1597 doexit(1);
1598 }
1599 #endif
1600
1601 #if SYZ_EXECUTOR || SYZ_SANDBOX_NAMESPACE
1602 #include <linux/capability.h>
1603 #include <sched.h>
1604 #include <sys/mman.h>
1605 #include <sys/mount.h>
1606
// uid/gid of the process that calls do_sandbox_namespace, recorded before clone;
// namespace_sandbox_proc writes them into /proc/self/uid_map and gid_map.
static int real_uid;
static int real_gid;
// Stack for the process cloned in do_sandbox_namespace (1MB, 64KB-aligned).
// Its first 4096 bytes are made inaccessible there to catch stack underflows.
__attribute__((aligned(64 << 10))) static char sandbox_stack[1 << 20];
1610
// Entry point of the process cloned by do_sandbox_namespace (arg is unused).
// Sets up id mappings, a fresh net namespace and a minimal root file system
// under ./syz-tmp, drops CAP_SYS_PTRACE and then runs the test loop.
// Never returns: it ends in doexit(1) after loop().
static int namespace_sandbox_proc(void* arg)
{
	sandbox_common();

	// Map id 0 inside the namespace to the ids recorded in real_uid/real_gid.
	// /proc/self/setgroups is not present on some systems, ignore error.
	write_file("/proc/self/setgroups", "deny");
	if (!write_file("/proc/self/uid_map", "0 %d 1\n", real_uid))
		fail("write of /proc/self/uid_map failed");
	if (!write_file("/proc/self/gid_map", "0 %d 1\n", real_gid))
		fail("write of /proc/self/gid_map failed");

	// CLONE_NEWNET must always happen before tun setup,
	// because we want the tun device in the test namespace.
	if (unshare(CLONE_NEWNET))
		fail("unshare(CLONE_NEWNET)");
#if SYZ_EXECUTOR || SYZ_TUN_ENABLE
	// We setup tun here as it needs to be in the test net namespace,
	// which in turn needs to be in the test user namespace.
	// However, IFF_NAPI_FRAGS will fail as we are not root already.
	// There does not seem to be a call sequence that would satisfy all of that.
	initialize_tun();
#endif
#if SYZ_EXECUTOR || SYZ_ENABLE_NETDEV
	initialize_netdevices();
#endif

	// Build the new root on a tmpfs: ./syz-tmp/newroot with dev, proc, selinux, sys
	// (and the cgroup mounts when enabled) bind-mounted or mounted into it.
	if (mkdir("./syz-tmp", 0777))
		fail("mkdir(syz-tmp) failed");
	if (mount("", "./syz-tmp", "tmpfs", 0, NULL))
		fail("mount(tmpfs) failed");
	if (mkdir("./syz-tmp/newroot", 0777))
		fail("mkdir failed");
	if (mkdir("./syz-tmp/newroot/dev", 0700))
		fail("mkdir failed");
	unsigned bind_mount_flags = MS_BIND | MS_REC | MS_PRIVATE;
	if (mount("/dev", "./syz-tmp/newroot/dev", NULL, bind_mount_flags, NULL))
		fail("mount(dev) failed");
	if (mkdir("./syz-tmp/newroot/proc", 0700))
		fail("mkdir failed");
	if (mount(NULL, "./syz-tmp/newroot/proc", "proc", 0, NULL))
		fail("mount(proc) failed");
	if (mkdir("./syz-tmp/newroot/selinux", 0700))
		fail("mkdir failed");
	// selinux mount used to be at /selinux, but then moved to /sys/fs/selinux.
	// Try the old location first; a missing source (ENOENT) is tolerated for both.
	const char* selinux_path = "./syz-tmp/newroot/selinux";
	if (mount("/selinux", selinux_path, NULL, bind_mount_flags, NULL)) {
		if (errno != ENOENT)
			fail("mount(/selinux) failed");
		if (mount("/sys/fs/selinux", selinux_path, NULL, bind_mount_flags, NULL) && errno != ENOENT)
			fail("mount(/sys/fs/selinux) failed");
	}
	if (mkdir("./syz-tmp/newroot/sys", 0700))
		fail("mkdir failed");
	if (mount("/sys", "./syz-tmp/newroot/sys", 0, bind_mount_flags, NULL))
		fail("mount(sysfs) failed");
#if SYZ_EXECUTOR || SYZ_ENABLE_CGROUPS
	// The mount points must exist, but the cgroup bind mounts themselves are best-effort.
	if (mkdir("./syz-tmp/newroot/syzcgroup", 0700))
		fail("mkdir failed");
	if (mkdir("./syz-tmp/newroot/syzcgroup/unified", 0700))
		fail("mkdir failed");
	if (mkdir("./syz-tmp/newroot/syzcgroup/cpu", 0700))
		fail("mkdir failed");
	if (mkdir("./syz-tmp/newroot/syzcgroup/net", 0700))
		fail("mkdir failed");
	if (mount("/syzcgroup/unified", "./syz-tmp/newroot/syzcgroup/unified", NULL, bind_mount_flags, NULL)) {
		debug("mount(cgroup2, MS_BIND) failed: %d\n", errno);
	}
	if (mount("/syzcgroup/cpu", "./syz-tmp/newroot/syzcgroup/cpu", NULL, bind_mount_flags, NULL)) {
		debug("mount(cgroup/cpu, MS_BIND) failed: %d\n", errno);
	}
	if (mount("/syzcgroup/net", "./syz-tmp/newroot/syzcgroup/net", NULL, bind_mount_flags, NULL)) {
		debug("mount(cgroup/net, MS_BIND) failed: %d\n", errno);
	}
#endif
	// Switch the root to ./syz-tmp. If pivot_root fails we only chdir there;
	// in both cases the chroot below then enters ./newroot.
	if (mkdir("./syz-tmp/pivot", 0777))
		fail("mkdir failed");
	if (syscall(SYS_pivot_root, "./syz-tmp", "./syz-tmp/pivot")) {
		debug("pivot_root failed\n");
		if (chdir("./syz-tmp"))
			fail("chdir failed");
	} else {
		debug("pivot_root OK\n");
		if (chdir("/"))
			fail("chdir failed");
		// Detach the old root that pivot_root left at ./pivot.
		if (umount2("./pivot", MNT_DETACH))
			fail("umount failed");
	}
	if (chroot("./newroot"))
		fail("chroot failed");
	if (chdir("/"))
		fail("chdir failed");

	// Drop CAP_SYS_PTRACE so that test processes can't attach to parent processes.
	// Previously it lead to hangs because the loop process stopped due to SIGSTOP.
	// Note that a process can always ptrace its direct children, which is enough
	// for testing purposes.
	struct __user_cap_header_struct cap_hdr = {};
	struct __user_cap_data_struct cap_data[2] = {};
	cap_hdr.version = _LINUX_CAPABILITY_VERSION_3;
	cap_hdr.pid = getpid();
	if (syscall(SYS_capget, &cap_hdr, &cap_data))
		fail("capget failed");
	// Clear the capability from all three sets before writing them back.
	cap_data[0].effective &= ~(1 << CAP_SYS_PTRACE);
	cap_data[0].permitted &= ~(1 << CAP_SYS_PTRACE);
	cap_data[0].inheritable &= ~(1 << CAP_SYS_PTRACE);
	if (syscall(SYS_capset, &cap_hdr, &cap_data))
		fail("capset failed");

	loop();
	doexit(1);
}
1722
do_sandbox_namespace(void)1723 static int do_sandbox_namespace(void)
1724 {
1725 int pid;
1726
1727 setup_common();
1728 real_uid = getuid();
1729 real_gid = getgid();
1730 mprotect(sandbox_stack, 4096, PROT_NONE); // to catch stack underflows
1731 pid = clone(namespace_sandbox_proc, &sandbox_stack[sizeof(sandbox_stack) - 64],
1732 CLONE_NEWUSER | CLONE_NEWPID, 0);
1733 return wait_for_loop(pid);
1734 }
1735 #endif
1736
1737 #if SYZ_EXECUTOR || SYZ_REPEAT && SYZ_USE_TMP_DIR
1738 #include <dirent.h>
1739 #include <errno.h>
1740 #include <string.h>
1741 #include <sys/ioctl.h>
1742 #include <sys/mount.h>
1743
// ioctl request that remove_dir issues with an all-zero flags word
// to clear the immutable flag before retrying a removal.
#define FS_IOC_SETFLAGS _IOW('f', 2, long)
1745
1746 // One does not simply remove a directory.
1747 // There can be mounts, so we need to try to umount.
1748 // Moreover, a mount can be mounted several times, so we need to try to umount in a loop.
1749 // Moreover, after umount a dir can become non-empty again, so we need another loop.
1750 // Moreover, a mount can be re-mounted as read-only and then we will fail to make a dir empty.
remove_dir(const char * dir)1751 static void remove_dir(const char* dir)
1752 {
1753 DIR* dp;
1754 struct dirent* ep;
1755 int iter = 0;
1756 retry:
1757 while (umount2(dir, MNT_DETACH) == 0) {
1758 debug("umount(%s)\n", dir);
1759 }
1760 dp = opendir(dir);
1761 if (dp == NULL) {
1762 if (errno == EMFILE) {
1763 // This happens when the test process casts prlimit(NOFILE) on us.
1764 // Ideally we somehow prevent test processes from messing with parent processes.
1765 // But full sandboxing is expensive, so let's ignore this error for now.
1766 exitf("opendir(%s) failed due to NOFILE, exiting", dir);
1767 }
1768 exitf("opendir(%s) failed", dir);
1769 }
1770 while ((ep = readdir(dp))) {
1771 if (strcmp(ep->d_name, ".") == 0 || strcmp(ep->d_name, "..") == 0)
1772 continue;
1773 char filename[FILENAME_MAX];
1774 snprintf(filename, sizeof(filename), "%s/%s", dir, ep->d_name);
1775 // If it's 9p mount with broken transport, lstat will fail.
1776 // So try to umount first.
1777 while (umount2(filename, MNT_DETACH) == 0) {
1778 debug("umount(%s)\n", filename);
1779 }
1780 struct stat st;
1781 if (lstat(filename, &st))
1782 exitf("lstat(%s) failed", filename);
1783 if (S_ISDIR(st.st_mode)) {
1784 remove_dir(filename);
1785 continue;
1786 }
1787 int i;
1788 for (i = 0;; i++) {
1789 debug("unlink(%s)\n", filename);
1790 if (unlink(filename) == 0)
1791 break;
1792 if (errno == EPERM) {
1793 // Try to reset FS_XFLAG_IMMUTABLE.
1794 int fd = open(filename, O_RDONLY);
1795 if (fd != -1) {
1796 long flags = 0;
1797 if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0)
1798 debug("reset FS_XFLAG_IMMUTABLE\n");
1799 close(fd);
1800 continue;
1801 }
1802 }
1803 if (errno == EROFS) {
1804 debug("ignoring EROFS\n");
1805 break;
1806 }
1807 if (errno != EBUSY || i > 100)
1808 exitf("unlink(%s) failed", filename);
1809 debug("umount(%s)\n", filename);
1810 if (umount2(filename, MNT_DETACH))
1811 exitf("umount(%s) failed", filename);
1812 }
1813 }
1814 closedir(dp);
1815 int i;
1816 for (i = 0;; i++) {
1817 debug("rmdir(%s)\n", dir);
1818 if (rmdir(dir) == 0)
1819 break;
1820 if (i < 100) {
1821 if (errno == EPERM) {
1822 // Try to reset FS_XFLAG_IMMUTABLE.
1823 int fd = open(dir, O_RDONLY);
1824 if (fd != -1) {
1825 long flags = 0;
1826 if (ioctl(fd, FS_IOC_SETFLAGS, &flags) == 0)
1827 debug("reset FS_XFLAG_IMMUTABLE\n");
1828 close(fd);
1829 continue;
1830 }
1831 }
1832 if (errno == EROFS) {
1833 debug("ignoring EROFS\n");
1834 break;
1835 }
1836 if (errno == EBUSY) {
1837 debug("umount(%s)\n", dir);
1838 if (umount2(dir, MNT_DETACH))
1839 exitf("umount(%s) failed", dir);
1840 continue;
1841 }
1842 if (errno == ENOTEMPTY) {
1843 if (iter < 100) {
1844 iter++;
1845 goto retry;
1846 }
1847 }
1848 }
1849 exitf("rmdir(%s) failed", dir);
1850 }
1851 }
1852 #endif
1853
1854 #if SYZ_EXECUTOR || SYZ_FAULT_INJECTION
1855 #include <fcntl.h>
1856 #include <string.h>
1857 #include <sys/stat.h>
1858 #include <sys/types.h>
1859
// Writes nth + 1 (as decimal text) to /proc/thread-self/fail-nth and returns
// the still-open file descriptor; fault_injected later reads and closes it.
// Exits via exitf if the file cannot be opened or written.
static int inject_fault(int nth)
{
	int fail_fd = open("/proc/thread-self/fail-nth", O_RDWR);
	// We treat errors here as temporary/non-critical because we see
	// occasional ENOENT/EACCES errors returned. It seems that fuzzer
	// somehow gets its hands to it.
	if (fail_fd == -1)
		exitf("failed to open /proc/thread-self/fail-nth");

	char text[16];
	sprintf(text, "%d", nth + 1);
	const size_t text_len = strlen(text);
	if (write(fail_fd, text, text_len) != (ssize_t)text_len)
		exitf("failed to write /proc/thread-self/fail-nth");
	return fail_fd;
}
1876 #endif
1877
1878 #if SYZ_EXECUTOR
// Reads the fail-nth file behind fail_fd, writes "0" back to it and closes it.
// Returns 1 if the file contained exactly "0\n", 0 otherwise.
// Exits via exitf if the file cannot be read or written.
static int fault_injected(int fail_fd)
{
	char reply[16];
	const int nread = read(fail_fd, reply, sizeof(reply) - 1);
	if (nread <= 0)
		exitf("failed to read /proc/thread-self/fail-nth");

	int injected = 0;
	if (nread == 2 && reply[0] == '0' && reply[1] == '\n')
		injected = 1;

	// Write a single '0' back before closing.
	reply[0] = '0';
	if (write(fail_fd, reply, 1) != 1)
		exitf("failed to write /proc/thread-self/fail-nth");
	close(fail_fd);
	return injected;
}
1892 #endif
1893
1894 #if SYZ_EXECUTOR || SYZ_REPEAT
1895 #include <dirent.h>
1896 #include <errno.h>
1897 #include <fcntl.h>
1898 #include <signal.h>
1899 #include <string.h>
1900 #include <sys/stat.h>
1901 #include <sys/types.h>
1902 #include <sys/wait.h>
1903
// kill_and_wait sends SIGKILL to process group pid and to process pid, then
// returns once waitpid reports pid, with the wait status stored in *status.
// If pid does not show up within the polling phase, every fuse connection
// listed in /sys/fs/fuse/connections is aborted before blocking in waitpid.
static void kill_and_wait(int pid, int* status)
{
	kill(-pid, SIGKILL);
	kill(pid, SIGKILL);

	// First, give it up to 100 ms to surrender: 100 polls, 1 ms apart.
	int attempt;
	for (attempt = 0; attempt < 100; attempt++) {
		if (waitpid(-1, status, WNOHANG | __WALL) == pid)
			return;
		usleep(1000);
	}

	// Now, try to abort fuse connections as they cause deadlocks,
	// see Documentation/filesystems/fuse.txt for details.
	// There is no good way to figure out the right connections
	// provided that the process could use unshare(CLONE_NEWNS),
	// so we abort all.
	debug("kill is not working\n");
	DIR* conns = opendir("/sys/fs/fuse/connections");
	if (!conns) {
		debug("failed to open /sys/fs/fuse/connections: %d\n", errno);
	} else {
		struct dirent* ent;
		while ((ent = readdir(conns)) != NULL) {
			const char* name = ent->d_name;
			if (!strcmp(name, ".") || !strcmp(name, ".."))
				continue;
			char abort_path[300];
			snprintf(abort_path, sizeof(abort_path), "/sys/fs/fuse/connections/%s/abort", name);
			int abort_fd = open(abort_path, O_WRONLY);
			if (abort_fd == -1) {
				debug("failed to open %s: %d\n", abort_path, errno);
				continue;
			}
			debug("aborting fuse conn %s\n", name);
			// One byte is written to the abort file; the path buffer
			// merely serves as a source for that byte.
			if (write(abort_fd, abort_path, 1) < 0) {
				debug("failed to abort: %d\n", errno);
			}
			close(abort_fd);
		}
		closedir(conns);
	}

	// Now, just wait, no other options.
	while (waitpid(-1, status, __WALL) != pid) {
	}
}
1950 #endif
1951
1952 #if SYZ_EXECUTOR || SYZ_REPEAT && (SYZ_ENABLE_CGROUPS || SYZ_RESET_NET_NAMESPACE)
1953 #include <fcntl.h>
1954 #include <sys/ioctl.h>
1955 #include <sys/stat.h>
1956 #include <sys/types.h>
1957 #include <unistd.h>
1958
1959 #define SYZ_HAVE_SETUP_LOOP 1
// setup_loop creates the per-proc cgroup directories and checkpoints the
// net namespace; each half is compiled in only when its feature is enabled.
static void setup_loop()
{
#if SYZ_EXECUTOR || SYZ_ENABLE_CGROUPS
	// For each of the unified, cpu and net hierarchies under /syzcgroup:
	// create the directory syz<procid> and write our pid into its
	// cgroup.procs file. Failures are only reported via debug().
	int self_pid = getpid();
	char dir_path[64];
	char procs_path[128];

	snprintf(dir_path, sizeof(dir_path), "/syzcgroup/unified/syz%llu", procid);
	if (mkdir(dir_path, 0777) != 0) {
		debug("mkdir(%s) failed: %d\n", dir_path, errno);
	}
	snprintf(procs_path, sizeof(procs_path), "%s/cgroup.procs", dir_path);
	if (!write_file(procs_path, "%d", self_pid)) {
		debug("write(%s) failed: %d\n", procs_path, errno);
	}

	snprintf(dir_path, sizeof(dir_path), "/syzcgroup/cpu/syz%llu", procid);
	if (mkdir(dir_path, 0777) != 0) {
		debug("mkdir(%s) failed: %d\n", dir_path, errno);
	}
	snprintf(procs_path, sizeof(procs_path), "%s/cgroup.procs", dir_path);
	if (!write_file(procs_path, "%d", self_pid)) {
		debug("write(%s) failed: %d\n", procs_path, errno);
	}

	snprintf(dir_path, sizeof(dir_path), "/syzcgroup/net/syz%llu", procid);
	if (mkdir(dir_path, 0777) != 0) {
		debug("mkdir(%s) failed: %d\n", dir_path, errno);
	}
	snprintf(procs_path, sizeof(procs_path), "%s/cgroup.procs", dir_path);
	if (!write_file(procs_path, "%d", self_pid)) {
		debug("write(%s) failed: %d\n", procs_path, errno);
	}
#endif
#if SYZ_EXECUTOR || SYZ_RESET_NET_NAMESPACE
	checkpoint_net_namespace();
#endif
}
1995 #endif
1996
1997 #if SYZ_EXECUTOR || SYZ_REPEAT && (SYZ_RESET_NET_NAMESPACE || __NR_syz_mount_image || __NR_syz_read_part_table)
1998 #define SYZ_HAVE_RESET_LOOP 1
// reset_loop issues LOOP_CLR_FD on /dev/loop<procid> (when that device can
// be opened) and resets the net namespace; each half is compiled in only
// when the corresponding feature is enabled.
static void reset_loop()
{
#if SYZ_EXECUTOR || __NR_syz_mount_image || __NR_syz_read_part_table
	char loop_path[64];
	snprintf(loop_path, sizeof(loop_path), "/dev/loop%llu", procid);
	int loop_dev = open(loop_path, O_RDWR);
	// Failure to open the device is silently ignored,
	// and so is the result of the ioctl itself.
	if (loop_dev != -1) {
		ioctl(loop_dev, LOOP_CLR_FD, 0);
		close(loop_dev);
	}
#endif
#if SYZ_EXECUTOR || SYZ_RESET_NET_NAMESPACE
	reset_net_namespace();
#endif
}
2014 #endif
2015
2016 #if SYZ_EXECUTOR || SYZ_REPEAT
2017 #include <sys/prctl.h>
2018
2019 #define SYZ_HAVE_SETUP_TEST 1
// setup_test prepares the calling process: it requests SIGKILL on parent
// death, calls setpgrp(), symlinks the per-proc cgroup directories into the
// current directory and flushes tun (the last two only when enabled).
static void setup_test()
{
	prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
	setpgrp();
#if SYZ_EXECUTOR || SYZ_ENABLE_CGROUPS
	// Expose /syzcgroup/{unified,cpu,net}/syz<procid> as ./cgroup,
	// ./cgroup.cpu and ./cgroup.net. Failures are only reported via debug().
	char target[64];

	snprintf(target, sizeof(target), "/syzcgroup/unified/syz%llu", procid);
	if (symlink(target, "./cgroup") != 0) {
		debug("symlink(%s, ./cgroup) failed: %d\n", target, errno);
	}

	snprintf(target, sizeof(target), "/syzcgroup/cpu/syz%llu", procid);
	if (symlink(target, "./cgroup.cpu") != 0) {
		debug("symlink(%s, ./cgroup.cpu) failed: %d\n", target, errno);
	}

	snprintf(target, sizeof(target), "/syzcgroup/net/syz%llu", procid);
	if (symlink(target, "./cgroup.net") != 0) {
		debug("symlink(%s, ./cgroup.net) failed: %d\n", target, errno);
	}
#endif
#if SYZ_EXECUTOR || SYZ_TUN_ENABLE
	// Drain all packets still pending in tun so that consecutively
	// executed programs are better isolated from each other.
	flush_tun();
#endif
}
2045
2046 #define SYZ_HAVE_RESET_TEST 1
// reset_test closes file descriptors 3 through 29.
// Keeping a 9p transport pipe open will hang the process dead,
// so every descriptor in that range is closed, whether open or not.
static void reset_test()
{
	int fd = 3;
	while (fd < 30) {
		close(fd);
		fd++;
	}
}
2055 #endif
2056