• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
4  *
5  * Test code for seccomp bpf.
6  */
7 
8 #define _GNU_SOURCE
9 #include <sys/types.h>
10 
11 /*
12  * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
13  * we need to use the kernel's siginfo.h file and trick glibc
14  * into accepting it.
15  */
16 #if defined(__GLIBC_PREREQ)
17 #if !__GLIBC_PREREQ(2, 26)
18 # include <asm/siginfo.h>
19 # define __have_siginfo_t 1
20 # define __have_sigval_t 1
21 # define __have_sigevent_t 1
22 #endif
23 #endif
24 
25 #include <errno.h>
26 #include <linux/filter.h>
27 #include <sys/prctl.h>
28 #include <sys/ptrace.h>
29 #include <sys/user.h>
30 #include <linux/prctl.h>
31 #include <linux/ptrace.h>
32 #include <linux/seccomp.h>
33 #include <pthread.h>
34 #include <semaphore.h>
35 #include <signal.h>
36 #include <stddef.h>
37 #include <stdbool.h>
38 #include <string.h>
39 #include <time.h>
40 #include <limits.h>
41 #include <linux/elf.h>
42 #include <sys/uio.h>
43 #include <sys/utsname.h>
44 #include <sys/fcntl.h>
45 #include <sys/mman.h>
46 #include <sys/times.h>
47 #include <sys/socket.h>
48 #include <sys/ioctl.h>
49 #include <linux/kcmp.h>
50 
51 #include <unistd.h>
52 #include <sys/syscall.h>
53 #include <poll.h>
54 
55 #include "../kselftest_harness.h"
56 
/*
 * Fallbacks for prctl(2) option numbers and seccomp mode values, used only
 * when the system headers included above do not already define them.
 */
#if !defined(PR_SET_PTRACER)
# define PR_SET_PTRACER 0x59616d61
#endif /* PR_SET_PTRACER */

/* The GET option is only supplied together with the SET option. */
#if !defined(PR_SET_NO_NEW_PRIVS)
# define PR_SET_NO_NEW_PRIVS 38
# define PR_GET_NO_NEW_PRIVS 39
#endif /* PR_SET_NO_NEW_PRIVS */

#if !defined(PR_SECCOMP_EXT)
# define PR_SECCOMP_EXT 43
#endif /* PR_SECCOMP_EXT */

#if !defined(SECCOMP_EXT_ACT)
# define SECCOMP_EXT_ACT 1
#endif /* SECCOMP_EXT_ACT */

#if !defined(SECCOMP_EXT_ACT_TSYNC)
# define SECCOMP_EXT_ACT_TSYNC 1
#endif /* SECCOMP_EXT_ACT_TSYNC */

#if !defined(SECCOMP_MODE_STRICT)
# define SECCOMP_MODE_STRICT 1
#endif /* SECCOMP_MODE_STRICT */

#if !defined(SECCOMP_MODE_FILTER)
# define SECCOMP_MODE_FILTER 2
#endif /* SECCOMP_MODE_FILTER */
85 
/*
 * If the headers lack SECCOMP_RET_ALLOW, declare struct seccomp_data
 * locally as well.
 */
#if !defined(SECCOMP_RET_ALLOW)
struct seccomp_data {
	int nr;
	__u32 arch;
	__u64 instruction_pointer;
	__u64 args[6];
};
#endif /* SECCOMP_RET_ALLOW */

/* Filter return actions, each group defined only when missing. */
#if !defined(SECCOMP_RET_KILL_PROCESS)
# define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
# define SECCOMP_RET_KILL_THREAD	 0x00000000U /* kill the thread */
#endif /* SECCOMP_RET_KILL_PROCESS */
#if !defined(SECCOMP_RET_KILL)
# define SECCOMP_RET_KILL	 SECCOMP_RET_KILL_THREAD
# define SECCOMP_RET_TRAP	 0x00030000U /* disallow and force a SIGSYS */
# define SECCOMP_RET_ERRNO	 0x00050000U /* returns an errno */
# define SECCOMP_RET_TRACE	 0x7ff00000U /* pass to a tracer or disallow */
# define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
#endif /* SECCOMP_RET_KILL */
#if !defined(SECCOMP_RET_LOG)
# define SECCOMP_RET_LOG		 0x7ffc0000U /* allow after logging */
#endif /* SECCOMP_RET_LOG */
109 
/*
 * Per-architecture number of the seccomp syscall, for headers that do not
 * provide __NR_seccomp. Unknown architectures get a warning and 0xffff.
 */
#if !defined(__NR_seccomp)
# if defined(__i386__)
#  define __NR_seccomp 354
# elif defined(__x86_64__)
#  define __NR_seccomp 317
# elif defined(__arm__)
#  define __NR_seccomp 383
# elif defined(__aarch64__) || defined(__riscv)
#  define __NR_seccomp 277
# elif defined(__hppa__)
#  define __NR_seccomp 338
# elif defined(__powerpc__)
#  define __NR_seccomp 358
# elif defined(__s390__)
#  define __NR_seccomp 348
# else
#  warning "seccomp syscall number unknown for this architecture"
#  define __NR_seccomp 0xffff
# endif
#endif /* __NR_seccomp */
132 
/* Operation codes passed as the first argument of seccomp(). */
#if !defined(SECCOMP_SET_MODE_STRICT)
# define SECCOMP_SET_MODE_STRICT 0
#endif /* SECCOMP_SET_MODE_STRICT */

#if !defined(SECCOMP_SET_MODE_FILTER)
# define SECCOMP_SET_MODE_FILTER 1
#endif /* SECCOMP_SET_MODE_FILTER */

#if !defined(SECCOMP_GET_ACTION_AVAIL)
# define SECCOMP_GET_ACTION_AVAIL 2
#endif /* SECCOMP_GET_ACTION_AVAIL */

#if !defined(SECCOMP_GET_NOTIF_SIZES)
# define SECCOMP_GET_NOTIF_SIZES 3
#endif /* SECCOMP_GET_NOTIF_SIZES */

/* Flag bits for SECCOMP_SET_MODE_FILTER. */
#if !defined(SECCOMP_FILTER_FLAG_TSYNC)
# define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
#endif /* SECCOMP_FILTER_FLAG_TSYNC */

#if !defined(SECCOMP_FILTER_FLAG_LOG)
# define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
#endif /* SECCOMP_FILTER_FLAG_LOG */

#if !defined(SECCOMP_FILTER_FLAG_SPEC_ALLOW)
# define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
#endif /* SECCOMP_FILTER_FLAG_SPEC_ALLOW */

/* ptrace request and its argument structure, supplied as a pair. */
#if !defined(PTRACE_SECCOMP_GET_METADATA)
# define PTRACE_SECCOMP_GET_METADATA	0x420d

struct seccomp_metadata {
	__u64 filter_off;       /* Input: which filter */
	__u64 flags;             /* Output: filter's flags */
};
#endif /* PTRACE_SECCOMP_GET_METADATA */
169 
170 #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
171 #define SECCOMP_FILTER_FLAG_NEW_LISTENER	(1UL << 3)
172 
173 #define SECCOMP_RET_USER_NOTIF 0x7fc00000U
174 
175 #define SECCOMP_IOC_MAGIC		'!'
176 #define SECCOMP_IO(nr)			_IO(SECCOMP_IOC_MAGIC, nr)
177 #define SECCOMP_IOR(nr, type)		_IOR(SECCOMP_IOC_MAGIC, nr, type)
178 #define SECCOMP_IOW(nr, type)		_IOW(SECCOMP_IOC_MAGIC, nr, type)
179 #define SECCOMP_IOWR(nr, type)		_IOWR(SECCOMP_IOC_MAGIC, nr, type)
180 
181 /* Flags for seccomp notification fd ioctl. */
182 #define SECCOMP_IOCTL_NOTIF_RECV	SECCOMP_IOWR(0, struct seccomp_notif)
183 #define SECCOMP_IOCTL_NOTIF_SEND	SECCOMP_IOWR(1,	\
184 						struct seccomp_notif_resp)
185 #define SECCOMP_IOCTL_NOTIF_ID_VALID	SECCOMP_IOR(2, __u64)
186 
187 struct seccomp_notif {
188 	__u64 id;
189 	__u32 pid;
190 	__u32 flags;
191 	struct seccomp_data data;
192 };
193 
194 struct seccomp_notif_resp {
195 	__u64 id;
196 	__s64 val;
197 	__s32 error;
198 	__u32 flags;
199 };
200 
201 struct seccomp_notif_sizes {
202 	__u16 seccomp_notif;
203 	__u16 seccomp_notif_resp;
204 	__u16 seccomp_data;
205 };
206 #endif
207 
208 #ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
209 #define PTRACE_EVENTMSG_SYSCALL_ENTRY	1
210 #define PTRACE_EVENTMSG_SYSCALL_EXIT	2
211 #endif
212 
213 #ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE
214 #define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001
215 #endif
216 
#if !defined(seccomp)
/*
 * Thin wrapper that issues the raw seccomp syscall.
 *
 * errno is reset to 0 before the call, so a non-zero errno afterwards was
 * set by this call. Returns the syscall() result converted to int.
 */
int seccomp(unsigned int op, unsigned int flags, void *args)
{
	long rc;

	errno = 0;
	rc = syscall(__NR_seccomp, op, flags, args);
	return rc;
}
#endif /* seccomp */
224 
/*
 * syscall_arg(n): byte offset, inside struct seccomp_data, of the 32-bit
 * word of args[n] that the filters below load. On big-endian the offset is
 * advanced by sizeof(__u32) past the start of the 64-bit slot.
 */
#if __BYTE_ORDER != __LITTLE_ENDIAN && __BYTE_ORDER != __BIG_ENDIAN
#error "wut? Unknown __BYTE_ORDER?!"
#endif
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
#else
#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
#endif

/* Values returned by sibling threads (see kill_thread()). */
#define SIBLING_EXIT_UNKILLED	0xbadbeef
#define SIBLING_EXIT_FAILURE	0xbadface
#define SIBLING_EXIT_NEWPRIVS	0xbadfeed
236 
TEST(mode_strict_support)237 TEST(mode_strict_support)
238 {
239 	long ret;
240 
241 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
242 	ASSERT_EQ(0, ret) {
243 		TH_LOG("Kernel does not support CONFIG_SECCOMP");
244 	}
245 	syscall(__NR_exit, 0);
246 }
247 
/*
 * After switching to strict mode, issue a prctl syscall; the test is
 * declared with SIGKILL as its signal, so the code after that call is
 * marked unreachable.
 */
TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
{
	long ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);

	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support CONFIG_SECCOMP");
	}

	syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
		NULL, NULL, NULL);

	/* Reaching this point is itself the failure. */
	EXPECT_FALSE(true) {
		TH_LOG("Unreachable!");
	}
}
262 
263 /* Note! This doesn't test no new privs behavior */
TEST(no_new_privs_support)264 TEST(no_new_privs_support)
265 {
266 	long ret;
267 
268 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
269 	EXPECT_EQ(0, ret) {
270 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
271 	}
272 }
273 
274 /* Tests kernel support by checking for a copy_from_user() fault on NULL. */
TEST(mode_filter_support)275 TEST(mode_filter_support)
276 {
277 	long ret;
278 
279 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
280 	ASSERT_EQ(0, ret) {
281 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
282 	}
283 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
284 	EXPECT_EQ(-1, ret);
285 	EXPECT_EQ(EFAULT, errno) {
286 		TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
287 	}
288 }
289 
TEST(mode_filter_without_nnp)290 TEST(mode_filter_without_nnp)
291 {
292 	struct sock_filter filter[] = {
293 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
294 	};
295 	struct sock_fprog prog = {
296 		.len = (unsigned short)ARRAY_SIZE(filter),
297 		.filter = filter,
298 	};
299 	long ret;
300 
301 	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
302 	ASSERT_LE(0, ret) {
303 		TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
304 	}
305 	errno = 0;
306 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
307 	/* Succeeds with CAP_SYS_ADMIN, fails without */
308 	/* TODO(wad) check caps not euid */
309 	if (geteuid()) {
310 		EXPECT_EQ(-1, ret);
311 		EXPECT_EQ(EACCES, errno);
312 	} else {
313 		EXPECT_EQ(0, ret);
314 	}
315 }
316 
317 #define MAX_INSNS_PER_PATH 32768
318 
TEST(filter_size_limits)319 TEST(filter_size_limits)
320 {
321 	int i;
322 	int count = BPF_MAXINSNS + 1;
323 	struct sock_filter allow[] = {
324 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
325 	};
326 	struct sock_filter *filter;
327 	struct sock_fprog prog = { };
328 	long ret;
329 
330 	filter = calloc(count, sizeof(*filter));
331 	ASSERT_NE(NULL, filter);
332 
333 	for (i = 0; i < count; i++)
334 		filter[i] = allow[0];
335 
336 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
337 	ASSERT_EQ(0, ret);
338 
339 	prog.filter = filter;
340 	prog.len = count;
341 
342 	/* Too many filter instructions in a single filter. */
343 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
344 	ASSERT_NE(0, ret) {
345 		TH_LOG("Installing %d insn filter was allowed", prog.len);
346 	}
347 
348 	/* One less is okay, though. */
349 	prog.len -= 1;
350 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
351 	ASSERT_EQ(0, ret) {
352 		TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
353 	}
354 }
355 
TEST(filter_chain_limits)356 TEST(filter_chain_limits)
357 {
358 	int i;
359 	int count = BPF_MAXINSNS;
360 	struct sock_filter allow[] = {
361 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
362 	};
363 	struct sock_filter *filter;
364 	struct sock_fprog prog = { };
365 	long ret;
366 
367 	filter = calloc(count, sizeof(*filter));
368 	ASSERT_NE(NULL, filter);
369 
370 	for (i = 0; i < count; i++)
371 		filter[i] = allow[0];
372 
373 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
374 	ASSERT_EQ(0, ret);
375 
376 	prog.filter = filter;
377 	prog.len = 1;
378 
379 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
380 	ASSERT_EQ(0, ret);
381 
382 	prog.len = count;
383 
384 	/* Too many total filter instructions. */
385 	for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
386 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
387 		if (ret != 0)
388 			break;
389 	}
390 	ASSERT_NE(0, ret) {
391 		TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
392 		       i, count, i * (count + 4));
393 	}
394 }
395 
TEST(mode_filter_cannot_move_to_strict)396 TEST(mode_filter_cannot_move_to_strict)
397 {
398 	struct sock_filter filter[] = {
399 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
400 	};
401 	struct sock_fprog prog = {
402 		.len = (unsigned short)ARRAY_SIZE(filter),
403 		.filter = filter,
404 	};
405 	long ret;
406 
407 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
408 	ASSERT_EQ(0, ret);
409 
410 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
411 	ASSERT_EQ(0, ret);
412 
413 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
414 	EXPECT_EQ(-1, ret);
415 	EXPECT_EQ(EINVAL, errno);
416 }
417 
418 
TEST(mode_filter_get_seccomp)419 TEST(mode_filter_get_seccomp)
420 {
421 	struct sock_filter filter[] = {
422 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
423 	};
424 	struct sock_fprog prog = {
425 		.len = (unsigned short)ARRAY_SIZE(filter),
426 		.filter = filter,
427 	};
428 	long ret;
429 
430 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
431 	ASSERT_EQ(0, ret);
432 
433 	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
434 	EXPECT_EQ(0, ret);
435 
436 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
437 	ASSERT_EQ(0, ret);
438 
439 	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
440 	EXPECT_EQ(2, ret);
441 }
442 
443 
TEST(ALLOW_all)444 TEST(ALLOW_all)
445 {
446 	struct sock_filter filter[] = {
447 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
448 	};
449 	struct sock_fprog prog = {
450 		.len = (unsigned short)ARRAY_SIZE(filter),
451 		.filter = filter,
452 	};
453 	long ret;
454 
455 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
456 	ASSERT_EQ(0, ret);
457 
458 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
459 	ASSERT_EQ(0, ret);
460 }
461 
TEST(empty_prog)462 TEST(empty_prog)
463 {
464 	struct sock_filter filter[] = {
465 	};
466 	struct sock_fprog prog = {
467 		.len = (unsigned short)ARRAY_SIZE(filter),
468 		.filter = filter,
469 	};
470 	long ret;
471 
472 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
473 	ASSERT_EQ(0, ret);
474 
475 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
476 	EXPECT_EQ(-1, ret);
477 	EXPECT_EQ(EINVAL, errno);
478 }
479 
/* Compiled out; see b/147676645. */
#if 0
/* With a log-everything filter in place, getppid() must still work. */
TEST(log_all)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
	};
	struct sock_fprog prog = {
		.filter = filter,
		.len = (unsigned short)ARRAY_SIZE(filter),
	};
	pid_t parent = getppid();
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	/* getppid() should succeed and be logged (no check for logging) */
	EXPECT_EQ(parent, syscall(__NR_getppid));
}
#endif
504 
/*
 * A filter returning the value 0x10000000U is installed, then getpid is
 * issued; the test is declared with SIGSYS as its signal.
 */
TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
	};
	struct sock_fprog prog = {
		.filter = filter,
		.len = (unsigned short)ARRAY_SIZE(filter),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(0, syscall(__NR_getpid)) {
		TH_LOG("getpid() shouldn't ever return");
	}
}
525 
526 /* return code >= 0x80000000 is unused. */
TEST_SIGNAL(unknown_ret_is_kill_above_allow,SIGSYS)527 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
528 {
529 	struct sock_filter filter[] = {
530 		BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
531 	};
532 	struct sock_fprog prog = {
533 		.len = (unsigned short)ARRAY_SIZE(filter),
534 		.filter = filter,
535 	};
536 	long ret;
537 
538 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
539 	ASSERT_EQ(0, ret);
540 
541 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
542 	ASSERT_EQ(0, ret);
543 	EXPECT_EQ(0, syscall(__NR_getpid)) {
544 		TH_LOG("getpid() shouldn't ever return");
545 	}
546 }
547 
/*
 * Install a filter that returns SECCOMP_RET_KILL for every syscall; the
 * test is declared with SIGSYS as its signal.
 */
TEST_SIGNAL(KILL_all, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
	};
	struct sock_fprog prog = {
		.filter = filter,
		.len = (unsigned short)ARRAY_SIZE(filter),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);
}
565 
/*
 * The filter returns SECCOMP_RET_KILL for getpid and ALLOW for every
 * other syscall, so getppid() still works while getpid() must not return.
 */
TEST_SIGNAL(KILL_one, SIGSYS)
{
	struct sock_filter filter[] = {
		/* Load the syscall number and compare it with getpid. */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.filter = filter,
		.len = (unsigned short)ARRAY_SIZE(filter),
	};
	pid_t parent = getppid();
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}
592 
/*
 * The filter kills only a times syscall whose first argument (low 32 bits)
 * equals the address of the local fatal_address; other calls are allowed.
 */
TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
{
	void *fatal_address;
	struct sock_filter filter[] = {
		/* Anything that is not times is allowed straight away. */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		/* Only bother with lower 32-bit for now. */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
			(unsigned long)&fatal_address, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.filter = filter,
		.len = (unsigned short)ARRAY_SIZE(filter),
	};
	pid_t parent = getppid();
	struct tms timebuf;
	clock_t clock = times(&timebuf);
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* A times call with a different buffer must pass the filter. */
	EXPECT_LE(clock, syscall(__NR_times, &timebuf));
	/* times() should never return. */
	EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
}
628 
/*
 * The filter kills only an mmap/mmap2 syscall whose sixth argument (low
 * 32 bits) equals 0x0C0FFEE; other calls are allowed.
 */
TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
{
	/* Use mmap2 when the architecture defines it, mmap otherwise. */
#ifdef __NR_mmap2
	int sysno = __NR_mmap2;
#else
	int sysno = __NR_mmap;
#endif
	struct sock_filter filter[] = {
		/* Anything that is not the chosen mmap variant is allowed. */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		/* Only bother with lower 32-bit for now. */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.filter = filter,
		.len = (unsigned short)ARRAY_SIZE(filter),
	};
	long ret;
	pid_t parent = getppid();
	int fd;
	void *map1, *map2;
	int page_size = sysconf(_SC_PAGESIZE);

	ASSERT_LT(0, page_size);

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
	ASSERT_EQ(0, ret);

	fd = open("/dev/zero", O_RDONLY);
	ASSERT_NE(-1, fd);

	EXPECT_EQ(parent, syscall(__NR_getppid));

	/* A mapping with a different sixth argument must pass the filter. */
	map1 = (void *)syscall(sysno,
		NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
	EXPECT_NE(MAP_FAILED, map1);

	/* mmap2() should never return. */
	map2 = (void *)syscall(sysno,
		 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
	EXPECT_EQ(MAP_FAILED, map2);

	/* The test failed, so clean up the resources. */
	munmap(map1, page_size);
	munmap(map2, page_size);
	close(fd);
}
682 
683 /* This is a thread task to die via seccomp filter violation. */
kill_thread(void * data)684 void *kill_thread(void *data)
685 {
686 	bool die = (bool)data;
687 
688 	if (die) {
689 		prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
690 		return (void *)SIBLING_EXIT_FAILURE;
691 	}
692 
693 	return (void *)SIBLING_EXIT_UNKILLED;
694 }
695 
696 /* Prepare a thread that will kill itself or both of us. */
kill_thread_or_group(struct __test_metadata * _metadata,bool kill_process)697 void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
698 {
699 	pthread_t thread;
700 	void *status;
701 	/* Kill only when calling __NR_prctl. */
702 	struct sock_filter filter_thread[] = {
703 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
704 			offsetof(struct seccomp_data, nr)),
705 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
706 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
707 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
708 	};
709 	struct sock_fprog prog_thread = {
710 		.len = (unsigned short)ARRAY_SIZE(filter_thread),
711 		.filter = filter_thread,
712 	};
713 	struct sock_filter filter_process[] = {
714 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
715 			offsetof(struct seccomp_data, nr)),
716 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
717 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
718 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
719 	};
720 	struct sock_fprog prog_process = {
721 		.len = (unsigned short)ARRAY_SIZE(filter_process),
722 		.filter = filter_process,
723 	};
724 
725 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
726 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
727 	}
728 
729 	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
730 			     kill_process ? &prog_process : &prog_thread));
731 
732 	/*
733 	 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
734 	 * flag cannot be downgraded by a new filter.
735 	 */
736 	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
737 
738 	/* Start a thread that will exit immediately. */
739 	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
740 	ASSERT_EQ(0, pthread_join(thread, &status));
741 	ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
742 
743 	/* Start a thread that will die immediately. */
744 	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
745 	ASSERT_EQ(0, pthread_join(thread, &status));
746 	ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
747 
748 	/*
749 	 * If we get here, only the spawned thread died. Let the parent know
750 	 * the whole process didn't die (i.e. this thread, the spawner,
751 	 * stayed running).
752 	 */
753 	exit(42);
754 }
755 
/* Compiled out; see b/147676645. */
#if 0
/*
 * Run kill_thread_or_group() in thread-kill mode inside a forked child and
 * check that the child exits with status 42.
 */
TEST(KILL_thread)
{
	pid_t child_pid = fork();
	int status;

	ASSERT_LE(0, child_pid);
	if (child_pid == 0) {
		kill_thread_or_group(_metadata, false);
		_exit(38);
	}

	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));

	/* If only the thread was killed, we'll see exit 42. */
	ASSERT_TRUE(WIFEXITED(status));
	ASSERT_EQ(42, WEXITSTATUS(status));
}
#endif
777 
TEST(KILL_process)778 TEST(KILL_process)
779 {
780 	int status;
781 	pid_t child_pid;
782 
783 	child_pid = fork();
784 	ASSERT_LE(0, child_pid);
785 	if (child_pid == 0) {
786 		kill_thread_or_group(_metadata, true);
787 		_exit(38);
788 	}
789 
790 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
791 
792 	/* If the entire process was killed, we'll see SIGSYS. */
793 	ASSERT_TRUE(WIFSIGNALED(status));
794 	ASSERT_EQ(SIGSYS, WTERMSIG(status));
795 }
796 
797 /* TODO(wad) add 64-bit versus 32-bit arg tests. */
TEST(arg_out_of_range)798 TEST(arg_out_of_range)
799 {
800 	struct sock_filter filter[] = {
801 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
802 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
803 	};
804 	struct sock_fprog prog = {
805 		.len = (unsigned short)ARRAY_SIZE(filter),
806 		.filter = filter,
807 	};
808 	long ret;
809 
810 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
811 	ASSERT_EQ(0, ret);
812 
813 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
814 	EXPECT_EQ(-1, ret);
815 	EXPECT_EQ(EINVAL, errno);
816 }
817 
/*
 * ERRNO_FILTER(name, errno_val) - declare a filter and its program.
 *
 * Declares _read_filter_<name> and prog_<name> in the current scope. The
 * filter returns SECCOMP_RET_ERRNO | errno_val for the read syscall and
 * SECCOMP_RET_ALLOW for everything else.
 *
 * The value parameter is deliberately not spelled "errno", which is also
 * the name of a macro from <errno.h>, and it is parenthesized so that any
 * expression can be passed without precedence surprises.
 */
#define ERRNO_FILTER(name, errno_val)					\
	struct sock_filter _read_filter_##name[] = {			\
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,				\
			offsetof(struct seccomp_data, nr)),		\
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),	\
		BPF_STMT(BPF_RET|BPF_K,					\
			SECCOMP_RET_ERRNO | (errno_val)),		\
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),		\
	};								\
	struct sock_fprog prog_##name = {				\
		.len = (unsigned short)ARRAY_SIZE(_read_filter_##name),	\
		.filter = _read_filter_##name,				\
	}
830 
831 /* Make sure basic errno values are correctly passed through a filter. */
TEST(ERRNO_valid)832 TEST(ERRNO_valid)
833 {
834 	ERRNO_FILTER(valid, E2BIG);
835 	long ret;
836 	pid_t parent = getppid();
837 
838 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
839 	ASSERT_EQ(0, ret);
840 
841 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
842 	ASSERT_EQ(0, ret);
843 
844 	EXPECT_EQ(parent, syscall(__NR_getppid));
845 	EXPECT_EQ(-1, read(0, NULL, 0));
846 	EXPECT_EQ(E2BIG, errno);
847 }
848 
849 /* Make sure an errno of zero is correctly handled by the arch code. */
TEST(ERRNO_zero)850 TEST(ERRNO_zero)
851 {
852 	ERRNO_FILTER(zero, 0);
853 	long ret;
854 	pid_t parent = getppid();
855 
856 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
857 	ASSERT_EQ(0, ret);
858 
859 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
860 	ASSERT_EQ(0, ret);
861 
862 	EXPECT_EQ(parent, syscall(__NR_getppid));
863 	/* "errno" of 0 is ok. */
864 	EXPECT_EQ(0, read(0, NULL, 0));
865 }
866 
867 /*
868  * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
869  * This tests that the errno value gets capped correctly, fixed by
870  * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
871  */
TEST(ERRNO_capped)872 TEST(ERRNO_capped)
873 {
874 	ERRNO_FILTER(capped, 4096);
875 	long ret;
876 	pid_t parent = getppid();
877 
878 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
879 	ASSERT_EQ(0, ret);
880 
881 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
882 	ASSERT_EQ(0, ret);
883 
884 	EXPECT_EQ(parent, syscall(__NR_getppid));
885 	EXPECT_EQ(-1, read(0, NULL, 0));
886 	EXPECT_EQ(4095, errno);
887 }
888 
889 /*
890  * Filters are processed in reverse order: last applied is executed first.
891  * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
892  * SECCOMP_RET_DATA mask results will follow the most recently applied
893  * matching filter return (and not the lowest or highest value).
894  */
TEST(ERRNO_order)895 TEST(ERRNO_order)
896 {
897 	ERRNO_FILTER(first,  11);
898 	ERRNO_FILTER(second, 13);
899 	ERRNO_FILTER(third,  12);
900 	long ret;
901 	pid_t parent = getppid();
902 
903 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
904 	ASSERT_EQ(0, ret);
905 
906 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
907 	ASSERT_EQ(0, ret);
908 
909 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
910 	ASSERT_EQ(0, ret);
911 
912 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
913 	ASSERT_EQ(0, ret);
914 
915 	EXPECT_EQ(parent, syscall(__NR_getppid));
916 	EXPECT_EQ(-1, read(0, NULL, 0));
917 	EXPECT_EQ(12, errno);
918 }
919 
/* State shared by the TRAP tests: the filter program built in setup. */
FIXTURE_DATA(TRAP) {
	struct sock_fprog prog;	/* .filter is heap-allocated by setup */
};
923 
FIXTURE_SETUP(TRAP)924 FIXTURE_SETUP(TRAP)
925 {
926 	struct sock_filter filter[] = {
927 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
928 			offsetof(struct seccomp_data, nr)),
929 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
930 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
931 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
932 	};
933 
934 	memset(&self->prog, 0, sizeof(self->prog));
935 	self->prog.filter = malloc(sizeof(filter));
936 	ASSERT_NE(NULL, self->prog.filter);
937 	memcpy(self->prog.filter, filter, sizeof(filter));
938 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
939 }
940 
FIXTURE_TEARDOWN(TRAP)941 FIXTURE_TEARDOWN(TRAP)
942 {
943 	if (self->prog.filter)
944 		free(self->prog.filter);
945 }
946 
/*
 * Install the fixture's TRAP program and call getpid without changing the
 * SIGSYS disposition; the test is declared with SIGSYS as its signal.
 */
TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
{
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
	ASSERT_EQ(0, ret);

	syscall(__NR_getpid);
}
958 
959 /* Ensure that SIGSYS overrides SIG_IGN */
TEST_F_SIGNAL(TRAP,ign,SIGSYS)960 TEST_F_SIGNAL(TRAP, ign, SIGSYS)
961 {
962 	long ret;
963 
964 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
965 	ASSERT_EQ(0, ret);
966 
967 	signal(SIGSYS, SIG_IGN);
968 
969 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
970 	ASSERT_EQ(0, ret);
971 	syscall(__NR_getpid);
972 }
973 
/* Filled in by TRAP_action() so the test body can inspect the signal. */
static siginfo_t TRAP_info;
static volatile int TRAP_nr;

/*
 * SA_SIGINFO-style handler: stores a copy of the delivered siginfo and the
 * signal number in the file-scope variables above. The context argument is
 * not used.
 */
static void TRAP_action(int nr, siginfo_t *info, void *void_context)
{
	memcpy(&TRAP_info, info, sizeof(*info));
	TRAP_nr = nr;
}
981 
TEST_F(TRAP,handler)982 TEST_F(TRAP, handler)
983 {
984 	int ret, test;
985 	struct sigaction act;
986 	sigset_t mask;
987 
988 	memset(&act, 0, sizeof(act));
989 	sigemptyset(&mask);
990 	sigaddset(&mask, SIGSYS);
991 
992 	act.sa_sigaction = &TRAP_action;
993 	act.sa_flags = SA_SIGINFO;
994 	ret = sigaction(SIGSYS, &act, NULL);
995 	ASSERT_EQ(0, ret) {
996 		TH_LOG("sigaction failed");
997 	}
998 	ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
999 	ASSERT_EQ(0, ret) {
1000 		TH_LOG("sigprocmask failed");
1001 	}
1002 
1003 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1004 	ASSERT_EQ(0, ret);
1005 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
1006 	ASSERT_EQ(0, ret);
1007 	TRAP_nr = 0;
1008 	memset(&TRAP_info, 0, sizeof(TRAP_info));
1009 	/* Expect the registers to be rolled back. (nr = error) may vary
1010 	 * based on arch. */
1011 	ret = syscall(__NR_getpid);
1012 	/* Silence gcc warning about volatile. */
1013 	test = TRAP_nr;
1014 	EXPECT_EQ(SIGSYS, test);
1015 	struct local_sigsys {
1016 		void *_call_addr;	/* calling user insn */
1017 		int _syscall;		/* triggering system call number */
1018 		unsigned int _arch;	/* AUDIT_ARCH_* of syscall */
1019 	} *sigsys = (struct local_sigsys *)
1020 #ifdef si_syscall
1021 		&(TRAP_info.si_call_addr);
1022 #else
1023 		&TRAP_info.si_pid;
1024 #endif
1025 	EXPECT_EQ(__NR_getpid, sigsys->_syscall);
1026 	/* Make sure arch is non-zero. */
1027 	EXPECT_NE(0, sigsys->_arch);
1028 	EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
1029 }
1030 
/*
 * One program per return action, all built by the fixture setup. Each
 * .filter buffer is heap-allocated there.
 */
FIXTURE_DATA(precedence) {
	struct sock_fprog allow;	/* SECCOMP_RET_ALLOW for everything */
	struct sock_fprog log;		/* SECCOMP_RET_LOG on getpid */
	struct sock_fprog trace;	/* SECCOMP_RET_TRACE on getpid */
	struct sock_fprog error;	/* SECCOMP_RET_ERRNO on getpid */
	struct sock_fprog trap;		/* SECCOMP_RET_TRAP on getpid */
	struct sock_fprog kill;		/* SECCOMP_RET_KILL on getpid */
};
1039 
FIXTURE_SETUP(precedence)1040 FIXTURE_SETUP(precedence)
1041 {
1042 	struct sock_filter allow_insns[] = {
1043 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1044 	};
1045 	struct sock_filter log_insns[] = {
1046 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1047 			offsetof(struct seccomp_data, nr)),
1048 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1049 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1050 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
1051 	};
1052 	struct sock_filter trace_insns[] = {
1053 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1054 			offsetof(struct seccomp_data, nr)),
1055 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1056 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1057 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
1058 	};
1059 	struct sock_filter error_insns[] = {
1060 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1061 			offsetof(struct seccomp_data, nr)),
1062 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1063 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1064 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
1065 	};
1066 	struct sock_filter trap_insns[] = {
1067 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1068 			offsetof(struct seccomp_data, nr)),
1069 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1070 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1071 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
1072 	};
1073 	struct sock_filter kill_insns[] = {
1074 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1075 			offsetof(struct seccomp_data, nr)),
1076 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1077 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1078 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1079 	};
1080 
1081 	memset(self, 0, sizeof(*self));
1082 #define FILTER_ALLOC(_x) \
1083 	self->_x.filter = malloc(sizeof(_x##_insns)); \
1084 	ASSERT_NE(NULL, self->_x.filter); \
1085 	memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
1086 	self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
1087 	FILTER_ALLOC(allow);
1088 	FILTER_ALLOC(log);
1089 	FILTER_ALLOC(trace);
1090 	FILTER_ALLOC(error);
1091 	FILTER_ALLOC(trap);
1092 	FILTER_ALLOC(kill);
1093 }
1094 
FIXTURE_TEARDOWN(precedence)1095 FIXTURE_TEARDOWN(precedence)
1096 {
1097 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
1098 	FILTER_FREE(allow);
1099 	FILTER_FREE(log);
1100 	FILTER_FREE(trace);
1101 	FILTER_FREE(error);
1102 	FILTER_FREE(trap);
1103 	FILTER_FREE(kill);
1104 }
1105 
TEST_F(precedence,allow_ok)1106 TEST_F(precedence, allow_ok)
1107 {
1108 	pid_t parent, res = 0;
1109 	long ret;
1110 
1111 	parent = getppid();
1112 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1113 	ASSERT_EQ(0, ret);
1114 
1115 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1116 	ASSERT_EQ(0, ret);
1117 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1118 	ASSERT_EQ(0, ret);
1119 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1120 	ASSERT_EQ(0, ret);
1121 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1122 	ASSERT_EQ(0, ret);
1123 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1124 	ASSERT_EQ(0, ret);
1125 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1126 	ASSERT_EQ(0, ret);
1127 	/* Should work just fine. */
1128 	res = syscall(__NR_getppid);
1129 	EXPECT_EQ(parent, res);
1130 }
1131 
/*
 * All six filters installed with kill last: getpid is expected to end the
 * test with SIGSYS (see the TEST_F_SIGNAL declaration).
 */
TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
{
	/* Filters in the order they are installed. */
	struct sock_fprog *stack[] = {
		&self->allow, &self->log, &self->trace,
		&self->error, &self->trap, &self->kill,
	};
	pid_t parent, res = 0;
	unsigned int i;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	for (i = 0; i < ARRAY_SIZE(stack); i++) {
		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, stack[i]);
		ASSERT_EQ(0, ret);
	}

	/* getppid is not singled out by any filter. */
	res = syscall(__NR_getppid);
	EXPECT_EQ(parent, res);
	/* getpid() should never return. */
	res = syscall(__NR_getpid);
	EXPECT_EQ(0, res);
}
1160 
/* Same expectation as kill_is_highest, with kill installed second. */
TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
{
	/* Filters in the order they are installed. */
	struct sock_fprog *stack[] = {
		&self->allow, &self->kill, &self->error,
		&self->log, &self->trace, &self->trap,
	};
	unsigned int i;
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	for (i = 0; i < ARRAY_SIZE(stack); i++) {
		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, stack[i]);
		ASSERT_EQ(0, ret);
	}

	/* getppid is not singled out by any filter. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}
1187 
/*
 * Everything except the kill filter, with trap installed last: getpid is
 * expected to end the test with SIGSYS.
 */
TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
{
	/* Filters in the order they are installed. */
	struct sock_fprog *stack[] = {
		&self->allow, &self->log, &self->trace,
		&self->error, &self->trap,
	};
	unsigned int i;
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	for (i = 0; i < ARRAY_SIZE(stack); i++) {
		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, stack[i]);
		ASSERT_EQ(0, ret);
	}

	/* getppid is not singled out by any filter. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}
1212 
/* Same expectation as trap_is_second, with trap installed second. */
TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
{
	/* Filters in the order they are installed. */
	struct sock_fprog *stack[] = {
		&self->allow, &self->trap, &self->log,
		&self->trace, &self->error,
	};
	unsigned int i;
	pid_t parent;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	for (i = 0; i < ARRAY_SIZE(stack); i++) {
		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, stack[i]);
		ASSERT_EQ(0, ret);
	}

	/* getppid is not singled out by any filter. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}
1237 
TEST_F(precedence,errno_is_third)1238 TEST_F(precedence, errno_is_third)
1239 {
1240 	pid_t parent;
1241 	long ret;
1242 
1243 	parent = getppid();
1244 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1245 	ASSERT_EQ(0, ret);
1246 
1247 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1248 	ASSERT_EQ(0, ret);
1249 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1250 	ASSERT_EQ(0, ret);
1251 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1252 	ASSERT_EQ(0, ret);
1253 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1254 	ASSERT_EQ(0, ret);
1255 	/* Should work just fine. */
1256 	EXPECT_EQ(parent, syscall(__NR_getppid));
1257 	EXPECT_EQ(0, syscall(__NR_getpid));
1258 }
1259 
TEST_F(precedence,errno_is_third_in_any_order)1260 TEST_F(precedence, errno_is_third_in_any_order)
1261 {
1262 	pid_t parent;
1263 	long ret;
1264 
1265 	parent = getppid();
1266 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1267 	ASSERT_EQ(0, ret);
1268 
1269 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1270 	ASSERT_EQ(0, ret);
1271 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1272 	ASSERT_EQ(0, ret);
1273 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1274 	ASSERT_EQ(0, ret);
1275 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1276 	ASSERT_EQ(0, ret);
1277 	/* Should work just fine. */
1278 	EXPECT_EQ(parent, syscall(__NR_getppid));
1279 	EXPECT_EQ(0, syscall(__NR_getpid));
1280 }
1281 
TEST_F(precedence,trace_is_fourth)1282 TEST_F(precedence, trace_is_fourth)
1283 {
1284 	pid_t parent;
1285 	long ret;
1286 
1287 	parent = getppid();
1288 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1289 	ASSERT_EQ(0, ret);
1290 
1291 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1292 	ASSERT_EQ(0, ret);
1293 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1294 	ASSERT_EQ(0, ret);
1295 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1296 	ASSERT_EQ(0, ret);
1297 	/* Should work just fine. */
1298 	EXPECT_EQ(parent, syscall(__NR_getppid));
1299 	/* No ptracer */
1300 	EXPECT_EQ(-1, syscall(__NR_getpid));
1301 }
1302 
TEST_F(precedence,trace_is_fourth_in_any_order)1303 TEST_F(precedence, trace_is_fourth_in_any_order)
1304 {
1305 	pid_t parent;
1306 	long ret;
1307 
1308 	parent = getppid();
1309 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1310 	ASSERT_EQ(0, ret);
1311 
1312 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1313 	ASSERT_EQ(0, ret);
1314 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1315 	ASSERT_EQ(0, ret);
1316 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1317 	ASSERT_EQ(0, ret);
1318 	/* Should work just fine. */
1319 	EXPECT_EQ(parent, syscall(__NR_getppid));
1320 	/* No ptracer */
1321 	EXPECT_EQ(-1, syscall(__NR_getpid));
1322 }
1323 
/*
 * b/147676645
 *
 * The two SECCOMP_RET_LOG precedence tests below are compiled out.
 * NOTE(review): the reason is only recorded in the bug referenced above;
 * confirm it is still valid before re-enabling.
 */
#if 0
/* allow then log installed: both getppid and getpid are expected to work. */
TEST_F(precedence, log_is_fifth)
{
	pid_t mypid, parent;
	long ret;

	mypid = getpid();
	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* Should also work just fine */
	EXPECT_EQ(mypid, syscall(__NR_getpid));
}

/* Same expectation with the install order reversed (log, then allow). */
TEST_F(precedence, log_is_fifth_in_any_order)
{
	pid_t mypid, parent;
	long ret;

	mypid = getpid();
	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
	ASSERT_EQ(0, ret);
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
	ASSERT_EQ(0, ret);
	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* Should also work just fine */
	EXPECT_EQ(mypid, syscall(__NR_getpid));
}
#endif
1366 
/* Fallback for headers that predate PTRACE_O_TRACESECCOMP. */
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP	0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

/*
 * True when the bits of a wait status above the low 16 equal
 * PTRACE_EVENT_SECCOMP. The argument is parenthesized so that an
 * expression using operators that bind looser than ">>" (for example
 * "status & mask") is shifted as a whole.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)
/*
 * Run flag for the tracer process. It is stored to from the signal handler
 * below, so it is declared volatile sig_atomic_t: that is the object type
 * ISO C guarantees can be written from an asynchronous signal handler.
 */
volatile sig_atomic_t tracer_running;

/* Signal handler: clear the run flag so the tracer loop can terminate. */
void tracer_stop(int sig)
{
	(void)sig;
	tracer_running = false;
}
1386 
/*
 * Callback invoked by the tracer loop for each stop it reports: receives
 * the harness metadata, the traced pid, the wait status of the stop and
 * the opaque argument pointer supplied when the tracer was started.
 */
typedef void tracer_func_t(struct __test_metadata *_metadata,
			   pid_t tracee, int status, void *args);
1389 
/*
 * Body of the tracer process: attach to @tracee, configure ptrace, release
 * the tracee through @fd, then call @tracer_func on every stop until
 * SIGUSR1 clears tracer_running or the tracee dies.
 *
 * @ptrace_syscall selects PTRACE_SYSCALL stepping with
 * PTRACE_O_TRACESYSGOOD; otherwise PTRACE_CONT with PTRACE_O_TRACESECCOMP
 * is used.
 */
void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
	    tracer_func_t tracer_func, void *args, bool ptrace_syscall)
{
	struct sigaction action = {
		.sa_handler = tracer_stop,
	};
	/* Request and option are fixed for the whole tracing session. */
	int resume_req = ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT;
	int trace_opts = ptrace_syscall ? PTRACE_O_TRACESYSGOOD :
					  PTRACE_O_TRACESECCOMP;
	int ret = -1;

	/* Allow external shutdown. */
	tracer_running = true;
	ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));

	/* Keep retrying the attach unless it fails with EINVAL. */
	for (errno = 0; ret == -1 && errno != EINVAL; )
		ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
	ASSERT_EQ(0, ret) {
		kill(tracee, SIGKILL);
	}
	/* Wait for attach stop */
	wait(NULL);

	ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, trace_opts);
	ASSERT_EQ(0, ret) {
		TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
		kill(tracee, SIGKILL);
	}
	ret = ptrace(resume_req, tracee, NULL, 0);
	ASSERT_EQ(0, ret);

	/* Unblock the tracee */
	ASSERT_EQ(1, write(fd, "A", 1));
	ASSERT_EQ(0, close(fd));

	/* Run until we're shut down. Must assert to stop execution. */
	while (tracer_running) {
		int status;

		/* Ignore interrupted waits and unrelated children. */
		if (wait(&status) != tracee)
			continue;
		/* Child is dead. Time to go. */
		if (WIFEXITED(status) || WIFSIGNALED(status))
			return;

		/* Check if this is a seccomp event. */
		ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));

		tracer_func(_metadata, tracee, status, args);

		ret = ptrace(resume_req, tracee, NULL, 0);
		ASSERT_EQ(0, ret);
	}
	/* Directly report the status of our test harness results. */
	syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
}
1448 
1449 /* Common tracer setup/teardown functions. */
/* Signal handler that deliberately does nothing. */
void cont_handler(int num)
{
	(void)num;
}
/*
 * Fork a tracer for the calling process and block until it has attached.
 *
 * The child runs start_tracer() with @func, @args and @ptrace_syscall. The
 * parent names the child as its permitted ptracer and then waits on a pipe
 * for the byte the tracer writes once tracing is set up.
 *
 * Returns the pid of the tracer process.
 */
pid_t setup_trace_fixture(struct __test_metadata *_metadata,
			  tracer_func_t func, void *args, bool ptrace_syscall)
{
	pid_t tracee = getpid();
	pid_t tracer_pid;
	int pipefd[2];
	char token;

	/* Setup a pipe for clean synchronization. */
	ASSERT_EQ(0, pipe(pipefd));

	/* Fork a child which we'll promote to tracer */
	tracer_pid = fork();
	ASSERT_LE(0, tracer_pid);
	signal(SIGALRM, cont_handler);
	if (!tracer_pid) {
		/* Tracer side: keep only the write end. */
		close(pipefd[0]);
		start_tracer(_metadata, pipefd[1], tracee, func, args,
			     ptrace_syscall);
		syscall(__NR_exit, 0);
	}

	/* Tracee side: keep only the read end and wait for the tracer. */
	close(pipefd[1]);
	prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
	read(pipefd[0], &token, 1);
	close(pipefd[0]);

	return tracer_pid;
}
teardown_trace_fixture(struct __test_metadata * _metadata,pid_t tracer)1480 void teardown_trace_fixture(struct __test_metadata *_metadata,
1481 			    pid_t tracer)
1482 {
1483 	if (tracer) {
1484 		int status;
1485 		/*
1486 		 * Extract the exit code from the other process and
1487 		 * adopt it for ourselves in case its asserts failed.
1488 		 */
1489 		ASSERT_EQ(0, kill(tracer, SIGUSR1));
1490 		ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1491 		if (WEXITSTATUS(status))
1492 			_metadata->passed = 0;
1493 	}
1494 }
1495 
/*
 * "poke" tracer arguments and function.
 *
 * poke_addr is the address in the tracee that tracer_poke() writes to with
 * PTRACE_POKEDATA.
 */
struct tracer_args_poke_t {
	unsigned long poke_addr;
};
1500 
tracer_poke(struct __test_metadata * _metadata,pid_t tracee,int status,void * args)1501 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1502 		 void *args)
1503 {
1504 	int ret;
1505 	unsigned long msg;
1506 	struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1507 
1508 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1509 	EXPECT_EQ(0, ret);
1510 	/* If this fails, don't try to recover. */
1511 	ASSERT_EQ(0x1001, msg) {
1512 		kill(tracee, SIGKILL);
1513 	}
1514 	/*
1515 	 * Poke in the message.
1516 	 * Registers are not touched to try to keep this relatively arch
1517 	 * agnostic.
1518 	 */
1519 	ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1520 	EXPECT_EQ(0, ret);
1521 }
1522 
/* Per-test state for the TRACE_poke fixture. */
FIXTURE_DATA(TRACE_poke) {
	/* Filter program built by the fixture setup. */
	struct sock_fprog prog;
	/* Pid returned by setup_trace_fixture(). */
	pid_t tracer;
	/* Word whose address is handed to the tracer as the poke target. */
	long poked;
	/* Argument block passed to tracer_poke(). */
	struct tracer_args_poke_t tracer_args;
};
1529 
FIXTURE_SETUP(TRACE_poke)1530 FIXTURE_SETUP(TRACE_poke)
1531 {
1532 	struct sock_filter filter[] = {
1533 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1534 			offsetof(struct seccomp_data, nr)),
1535 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1536 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1537 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1538 	};
1539 
1540 	self->poked = 0;
1541 	memset(&self->prog, 0, sizeof(self->prog));
1542 	self->prog.filter = malloc(sizeof(filter));
1543 	ASSERT_NE(NULL, self->prog.filter);
1544 	memcpy(self->prog.filter, filter, sizeof(filter));
1545 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1546 
1547 	/* Set up tracer args. */
1548 	self->tracer_args.poke_addr = (unsigned long)&self->poked;
1549 
1550 	/* Launch tracer. */
1551 	self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1552 					   &self->tracer_args, false);
1553 }
1554 
FIXTURE_TEARDOWN(TRACE_poke)1555 FIXTURE_TEARDOWN(TRACE_poke)
1556 {
1557 	teardown_trace_fixture(_metadata, self->tracer);
1558 	if (self->prog.filter)
1559 		free(self->prog.filter);
1560 }
1561 
TEST_F(TRACE_poke,read_has_side_effects)1562 TEST_F(TRACE_poke, read_has_side_effects)
1563 {
1564 	ssize_t ret;
1565 
1566 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1567 	ASSERT_EQ(0, ret);
1568 
1569 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1570 	ASSERT_EQ(0, ret);
1571 
1572 	EXPECT_EQ(0, self->poked);
1573 	ret = read(-1, NULL, 0);
1574 	EXPECT_EQ(-1, ret);
1575 	EXPECT_EQ(0x1001, self->poked);
1576 }
1577 
TEST_F(TRACE_poke,getpid_runs_normally)1578 TEST_F(TRACE_poke, getpid_runs_normally)
1579 {
1580 	long ret;
1581 
1582 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1583 	ASSERT_EQ(0, ret);
1584 
1585 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1586 	ASSERT_EQ(0, ret);
1587 
1588 	EXPECT_EQ(0, self->poked);
1589 	EXPECT_NE(0, syscall(__NR_getpid));
1590 	EXPECT_EQ(0, self->poked);
1591 }
1592 
/*
 * Per-architecture register access:
 *   ARCH_REGS   - register structure type used with ptrace
 *   SYSCALL_NUM - member holding the syscall number
 *   SYSCALL_RET - member holding the syscall return value
 * MIPS additionally defines SYSCALL_SYSCALL_NUM (used when SYSCALL_NUM
 * holds __NR_O32_Linux) and SYSCALL_NUM_RET_SHARE_REG, because there
 * SYSCALL_NUM and SYSCALL_RET name the same register.
 */
#if defined(__x86_64__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_rax
# define SYSCALL_RET	rax
#elif defined(__i386__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_eax
# define SYSCALL_RET	eax
#elif defined(__arm__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	ARM_r7
# define SYSCALL_RET	ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS	struct user_pt_regs
# define SYSCALL_NUM	regs[8]
# define SYSCALL_RET	regs[0]
#elif defined(__riscv) && __riscv_xlen == 64
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	a7
# define SYSCALL_RET	a0
#elif defined(__hppa__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	gr[20]
# define SYSCALL_RET	gr[28]
#elif defined(__powerpc__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	gpr[0]
# define SYSCALL_RET	gpr[3]
#elif defined(__s390__)
# define ARCH_REGS     s390_regs
# define SYSCALL_NUM   gprs[2]
# define SYSCALL_RET   gprs[2]
#elif defined(__mips__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	regs[2]
# define SYSCALL_SYSCALL_NUM regs[4]
# define SYSCALL_RET	regs[2]
# define SYSCALL_NUM_RET_SHARE_REG
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
1634 
/*
 * EXPECT_SYSCALL_RETURN(val, action): evaluate @action and check its result
 * against @val. A negative @val means "fails with errno -val": the call
 * must return -1 and leave errno equal to -(val). Otherwise the call must
 * return @val itself.
 *
 * When the syscall return can't be changed, stub out the tests for it: the
 * call is only expected to return -1.
 *
 * @val is parenthesized at every use so that an expression argument is
 * compared as a whole.
 */
#ifdef SYSCALL_NUM_RET_SHARE_REG
# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action)		\
	do {						\
		errno = 0;				\
		if ((val) < 0) {			\
			EXPECT_EQ(-1, action);		\
			EXPECT_EQ(-(val), errno);	\
		} else {				\
			EXPECT_EQ((val), action);	\
		}					\
	} while (0)
#endif
1650 
/*
 * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
 *
 * When HAVE_GETREGS is not defined, the register helpers below use
 * PTRACE_GETREGSET/PTRACE_SETREGSET with NT_PRSTATUS instead.
 */
#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
#define HAVE_GETREGS
#endif
1657 
1658 /* Architecture-specific syscall fetching routine. */
get_syscall(struct __test_metadata * _metadata,pid_t tracee)1659 int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1660 {
1661 	ARCH_REGS regs;
1662 #ifdef HAVE_GETREGS
1663 	EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
1664 		TH_LOG("PTRACE_GETREGS failed");
1665 		return -1;
1666 	}
1667 #else
1668 	struct iovec iov;
1669 
1670 	iov.iov_base = &regs;
1671 	iov.iov_len = sizeof(regs);
1672 	EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1673 		TH_LOG("PTRACE_GETREGSET failed");
1674 		return -1;
1675 	}
1676 #endif
1677 
1678 #if defined(__mips__)
1679 	if (regs.SYSCALL_NUM == __NR_O32_Linux)
1680 		return regs.SYSCALL_SYSCALL_NUM;
1681 #endif
1682 	return regs.SYSCALL_NUM;
1683 }
1684 
/*
 * Architecture-specific syscall changing routine.
 *
 * Rewrites the syscall number of the stopped @tracee to @syscall. When
 * @syscall is -1, the return-value register is also set to @result, except
 * on architectures that define SYSCALL_NUM_RET_SHARE_REG, where this is
 * only logged. ptrace failures are reported through EXPECT_EQ and do not
 * stop the function.
 */
void change_syscall(struct __test_metadata *_metadata,
		    pid_t tracee, int syscall, int result)
{
	int ret;
	ARCH_REGS regs;
	/* Fetch the current register set. */
#ifdef HAVE_GETREGS
	ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
#else
	struct iovec iov;
	iov.iov_base = &regs;
	iov.iov_len = sizeof(regs);
	ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
#endif
	EXPECT_EQ(0, ret) {}

	/* Store the new syscall number the way each architecture requires. */
#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
	defined(__s390__) || defined(__hppa__) || defined(__riscv)
	{
		regs.SYSCALL_NUM = syscall;
	}
#elif defined(__mips__)
	{
		if (regs.SYSCALL_NUM == __NR_O32_Linux)
			regs.SYSCALL_SYSCALL_NUM = syscall;
		else
			regs.SYSCALL_NUM = syscall;
	}

#elif defined(__arm__)
# ifndef PTRACE_SET_SYSCALL
#  define PTRACE_SET_SYSCALL   23
# endif
	{
		/* Uses a dedicated ptrace request instead of a register write. */
		ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
		EXPECT_EQ(0, ret);
	}

#elif defined(__aarch64__)
# ifndef NT_ARM_SYSTEM_CALL
#  define NT_ARM_SYSTEM_CALL 0x404
# endif
	{
		/* Written through its own regset; iov is re-pointed at regs below. */
		iov.iov_base = &syscall;
		iov.iov_len = sizeof(syscall);
		ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
			     &iov);
		EXPECT_EQ(0, ret);
	}

#else
	ASSERT_EQ(1, 0) {
		TH_LOG("How is the syscall changed on this architecture?");
	}
#endif

	/* If syscall is skipped, change return value. */
	if (syscall == -1)
#ifdef SYSCALL_NUM_RET_SHARE_REG
		TH_LOG("Can't modify syscall return on this architecture");
#else
		regs.SYSCALL_RET = result;
#endif

	/* Write the (possibly modified) register set back to the tracee. */
#ifdef HAVE_GETREGS
	ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
#else
	iov.iov_base = &regs;
	iov.iov_len = sizeof(regs);
	ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
#endif
	EXPECT_EQ(0, ret);
}
1758 
tracer_syscall(struct __test_metadata * _metadata,pid_t tracee,int status,void * args)1759 void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1760 		    int status, void *args)
1761 {
1762 	int ret;
1763 	unsigned long msg;
1764 
1765 	/* Make sure we got the right message. */
1766 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1767 	EXPECT_EQ(0, ret);
1768 
1769 	/* Validate and take action on expected syscalls. */
1770 	switch (msg) {
1771 	case 0x1002:
1772 		/* change getpid to getppid. */
1773 		EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
1774 		change_syscall(_metadata, tracee, __NR_getppid, 0);
1775 		break;
1776 	case 0x1003:
1777 		/* skip gettid with valid return code. */
1778 		EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
1779 		change_syscall(_metadata, tracee, -1, 45000);
1780 		break;
1781 	case 0x1004:
1782 		/* skip openat with error. */
1783 		EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
1784 		change_syscall(_metadata, tracee, -1, -ESRCH);
1785 		break;
1786 	case 0x1005:
1787 		/* do nothing (allow getppid) */
1788 		EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
1789 		break;
1790 	default:
1791 		EXPECT_EQ(0, msg) {
1792 			TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1793 			kill(tracee, SIGKILL);
1794 		}
1795 	}
1796 
1797 }
1798 
tracer_ptrace(struct __test_metadata * _metadata,pid_t tracee,int status,void * args)1799 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
1800 		   int status, void *args)
1801 {
1802 	int ret, nr;
1803 	unsigned long msg;
1804 	static bool entry;
1805 
1806 	/*
1807 	 * The traditional way to tell PTRACE_SYSCALL entry/exit
1808 	 * is by counting.
1809 	 */
1810 	entry = !entry;
1811 
1812 	/* Make sure we got an appropriate message. */
1813 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1814 	EXPECT_EQ(0, ret);
1815 	EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
1816 			: PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
1817 
1818 	if (!entry)
1819 		return;
1820 
1821 	nr = get_syscall(_metadata, tracee);
1822 
1823 	if (nr == __NR_getpid)
1824 		change_syscall(_metadata, tracee, __NR_getppid, 0);
1825 	if (nr == __NR_gettid)
1826 		change_syscall(_metadata, tracee, -1, 45000);
1827 	if (nr == __NR_openat)
1828 		change_syscall(_metadata, tracee, -1, -ESRCH);
1829 }
1830 
/* Per-test state for the TRACE_syscall fixture. */
FIXTURE_DATA(TRACE_syscall) {
	/* Filter program built by the fixture setup. */
	struct sock_fprog prog;
	/*
	 * tracer: pid returned by setup_trace_fixture();
	 * mytid, mypid, parent: gettid/getpid/getppid results recorded in
	 * setup for the tests to compare against.
	 */
	pid_t tracer, mytid, mypid, parent;
};
1835 
FIXTURE_SETUP(TRACE_syscall)1836 FIXTURE_SETUP(TRACE_syscall)
1837 {
1838 	struct sock_filter filter[] = {
1839 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1840 			offsetof(struct seccomp_data, nr)),
1841 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1842 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1843 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1844 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1845 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1),
1846 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1847 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1848 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
1849 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1850 	};
1851 
1852 	memset(&self->prog, 0, sizeof(self->prog));
1853 	self->prog.filter = malloc(sizeof(filter));
1854 	ASSERT_NE(NULL, self->prog.filter);
1855 	memcpy(self->prog.filter, filter, sizeof(filter));
1856 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1857 
1858 	/* Prepare some testable syscall results. */
1859 	self->mytid = syscall(__NR_gettid);
1860 	ASSERT_GT(self->mytid, 0);
1861 	ASSERT_NE(self->mytid, 1) {
1862 		TH_LOG("Running this test as init is not supported. :)");
1863 	}
1864 
1865 	self->mypid = getpid();
1866 	ASSERT_GT(self->mypid, 0);
1867 	ASSERT_EQ(self->mytid, self->mypid);
1868 
1869 	self->parent = getppid();
1870 	ASSERT_GT(self->parent, 0);
1871 	ASSERT_NE(self->parent, self->mypid);
1872 
1873 	/* Launch tracer. */
1874 	self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
1875 					   false);
1876 }
1877 
FIXTURE_TEARDOWN(TRACE_syscall)1878 FIXTURE_TEARDOWN(TRACE_syscall)
1879 {
1880 	teardown_trace_fixture(_metadata, self->tracer);
1881 	if (self->prog.filter)
1882 		free(self->prog.filter);
1883 }
1884 
/*
 * b/147676645
 *
 * The three PTRACE_SYSCALL-based tests below are compiled out.
 * NOTE(review): the reason is only recorded in the bug referenced above;
 * confirm it is still valid before re-enabling.
 */
#if 0
/* getpid under the PTRACE_SYSCALL tracer must not return our own pid. */
TEST_F(TRACE_syscall, ptrace_syscall_redirected)
{
	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
	teardown_trace_fixture(_metadata, self->tracer);
	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
					   true);

	/* Tracer will redirect getpid to getppid. */
	EXPECT_NE(self->mypid, syscall(__NR_getpid));
}

/* openat under the PTRACE_SYSCALL tracer must fail with ESRCH. */
TEST_F(TRACE_syscall, ptrace_syscall_errno)
{
	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
	teardown_trace_fixture(_metadata, self->tracer);
	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
					   true);

	/* Tracer should skip the open syscall, resulting in ESRCH. */
	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
}

/* gettid under the PTRACE_SYSCALL tracer must return the faked 45000. */
TEST_F(TRACE_syscall, ptrace_syscall_faked)
{
	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
	teardown_trace_fixture(_metadata, self->tracer);
	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
					   true);

	/* Tracer should skip the gettid syscall, resulting fake pid. */
	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
}
#endif
1920 
TEST_F(TRACE_syscall,syscall_allowed)1921 TEST_F(TRACE_syscall, syscall_allowed)
1922 {
1923 	long ret;
1924 
1925 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1926 	ASSERT_EQ(0, ret);
1927 
1928 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1929 	ASSERT_EQ(0, ret);
1930 
1931 	/* getppid works as expected (no changes). */
1932 	EXPECT_EQ(self->parent, syscall(__NR_getppid));
1933 	EXPECT_NE(self->mypid, syscall(__NR_getppid));
1934 }
1935 
TEST_F(TRACE_syscall,syscall_redirected)1936 TEST_F(TRACE_syscall, syscall_redirected)
1937 {
1938 	long ret;
1939 
1940 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1941 	ASSERT_EQ(0, ret);
1942 
1943 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1944 	ASSERT_EQ(0, ret);
1945 
1946 	/* getpid has been redirected to getppid as expected. */
1947 	EXPECT_EQ(self->parent, syscall(__NR_getpid));
1948 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
1949 }
1950 
TEST_F(TRACE_syscall,syscall_errno)1951 TEST_F(TRACE_syscall, syscall_errno)
1952 {
1953 	long ret;
1954 
1955 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1956 	ASSERT_EQ(0, ret);
1957 
1958 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1959 	ASSERT_EQ(0, ret);
1960 
1961 	/* openat has been skipped and an errno return. */
1962 	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
1963 }
1964 
TEST_F(TRACE_syscall,syscall_faked)1965 TEST_F(TRACE_syscall, syscall_faked)
1966 {
1967 	long ret;
1968 
1969 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1970 	ASSERT_EQ(0, ret);
1971 
1972 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1973 	ASSERT_EQ(0, ret);
1974 
1975 	/* gettid has been skipped and an altered return value stored. */
1976 	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
1977 }
1978 
1979 /*
1980  * TODO: b/33027081
1981  * These tests do not work on kernels prior to 4.8.
1982  */
1983 #ifndef __ANDROID__
TEST_F(TRACE_syscall,skip_after_RET_TRACE)1984 TEST_F(TRACE_syscall, skip_after_RET_TRACE)
1985 {
1986 	struct sock_filter filter[] = {
1987 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1988 			offsetof(struct seccomp_data, nr)),
1989 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1990 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1991 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1992 	};
1993 	struct sock_fprog prog = {
1994 		.len = (unsigned short)ARRAY_SIZE(filter),
1995 		.filter = filter,
1996 	};
1997 	long ret;
1998 
1999 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2000 	ASSERT_EQ(0, ret);
2001 
2002 	/* Install fixture filter. */
2003 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
2004 	ASSERT_EQ(0, ret);
2005 
2006 	/* Install "errno on getppid" filter. */
2007 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2008 	ASSERT_EQ(0, ret);
2009 
2010 	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
2011 	errno = 0;
2012 	EXPECT_EQ(-1, syscall(__NR_getpid));
2013 	EXPECT_EQ(EPERM, errno);
2014 }
2015 
/*
 * The fixture filter has the tracer turn getpid into getppid; a second
 * filter returns KILL for getppid. The redirected call is expected to end
 * the test with SIGSYS.
 */
TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
{
	/* getppid returns KILL; everything else is allowed. */
	struct sock_filter kill_getppid[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog kill_prog = {
		.len = (unsigned short)ARRAY_SIZE(kill_getppid),
		.filter = kill_getppid,
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	/* Install fixture filter. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* Install "death on getppid" filter. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &kill_prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* Tracer will redirect getpid to getppid, and we should die. */
	EXPECT_NE(self->mypid, syscall(__NR_getpid));
}
2045 
TEST_F(TRACE_syscall,skip_after_ptrace)2046 TEST_F(TRACE_syscall, skip_after_ptrace)
2047 {
2048 	struct sock_filter filter[] = {
2049 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2050 			offsetof(struct seccomp_data, nr)),
2051 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2052 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
2053 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2054 	};
2055 	struct sock_fprog prog = {
2056 		.len = (unsigned short)ARRAY_SIZE(filter),
2057 		.filter = filter,
2058 	};
2059 	long ret;
2060 
2061 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
2062 	teardown_trace_fixture(_metadata, self->tracer);
2063 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
2064 					   true);
2065 
2066 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2067 	ASSERT_EQ(0, ret);
2068 
2069 	/* Install "errno on getppid" filter. */
2070 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2071 	ASSERT_EQ(0, ret);
2072 
2073 	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
2074 	EXPECT_EQ(-1, syscall(__NR_getpid));
2075 	EXPECT_EQ(EPERM, errno);
2076 }
2077 
TEST_F_SIGNAL(TRACE_syscall,kill_after_ptrace,SIGSYS)2078 TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
2079 {
2080 	struct sock_filter filter[] = {
2081 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2082 			offsetof(struct seccomp_data, nr)),
2083 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2084 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2085 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2086 	};
2087 	struct sock_fprog prog = {
2088 		.len = (unsigned short)ARRAY_SIZE(filter),
2089 		.filter = filter,
2090 	};
2091 	long ret;
2092 
2093 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
2094 	teardown_trace_fixture(_metadata, self->tracer);
2095 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
2096 					   true);
2097 
2098 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2099 	ASSERT_EQ(0, ret);
2100 
2101 	/* Install "death on getppid" filter. */
2102 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2103 	ASSERT_EQ(0, ret);
2104 
2105 	/* Tracer will redirect getpid to getppid, and we should die. */
2106 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
2107 }
2108 #endif
2109 
TEST(seccomp_syscall)2110 TEST(seccomp_syscall)
2111 {
2112 	struct sock_filter filter[] = {
2113 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2114 	};
2115 	struct sock_fprog prog = {
2116 		.len = (unsigned short)ARRAY_SIZE(filter),
2117 		.filter = filter,
2118 	};
2119 	long ret;
2120 
2121 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2122 	ASSERT_EQ(0, ret) {
2123 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2124 	}
2125 
2126 	/* Reject insane operation. */
2127 	ret = seccomp(-1, 0, &prog);
2128 	ASSERT_NE(ENOSYS, errno) {
2129 		TH_LOG("Kernel does not support seccomp syscall!");
2130 	}
2131 	EXPECT_EQ(EINVAL, errno) {
2132 		TH_LOG("Did not reject crazy op value!");
2133 	}
2134 
2135 	/* Reject strict with flags or pointer. */
2136 	ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
2137 	EXPECT_EQ(EINVAL, errno) {
2138 		TH_LOG("Did not reject mode strict with flags!");
2139 	}
2140 	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
2141 	EXPECT_EQ(EINVAL, errno) {
2142 		TH_LOG("Did not reject mode strict with uargs!");
2143 	}
2144 
2145 	/* Reject insane args for filter. */
2146 	ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
2147 	EXPECT_EQ(EINVAL, errno) {
2148 		TH_LOG("Did not reject crazy filter flags!");
2149 	}
2150 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
2151 	EXPECT_EQ(EFAULT, errno) {
2152 		TH_LOG("Did not reject NULL filter!");
2153 	}
2154 
2155 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2156 	EXPECT_EQ(0, errno) {
2157 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
2158 			strerror(errno));
2159 	}
2160 }
2161 
TEST(seccomp_syscall_mode_lock)2162 TEST(seccomp_syscall_mode_lock)
2163 {
2164 	struct sock_filter filter[] = {
2165 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2166 	};
2167 	struct sock_fprog prog = {
2168 		.len = (unsigned short)ARRAY_SIZE(filter),
2169 		.filter = filter,
2170 	};
2171 	long ret;
2172 
2173 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2174 	ASSERT_EQ(0, ret) {
2175 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2176 	}
2177 
2178 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2179 	ASSERT_NE(ENOSYS, errno) {
2180 		TH_LOG("Kernel does not support seccomp syscall!");
2181 	}
2182 	EXPECT_EQ(0, ret) {
2183 		TH_LOG("Could not install filter!");
2184 	}
2185 
2186 	/* Make sure neither entry point will switch to strict. */
2187 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
2188 	EXPECT_EQ(EINVAL, errno) {
2189 		TH_LOG("Switched to mode strict!");
2190 	}
2191 
2192 	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
2193 	EXPECT_EQ(EINVAL, errno) {
2194 		TH_LOG("Switched to mode strict!");
2195 	}
2196 }
2197 
/* The following test is compiled out (#if 0); see b/147676645. */
2199 #if 0
2200 /*
2201  * Test detection of known and unknown filter flags. Userspace needs to be able
2202  * to check if a filter flag is supported by the current kernel and a good way
2203  * of doing that is by attempting to enter filter mode, with the flag bit in
2204  * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
2205  * that the flag is valid and EINVAL indicates that the flag is invalid.
2206  */
2207 TEST(detect_seccomp_filter_flags)
2208 {
2209 	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
2210 				 SECCOMP_FILTER_FLAG_LOG,
2211 				 SECCOMP_FILTER_FLAG_SPEC_ALLOW,
2212 				 SECCOMP_FILTER_FLAG_NEW_LISTENER };
2213 	unsigned int exclusive[] = {
2214 				SECCOMP_FILTER_FLAG_TSYNC,
2215 				SECCOMP_FILTER_FLAG_NEW_LISTENER };
2216 	unsigned int flag, all_flags, exclusive_mask;
2217 	int i;
2218 	long ret;
2219 
2220 	/* Test detection of individual known-good filter flags */
2221 	for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
2222 		int bits = 0;
2223 
2224 		flag = flags[i];
2225 		/* Make sure the flag is a single bit! */
2226 		while (flag) {
2227 			if (flag & 0x1)
2228 				bits ++;
2229 			flag >>= 1;
2230 		}
2231 		ASSERT_EQ(1, bits);
2232 		flag = flags[i];
2233 
2234 		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2235 		ASSERT_NE(ENOSYS, errno) {
2236 			TH_LOG("Kernel does not support seccomp syscall!");
2237 		}
2238 		EXPECT_EQ(-1, ret);
2239 		EXPECT_EQ(EFAULT, errno) {
2240 			TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
2241 			       flag);
2242 		}
2243 
2244 		all_flags |= flag;
2245 	}
2246 
2247 	/*
2248 	 * Test detection of all known-good filter flags combined. But
2249 	 * for the exclusive flags we need to mask them out and try them
2250 	 * individually for the "all flags" testing.
2251 	 */
2252 	exclusive_mask = 0;
2253 	for (i = 0; i < ARRAY_SIZE(exclusive); i++)
2254 		exclusive_mask |= exclusive[i];
2255 	for (i = 0; i < ARRAY_SIZE(exclusive); i++) {
2256 		flag = all_flags & ~exclusive_mask;
2257 		flag |= exclusive[i];
2258 
2259 		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2260 		EXPECT_EQ(-1, ret);
2261 		EXPECT_EQ(EFAULT, errno) {
2262 			TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
2263 			       flag);
2264 		}
2265 	}
2266 
2267 	/* Test detection of an unknown filter flags, without exclusives. */
2268 	flag = -1;
2269 	flag &= ~exclusive_mask;
2270 	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2271 	EXPECT_EQ(-1, ret);
2272 	EXPECT_EQ(EINVAL, errno) {
2273 		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
2274 		       flag);
2275 	}
2276 
2277 	/*
2278 	 * Test detection of an unknown filter flag that may simply need to be
2279 	 * added to this test
2280 	 */
2281 	flag = flags[ARRAY_SIZE(flags) - 1] << 1;
2282 	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2283 	EXPECT_EQ(-1, ret);
2284 	EXPECT_EQ(EINVAL, errno) {
2285 		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
2286 		       flag);
2287 	}
2288 }
2289 #endif
2290 
TEST(TSYNC_first)2291 TEST(TSYNC_first)
2292 {
2293 	struct sock_filter filter[] = {
2294 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2295 	};
2296 	struct sock_fprog prog = {
2297 		.len = (unsigned short)ARRAY_SIZE(filter),
2298 		.filter = filter,
2299 	};
2300 	long ret;
2301 
2302 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2303 	ASSERT_EQ(0, ret) {
2304 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2305 	}
2306 
2307 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2308 		      &prog);
2309 	ASSERT_NE(ENOSYS, errno) {
2310 		TH_LOG("Kernel does not support seccomp syscall!");
2311 	}
2312 	EXPECT_EQ(0, ret) {
2313 		TH_LOG("Could not install initial filter with TSYNC!");
2314 	}
2315 }
2316 
/* Number of entries in the TSYNC fixture's sibling array. */
#define TSYNC_SIBLINGS 2
/* State shared between a TSYNC test and one of its sibling threads. */
struct tsync_sibling {
	/* Handle written by pthread_create(); PTHREAD_JOIN zeroes it after a successful join. */
	pthread_t tid;
	/* Kernel thread id, stored by the sibling itself via gettid. */
	pid_t system_tid;
	/* Posted by the sibling once its optional divergence step is done. */
	sem_t *started;
	/* Condition variable / mutex pair the sibling blocks on until it is woken. */
	pthread_cond_t *cond;
	pthread_mutex_t *mutex;
	/* Non-zero: the sibling installs `prog` on itself before waiting. */
	int diverge;
	/* Number of condition-variable wakeups the sibling consumes before moving on. */
	int num_waits;
	/* Filter program used when `diverge` is set. */
	struct sock_fprog *prog;
	/* Harness metadata pointer recorded by the fixture setup. */
	struct __test_metadata *metadata;
};
2329 
/*
 * To avoid joining joined threads (which is not allowed by Bionic),
 * make sure we both successfully join and clear the tid to skip a
 * later join attempt during fixture teardown. Any remaining threads
 * will be directly killed during teardown.
 *
 * @tid must be a modifiable lvalue (it is zeroed on success) and is
 * evaluated more than once; @status is handed to pthread_join() as is.
 * Both arguments are parenthesized so arbitrary expressions expand
 * safely.
 */
#define PTHREAD_JOIN(tid, status)					\
	do {								\
		int _rc = pthread_join((tid), (status));		\
		if (_rc) {						\
			TH_LOG("pthread_join of tid %u failed: %d\n",	\
				(unsigned int)(tid), _rc);		\
		} else {						\
			(tid) = 0;					\
		}							\
	} while (0)
2346 
/* Per-test state of the TSYNC fixture. */
FIXTURE_DATA(TSYNC) {
	/*
	 * Heap copies of the two filters built in FIXTURE_SETUP: root_prog
	 * allows everything, apply_prog kills on read().
	 */
	struct sock_fprog root_prog, apply_prog;
	struct tsync_sibling sibling[TSYNC_SIBLINGS];
	/* Semaphore and condvar/mutex pair handed to every sibling by pointer. */
	sem_t started;
	pthread_cond_t cond;
	pthread_mutex_t mutex;
	/* Siblings seen on `started` so far; also bounds the teardown loop. */
	int sibling_count;
};
2355 
FIXTURE_SETUP(TSYNC)2356 FIXTURE_SETUP(TSYNC)
2357 {
2358 	struct sock_filter root_filter[] = {
2359 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2360 	};
2361 	struct sock_filter apply_filter[] = {
2362 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2363 			offsetof(struct seccomp_data, nr)),
2364 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
2365 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2366 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2367 	};
2368 
2369 	memset(&self->root_prog, 0, sizeof(self->root_prog));
2370 	memset(&self->apply_prog, 0, sizeof(self->apply_prog));
2371 	memset(&self->sibling, 0, sizeof(self->sibling));
2372 	self->root_prog.filter = malloc(sizeof(root_filter));
2373 	ASSERT_NE(NULL, self->root_prog.filter);
2374 	memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
2375 	self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
2376 
2377 	self->apply_prog.filter = malloc(sizeof(apply_filter));
2378 	ASSERT_NE(NULL, self->apply_prog.filter);
2379 	memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
2380 	self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
2381 
2382 	self->sibling_count = 0;
2383 	pthread_mutex_init(&self->mutex, NULL);
2384 	pthread_cond_init(&self->cond, NULL);
2385 	sem_init(&self->started, 0, 0);
2386 	self->sibling[0].tid = 0;
2387 	self->sibling[0].cond = &self->cond;
2388 	self->sibling[0].started = &self->started;
2389 	self->sibling[0].mutex = &self->mutex;
2390 	self->sibling[0].diverge = 0;
2391 	self->sibling[0].num_waits = 1;
2392 	self->sibling[0].prog = &self->root_prog;
2393 	self->sibling[0].metadata = _metadata;
2394 	self->sibling[1].tid = 0;
2395 	self->sibling[1].cond = &self->cond;
2396 	self->sibling[1].started = &self->started;
2397 	self->sibling[1].mutex = &self->mutex;
2398 	self->sibling[1].diverge = 0;
2399 	self->sibling[1].prog = &self->root_prog;
2400 	self->sibling[1].num_waits = 1;
2401 	self->sibling[1].metadata = _metadata;
2402 }
2403 
FIXTURE_TEARDOWN(TSYNC)2404 FIXTURE_TEARDOWN(TSYNC)
2405 {
2406 	int sib = 0;
2407 
2408 	if (self->root_prog.filter)
2409 		free(self->root_prog.filter);
2410 	if (self->apply_prog.filter)
2411 		free(self->apply_prog.filter);
2412 
2413 	for ( ; sib < self->sibling_count; ++sib) {
2414 		struct tsync_sibling *s = &self->sibling[sib];
2415 
2416 		if (!s->tid)
2417 			continue;
2418 		/*
2419 		 * If a thread is still running, it may be stuck, so hit
2420 		 * it over the head really hard.
2421 		 */
2422 		pthread_kill(s->tid, 9);
2423 	}
2424 	pthread_mutex_destroy(&self->mutex);
2425 	pthread_cond_destroy(&self->cond);
2426 	sem_destroy(&self->started);
2427 }
2428 
tsync_sibling(void * data)2429 void *tsync_sibling(void *data)
2430 {
2431 	long ret = 0;
2432 	struct tsync_sibling *me = data;
2433 
2434 	me->system_tid = syscall(__NR_gettid);
2435 
2436 	pthread_mutex_lock(me->mutex);
2437 	if (me->diverge) {
2438 		/* Just re-apply the root prog to fork the tree */
2439 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
2440 				me->prog, 0, 0);
2441 	}
2442 	sem_post(me->started);
2443 	/* Return outside of started so parent notices failures. */
2444 	if (ret) {
2445 		pthread_mutex_unlock(me->mutex);
2446 		return (void *)SIBLING_EXIT_FAILURE;
2447 	}
2448 	do {
2449 		pthread_cond_wait(me->cond, me->mutex);
2450 		me->num_waits = me->num_waits - 1;
2451 	} while (me->num_waits);
2452 	pthread_mutex_unlock(me->mutex);
2453 
2454 	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
2455 	if (!ret)
2456 		return (void *)SIBLING_EXIT_NEWPRIVS;
2457 	read(0, NULL, 0);
2458 	return (void *)SIBLING_EXIT_UNKILLED;
2459 }
2460 
tsync_start_sibling(struct tsync_sibling * sibling)2461 void tsync_start_sibling(struct tsync_sibling *sibling)
2462 {
2463 	pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
2464 }
2465 
TEST_F(TSYNC,siblings_fail_prctl)2466 TEST_F(TSYNC, siblings_fail_prctl)
2467 {
2468 	long ret;
2469 	void *status;
2470 	struct sock_filter filter[] = {
2471 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2472 			offsetof(struct seccomp_data, nr)),
2473 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
2474 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
2475 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2476 	};
2477 	struct sock_fprog prog = {
2478 		.len = (unsigned short)ARRAY_SIZE(filter),
2479 		.filter = filter,
2480 	};
2481 
2482 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2483 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2484 	}
2485 
2486 	/* Check prctl failure detection by requesting sib 0 diverge. */
2487 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2488 	ASSERT_NE(ENOSYS, errno) {
2489 		TH_LOG("Kernel does not support seccomp syscall!");
2490 	}
2491 	ASSERT_EQ(0, ret) {
2492 		TH_LOG("setting filter failed");
2493 	}
2494 
2495 	self->sibling[0].diverge = 1;
2496 	tsync_start_sibling(&self->sibling[0]);
2497 	tsync_start_sibling(&self->sibling[1]);
2498 
2499 	while (self->sibling_count < TSYNC_SIBLINGS) {
2500 		sem_wait(&self->started);
2501 		self->sibling_count++;
2502 	}
2503 
2504 	/* Signal the threads to clean up*/
2505 	pthread_mutex_lock(&self->mutex);
2506 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2507 		TH_LOG("cond broadcast non-zero");
2508 	}
2509 	pthread_mutex_unlock(&self->mutex);
2510 
2511 	/* Ensure diverging sibling failed to call prctl. */
2512 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2513 	EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
2514 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2515 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2516 }
2517 
TEST_F(TSYNC,two_siblings_with_ancestor)2518 TEST_F(TSYNC, two_siblings_with_ancestor)
2519 {
2520 	long ret;
2521 	void *status;
2522 
2523 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2524 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2525 	}
2526 
2527 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2528 	ASSERT_NE(ENOSYS, errno) {
2529 		TH_LOG("Kernel does not support seccomp syscall!");
2530 	}
2531 	ASSERT_EQ(0, ret) {
2532 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2533 	}
2534 	tsync_start_sibling(&self->sibling[0]);
2535 	tsync_start_sibling(&self->sibling[1]);
2536 
2537 	while (self->sibling_count < TSYNC_SIBLINGS) {
2538 		sem_wait(&self->started);
2539 		self->sibling_count++;
2540 	}
2541 
2542 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2543 		      &self->apply_prog);
2544 	ASSERT_EQ(0, ret) {
2545 		TH_LOG("Could install filter on all threads!");
2546 	}
2547 	/* Tell the siblings to test the policy */
2548 	pthread_mutex_lock(&self->mutex);
2549 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2550 		TH_LOG("cond broadcast non-zero");
2551 	}
2552 	pthread_mutex_unlock(&self->mutex);
2553 	/* Ensure they are both killed and don't exit cleanly. */
2554 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2555 	EXPECT_EQ(0x0, (long)status);
2556 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2557 	EXPECT_EQ(0x0, (long)status);
2558 }
2559 
TEST_F(TSYNC,two_sibling_want_nnp)2560 TEST_F(TSYNC, two_sibling_want_nnp)
2561 {
2562 	void *status;
2563 
2564 	/* start siblings before any prctl() operations */
2565 	tsync_start_sibling(&self->sibling[0]);
2566 	tsync_start_sibling(&self->sibling[1]);
2567 	while (self->sibling_count < TSYNC_SIBLINGS) {
2568 		sem_wait(&self->started);
2569 		self->sibling_count++;
2570 	}
2571 
2572 	/* Tell the siblings to test no policy */
2573 	pthread_mutex_lock(&self->mutex);
2574 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2575 		TH_LOG("cond broadcast non-zero");
2576 	}
2577 	pthread_mutex_unlock(&self->mutex);
2578 
2579 	/* Ensure they are both upset about lacking nnp. */
2580 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2581 	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2582 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2583 	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2584 }
2585 
TEST_F(TSYNC,two_siblings_with_no_filter)2586 TEST_F(TSYNC, two_siblings_with_no_filter)
2587 {
2588 	long ret;
2589 	void *status;
2590 
2591 	/* start siblings before any prctl() operations */
2592 	tsync_start_sibling(&self->sibling[0]);
2593 	tsync_start_sibling(&self->sibling[1]);
2594 	while (self->sibling_count < TSYNC_SIBLINGS) {
2595 		sem_wait(&self->started);
2596 		self->sibling_count++;
2597 	}
2598 
2599 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2600 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2601 	}
2602 
2603 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2604 		      &self->apply_prog);
2605 	ASSERT_NE(ENOSYS, errno) {
2606 		TH_LOG("Kernel does not support seccomp syscall!");
2607 	}
2608 	ASSERT_EQ(0, ret) {
2609 		TH_LOG("Could install filter on all threads!");
2610 	}
2611 
2612 	/* Tell the siblings to test the policy */
2613 	pthread_mutex_lock(&self->mutex);
2614 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2615 		TH_LOG("cond broadcast non-zero");
2616 	}
2617 	pthread_mutex_unlock(&self->mutex);
2618 
2619 	/* Ensure they are both killed and don't exit cleanly. */
2620 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2621 	EXPECT_EQ(0x0, (long)status);
2622 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2623 	EXPECT_EQ(0x0, (long)status);
2624 }
2625 
TEST_F(TSYNC,two_siblings_with_one_divergence)2626 TEST_F(TSYNC, two_siblings_with_one_divergence)
2627 {
2628 	long ret;
2629 	void *status;
2630 
2631 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2632 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2633 	}
2634 
2635 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2636 	ASSERT_NE(ENOSYS, errno) {
2637 		TH_LOG("Kernel does not support seccomp syscall!");
2638 	}
2639 	ASSERT_EQ(0, ret) {
2640 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2641 	}
2642 	self->sibling[0].diverge = 1;
2643 	tsync_start_sibling(&self->sibling[0]);
2644 	tsync_start_sibling(&self->sibling[1]);
2645 
2646 	while (self->sibling_count < TSYNC_SIBLINGS) {
2647 		sem_wait(&self->started);
2648 		self->sibling_count++;
2649 	}
2650 
2651 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2652 		      &self->apply_prog);
2653 	ASSERT_EQ(self->sibling[0].system_tid, ret) {
2654 		TH_LOG("Did not fail on diverged sibling.");
2655 	}
2656 
2657 	/* Wake the threads */
2658 	pthread_mutex_lock(&self->mutex);
2659 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2660 		TH_LOG("cond broadcast non-zero");
2661 	}
2662 	pthread_mutex_unlock(&self->mutex);
2663 
2664 	/* Ensure they are both unkilled. */
2665 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2666 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2667 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2668 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2669 }
2670 
TEST_F(TSYNC,two_siblings_not_under_filter)2671 TEST_F(TSYNC, two_siblings_not_under_filter)
2672 {
2673 	long ret, sib;
2674 	void *status;
2675 	struct timespec delay = { .tv_nsec = 100000000 };
2676 
2677 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2678 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2679 	}
2680 
2681 	/*
2682 	 * Sibling 0 will have its own seccomp policy
2683 	 * and Sibling 1 will not be under seccomp at
2684 	 * all. Sibling 1 will enter seccomp and 0
2685 	 * will cause failure.
2686 	 */
2687 	self->sibling[0].diverge = 1;
2688 	tsync_start_sibling(&self->sibling[0]);
2689 	tsync_start_sibling(&self->sibling[1]);
2690 
2691 	while (self->sibling_count < TSYNC_SIBLINGS) {
2692 		sem_wait(&self->started);
2693 		self->sibling_count++;
2694 	}
2695 
2696 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2697 	ASSERT_NE(ENOSYS, errno) {
2698 		TH_LOG("Kernel does not support seccomp syscall!");
2699 	}
2700 	ASSERT_EQ(0, ret) {
2701 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2702 	}
2703 
2704 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2705 		      &self->apply_prog);
2706 	ASSERT_EQ(ret, self->sibling[0].system_tid) {
2707 		TH_LOG("Did not fail on diverged sibling.");
2708 	}
2709 	sib = 1;
2710 	if (ret == self->sibling[0].system_tid)
2711 		sib = 0;
2712 
2713 	pthread_mutex_lock(&self->mutex);
2714 
2715 	/* Increment the other siblings num_waits so we can clean up
2716 	 * the one we just saw.
2717 	 */
2718 	self->sibling[!sib].num_waits += 1;
2719 
2720 	/* Signal the thread to clean up*/
2721 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2722 		TH_LOG("cond broadcast non-zero");
2723 	}
2724 	pthread_mutex_unlock(&self->mutex);
2725 	PTHREAD_JOIN(self->sibling[sib].tid, &status);
2726 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2727 	/* Poll for actual task death. pthread_join doesn't guarantee it. */
2728 	while (!kill(self->sibling[sib].system_tid, 0))
2729 		nanosleep(&delay, NULL);
2730 	/* Switch to the remaining sibling */
2731 	sib = !sib;
2732 
2733 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2734 		      &self->apply_prog);
2735 	ASSERT_EQ(0, ret) {
2736 		TH_LOG("Expected the remaining sibling to sync");
2737 	};
2738 
2739 	pthread_mutex_lock(&self->mutex);
2740 
2741 	/* If remaining sibling didn't have a chance to wake up during
2742 	 * the first broadcast, manually reduce the num_waits now.
2743 	 */
2744 	if (self->sibling[sib].num_waits > 1)
2745 		self->sibling[sib].num_waits = 1;
2746 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2747 		TH_LOG("cond broadcast non-zero");
2748 	}
2749 	pthread_mutex_unlock(&self->mutex);
2750 	PTHREAD_JOIN(self->sibling[sib].tid, &status);
2751 	EXPECT_EQ(0, (long)status);
2752 	/* Poll for actual task death. pthread_join doesn't guarantee it. */
2753 	while (!kill(self->sibling[sib].system_tid, 0))
2754 		nanosleep(&delay, NULL);
2755 
2756 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2757 		      &self->apply_prog);
2758 	ASSERT_EQ(0, ret);  /* just us chickens */
2759 }
2760 
/*
 * Make sure restarted syscalls are seen directly as "restart_syscall".
 *
 * A forked child asks to be traced by the parent, installs a filter that
 * traces nanosleep and restart_syscall, and then sleeps.  The parent
 * interrupts the sleep with SIGSTOP/SIGCONT and checks which syscall
 * number is reported for the second seccomp trace event.
 */
TEST(syscall_restart)
{
	long ret;
	unsigned long msg;
	pid_t child_pid;
	int pipefd[2];
	int status;
	siginfo_t info = { };
	/*
	 * sigreturn (where defined), read, exit and rt_sigreturn jump to
	 * the ALLOW return; nanosleep and restart_syscall jump to their
	 * own TRACE returns (event data 0x100 and 0x200); write is
	 * allowed and everything else is killed.
	 */
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			 offsetof(struct seccomp_data, nr)),

#ifdef __NR_sigreturn
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
#endif
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),

		/* Allow __NR_write for easy logging. */
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		/* The nanosleep jump target. */
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
		/* The restart_syscall jump target. */
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
#if defined(__arm__)
	struct utsname utsbuf;
	int arm_version;
#endif

	/* Pipe used by the parent to pace the child ('.' then '!'). */
	ASSERT_EQ(0, pipe(pipefd));

	child_pid = fork();
	ASSERT_LE(0, child_pid);
	if (child_pid == 0) {
		/* Child uses EXPECT not ASSERT to deliver status correctly. */
		char buf = ' ';
		struct timespec timeout = { };

		/* Attach parent as tracer and stop. */
		EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
		EXPECT_EQ(0, raise(SIGSTOP));

		EXPECT_EQ(0, close(pipefd[1]));

		EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
			TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
		}

		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
		EXPECT_EQ(0, ret) {
			TH_LOG("Failed to install filter!");
		}

		/* Block until the parent has set its ptrace options. */
		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
			TH_LOG("Failed to read() sync from parent");
		}
		EXPECT_EQ('.', buf) {
			TH_LOG("Failed to get sync data from read()");
		}

		/* Start nanosleep to be interrupted. */
		timeout.tv_sec = 1;
		errno = 0;
		EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
			TH_LOG("Call to nanosleep() failed (errno %d)", errno);
		}

		/* Read final sync from parent. */
		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
			TH_LOG("Failed final read() from parent");
		}
		EXPECT_EQ('!', buf) {
			TH_LOG("Failed to get final data from read()");
		}

		/*
		 * Directly report the status of our test harness results.
		 * The raw exit syscall is used; it is on the filter's
		 * allow list above.
		 */
		syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
						     : EXIT_FAILURE);
	}
	EXPECT_EQ(0, close(pipefd[0]));

	/* Attach to child, setup options, and release. */
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
			    PTRACE_O_TRACESECCOMP));
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
	ASSERT_EQ(1, write(pipefd[1], ".", 1));

	/* Wait for nanosleep() to start. */
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
	/* 0x100 is the data attached to the nanosleep TRACE return. */
	ASSERT_EQ(0x100, msg);
	EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid));

	/* Might as well check siginfo for sanity while we're here. */
	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
	ASSERT_EQ(SIGTRAP, info.si_signo);
	ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
	EXPECT_EQ(0, info.si_errno);
	EXPECT_EQ(getuid(), info.si_uid);
	/* Verify signal delivery came from child (seccomp-triggered). */
	EXPECT_EQ(child_pid, info.si_pid);

	/* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
	ASSERT_EQ(0, kill(child_pid, SIGSTOP));
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
	/*
	 * There is no siginfo on SIGSTOP any more, so we can't verify
	 * signal delivery came from parent now (getpid() == info.si_pid).
	 * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com
	 * At least verify the SIGSTOP via PTRACE_GETSIGINFO.
	 */
	EXPECT_EQ(SIGSTOP, info.si_signo);

	/* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
	ASSERT_EQ(0, kill(child_pid, SIGCONT));
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(SIGCONT, WSTOPSIG(status));
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));

	/* Wait for restart_syscall() to start. */
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));

	/* 0x200 is the data attached to the restart_syscall TRACE return. */
	ASSERT_EQ(0x200, msg);
	ret = get_syscall(_metadata, child_pid);
#if defined(__arm__)
	/*
	 * - native ARM registers do NOT expose true syscall.
	 * - compat ARM registers on ARM64 DO expose true syscall.
	 */
	ASSERT_EQ(0, uname(&utsbuf));
	if (sscanf(utsbuf.machine, "armv%d", &arm_version) == 1 &&
	    arm_version < 8) {
		EXPECT_EQ(__NR_nanosleep, ret);
	} else
#endif
	{
		EXPECT_EQ(__NR_restart_syscall, ret);
	}

	/* Write again to end test. */
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
	ASSERT_EQ(1, write(pipefd[1], "!", 1));
	EXPECT_EQ(0, close(pipefd[1]));

	/* Fold the child's result (killed, or non-zero exit) into ours. */
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	if (WIFSIGNALED(status) || WEXITSTATUS(status))
		_metadata->passed = 0;
}
2935 
/* The following tests are compiled out (#if 0); see b/147676645. */
2937 #if 0
2938 TEST_SIGNAL(filter_flag_log, SIGSYS)
2939 {
2940 	struct sock_filter allow_filter[] = {
2941 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2942 	};
2943 	struct sock_filter kill_filter[] = {
2944 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2945 			offsetof(struct seccomp_data, nr)),
2946 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
2947 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2948 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2949 	};
2950 	struct sock_fprog allow_prog = {
2951 		.len = (unsigned short)ARRAY_SIZE(allow_filter),
2952 		.filter = allow_filter,
2953 	};
2954 	struct sock_fprog kill_prog = {
2955 		.len = (unsigned short)ARRAY_SIZE(kill_filter),
2956 		.filter = kill_filter,
2957 	};
2958 	long ret;
2959 	pid_t parent = getppid();
2960 
2961 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2962 	ASSERT_EQ(0, ret);
2963 
2964 	/* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
2965 	ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
2966 		      &allow_prog);
2967 	ASSERT_NE(ENOSYS, errno) {
2968 		TH_LOG("Kernel does not support seccomp syscall!");
2969 	}
2970 	EXPECT_NE(0, ret) {
2971 		TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
2972 	}
2973 	EXPECT_EQ(EINVAL, errno) {
2974 		TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
2975 	}
2976 
2977 	/* Verify that a simple, permissive filter can be added with no flags */
2978 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
2979 	EXPECT_EQ(0, ret);
2980 
2981 	/* See if the same filter can be added with the FILTER_FLAG_LOG flag */
2982 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
2983 		      &allow_prog);
2984 	ASSERT_NE(EINVAL, errno) {
2985 		TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
2986 	}
2987 	EXPECT_EQ(0, ret);
2988 
2989 	/* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
2990 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
2991 		      &kill_prog);
2992 	EXPECT_EQ(0, ret);
2993 
2994 	EXPECT_EQ(parent, syscall(__NR_getppid));
2995 	/* getpid() should never return. */
2996 	EXPECT_EQ(0, syscall(__NR_getpid));
2997 }
2998 
2999 TEST(get_action_avail)
3000 {
3001 	__u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
3002 			    SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
3003 			    SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
3004 	__u32 unknown_action = 0x10000000U;
3005 	int i;
3006 	long ret;
3007 
3008 	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
3009 	ASSERT_NE(ENOSYS, errno) {
3010 		TH_LOG("Kernel does not support seccomp syscall!");
3011 	}
3012 	ASSERT_NE(EINVAL, errno) {
3013 		TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
3014 	}
3015 	EXPECT_EQ(ret, 0);
3016 
3017 	for (i = 0; i < ARRAY_SIZE(actions); i++) {
3018 		ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
3019 		EXPECT_EQ(ret, 0) {
3020 			TH_LOG("Expected action (0x%X) not available!",
3021 			       actions[i]);
3022 		}
3023 	}
3024 
3025 	/* Check that an unknown action is handled properly (EOPNOTSUPP) */
3026 	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
3027 	EXPECT_EQ(ret, -1);
3028 	EXPECT_EQ(errno, EOPNOTSUPP);
3029 }
3030 
3031 TEST(get_metadata)
3032 {
3033 	pid_t pid;
3034 	int pipefd[2];
3035 	char buf;
3036 	struct seccomp_metadata md;
3037 	long ret;
3038 
3039 	/* Only real root can get metadata. */
3040 	if (geteuid()) {
3041 		XFAIL(return, "get_metadata requires real root");
3042 		return;
3043 	}
3044 
3045 	ASSERT_EQ(0, pipe(pipefd));
3046 
3047 	pid = fork();
3048 	ASSERT_GE(pid, 0);
3049 	if (pid == 0) {
3050 		struct sock_filter filter[] = {
3051 			BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3052 		};
3053 		struct sock_fprog prog = {
3054 			.len = (unsigned short)ARRAY_SIZE(filter),
3055 			.filter = filter,
3056 		};
3057 
3058 		/* one with log, one without */
3059 		EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
3060 				     SECCOMP_FILTER_FLAG_LOG, &prog));
3061 		EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
3062 
3063 		EXPECT_EQ(0, close(pipefd[0]));
3064 		ASSERT_EQ(1, write(pipefd[1], "1", 1));
3065 		ASSERT_EQ(0, close(pipefd[1]));
3066 
3067 		while (1)
3068 			sleep(100);
3069 	}
3070 
3071 	ASSERT_EQ(0, close(pipefd[1]));
3072 	ASSERT_EQ(1, read(pipefd[0], &buf, 1));
3073 
3074 	ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
3075 	ASSERT_EQ(pid, waitpid(pid, NULL, 0));
3076 
3077 	/* Past here must not use ASSERT or child process is never killed. */
3078 
3079 	md.filter_off = 0;
3080 	errno = 0;
3081 	ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
3082 	EXPECT_EQ(sizeof(md), ret) {
3083 		if (errno == EINVAL)
3084 			XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
3085 	}
3086 
3087 	EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
3088 	EXPECT_EQ(md.filter_off, 0);
3089 
3090 	md.filter_off = 1;
3091 	ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
3092 	EXPECT_EQ(sizeof(md), ret);
3093 	EXPECT_EQ(md.flags, 0);
3094 	EXPECT_EQ(md.filter_off, 1);
3095 
3096 skip:
3097 	ASSERT_EQ(0, kill(pid, SIGKILL));
3098 }
3099 #endif
3100 
3101 /* b/147676645 */
3102 #if 0
/*
 * Install a seccomp filter that answers syscall number @nr with
 * SECCOMP_RET_USER_NOTIF and allows every other syscall.
 *
 * @nr:    syscall number to hand over to user notification.
 * @flags: flags passed straight through to SECCOMP_SET_MODE_FILTER.
 *
 * Returns whatever seccomp() returns for the installation; the callers in
 * this file treat a non-negative value obtained with
 * SECCOMP_FILTER_FLAG_NEW_LISTENER as the listener file descriptor.
 */
static int user_trap_syscall(int nr, unsigned int flags)
{
	struct sock_filter filter[] = {
		/* Load the syscall number from the seccomp data. */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		/* Match: fall through to USER_NOTIF; otherwise skip to ALLOW. */
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.filter = filter,
		.len = (unsigned short)ARRAY_SIZE(filter),
	};

	return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
}
3120 
3121 #define USER_NOTIF_MAGIC INT_MAX
3122 TEST(user_notification_basic)
3123 {
3124 	pid_t pid;
3125 	long ret;
3126 	int status, listener;
3127 	struct seccomp_notif req = {};
3128 	struct seccomp_notif_resp resp = {};
3129 	struct pollfd pollfd;
3130 
3131 	struct sock_filter filter[] = {
3132 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3133 	};
3134 	struct sock_fprog prog = {
3135 		.len = (unsigned short)ARRAY_SIZE(filter),
3136 		.filter = filter,
3137 	};
3138 
3139 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3140 	ASSERT_EQ(0, ret) {
3141 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3142 	}
3143 
3144 	pid = fork();
3145 	ASSERT_GE(pid, 0);
3146 
3147 	/* Check that we get -ENOSYS with no listener attached */
3148 	if (pid == 0) {
3149 		if (user_trap_syscall(__NR_getppid, 0) < 0)
3150 			exit(1);
3151 		ret = syscall(__NR_getppid);
3152 		exit(ret >= 0 || errno != ENOSYS);
3153 	}
3154 
3155 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3156 	EXPECT_EQ(true, WIFEXITED(status));
3157 	EXPECT_EQ(0, WEXITSTATUS(status));
3158 
3159 	/* Add some no-op filters for grins. */
3160 	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3161 	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3162 	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3163 	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3164 
3165 	/* Check that the basic notification machinery works */
3166 	listener = user_trap_syscall(__NR_getppid,
3167 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3168 	ASSERT_GE(listener, 0);
3169 
3170 	/* Installing a second listener in the chain should EBUSY */
3171 	EXPECT_EQ(user_trap_syscall(__NR_getppid,
3172 				    SECCOMP_FILTER_FLAG_NEW_LISTENER),
3173 		  -1);
3174 	EXPECT_EQ(errno, EBUSY);
3175 
3176 	pid = fork();
3177 	ASSERT_GE(pid, 0);
3178 
3179 	if (pid == 0) {
3180 		ret = syscall(__NR_getppid);
3181 		exit(ret != USER_NOTIF_MAGIC);
3182 	}
3183 
3184 	pollfd.fd = listener;
3185 	pollfd.events = POLLIN | POLLOUT;
3186 
3187 	EXPECT_GT(poll(&pollfd, 1, -1), 0);
3188 	EXPECT_EQ(pollfd.revents, POLLIN);
3189 
3190 	/* Test that we can't pass garbage to the kernel. */
3191 	memset(&req, 0, sizeof(req));
3192 	req.pid = -1;
3193 	errno = 0;
3194 	ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
3195 	EXPECT_EQ(-1, ret);
3196 	EXPECT_EQ(EINVAL, errno);
3197 
3198 	if (ret) {
3199 		req.pid = 0;
3200 		EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3201 	}
3202 
3203 	pollfd.fd = listener;
3204 	pollfd.events = POLLIN | POLLOUT;
3205 
3206 	EXPECT_GT(poll(&pollfd, 1, -1), 0);
3207 	EXPECT_EQ(pollfd.revents, POLLOUT);
3208 
3209 	EXPECT_EQ(req.data.nr,  __NR_getppid);
3210 
3211 	resp.id = req.id;
3212 	resp.error = 0;
3213 	resp.val = USER_NOTIF_MAGIC;
3214 
3215 	/* check that we make sure flags == 0 */
3216 	resp.flags = 1;
3217 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3218 	EXPECT_EQ(errno, EINVAL);
3219 
3220 	resp.flags = 0;
3221 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3222 
3223 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3224 	EXPECT_EQ(true, WIFEXITED(status));
3225 	EXPECT_EQ(0, WEXITSTATUS(status));
3226 }
3227 
3228 TEST(user_notification_kill_in_middle)
3229 {
3230 	pid_t pid;
3231 	long ret;
3232 	int listener;
3233 	struct seccomp_notif req = {};
3234 	struct seccomp_notif_resp resp = {};
3235 
3236 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3237 	ASSERT_EQ(0, ret) {
3238 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3239 	}
3240 
3241 	listener = user_trap_syscall(__NR_getppid,
3242 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3243 	ASSERT_GE(listener, 0);
3244 
3245 	/*
3246 	 * Check that nothing bad happens when we kill the task in the middle
3247 	 * of a syscall.
3248 	 */
3249 	pid = fork();
3250 	ASSERT_GE(pid, 0);
3251 
3252 	if (pid == 0) {
3253 		ret = syscall(__NR_getppid);
3254 		exit(ret != USER_NOTIF_MAGIC);
3255 	}
3256 
3257 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3258 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0);
3259 
3260 	EXPECT_EQ(kill(pid, SIGKILL), 0);
3261 	EXPECT_EQ(waitpid(pid, NULL, 0), pid);
3262 
3263 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1);
3264 
3265 	resp.id = req.id;
3266 	ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp);
3267 	EXPECT_EQ(ret, -1);
3268 	EXPECT_EQ(errno, ENOENT);
3269 }
3270 
/*
 * Descriptor the signal handler reports to; stays -1 until a test assigns
 * one end of its socket pair.
 */
static int handled = -1;

/*
 * Signal handler: write a single 'c' byte to @handled so that whoever reads
 * the other end learns the signal was delivered. A short or failed write is
 * reported with perror().
 */
static void signal_handler(int signal)
{
	ssize_t written = write(handled, "c", 1);

	if (written != 1)
		perror("write from signal");
}
3278 
3279 TEST(user_notification_signal)
3280 {
3281 	pid_t pid;
3282 	long ret;
3283 	int status, listener, sk_pair[2];
3284 	struct seccomp_notif req = {};
3285 	struct seccomp_notif_resp resp = {};
3286 	char c;
3287 
3288 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3289 	ASSERT_EQ(0, ret) {
3290 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3291 	}
3292 
3293 	ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
3294 
3295 	listener = user_trap_syscall(__NR_gettid,
3296 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3297 	ASSERT_GE(listener, 0);
3298 
3299 	pid = fork();
3300 	ASSERT_GE(pid, 0);
3301 
3302 	if (pid == 0) {
3303 		close(sk_pair[0]);
3304 		handled = sk_pair[1];
3305 		if (signal(SIGUSR1, signal_handler) == SIG_ERR) {
3306 			perror("signal");
3307 			exit(1);
3308 		}
3309 		/*
3310 		 * ERESTARTSYS behavior is a bit hard to test, because we need
3311 		 * to rely on a signal that has not yet been handled. Let's at
3312 		 * least check that the error code gets propagated through, and
3313 		 * hope that it doesn't break when there is actually a signal :)
3314 		 */
3315 		ret = syscall(__NR_gettid);
3316 		exit(!(ret == -1 && errno == 512));
3317 	}
3318 
3319 	close(sk_pair[1]);
3320 
3321 	memset(&req, 0, sizeof(req));
3322 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3323 
3324 	EXPECT_EQ(kill(pid, SIGUSR1), 0);
3325 
3326 	/*
3327 	 * Make sure the signal really is delivered, which means we're not
3328 	 * stuck in the user notification code any more and the notification
3329 	 * should be dead.
3330 	 */
3331 	EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
3332 
3333 	resp.id = req.id;
3334 	resp.error = -EPERM;
3335 	resp.val = 0;
3336 
3337 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3338 	EXPECT_EQ(errno, ENOENT);
3339 
3340 	memset(&req, 0, sizeof(req));
3341 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3342 
3343 	resp.id = req.id;
3344 	resp.error = -512; /* -ERESTARTSYS */
3345 	resp.val = 0;
3346 
3347 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3348 
3349 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3350 	EXPECT_EQ(true, WIFEXITED(status));
3351 	EXPECT_EQ(0, WEXITSTATUS(status));
3352 }
3353 
3354 TEST(user_notification_closed_listener)
3355 {
3356 	pid_t pid;
3357 	long ret;
3358 	int status, listener;
3359 
3360 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3361 	ASSERT_EQ(0, ret) {
3362 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3363 	}
3364 
3365 	listener = user_trap_syscall(__NR_getppid,
3366 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3367 	ASSERT_GE(listener, 0);
3368 
3369 	/*
3370 	 * Check that we get an ENOSYS when the listener is closed.
3371 	 */
3372 	pid = fork();
3373 	ASSERT_GE(pid, 0);
3374 	if (pid == 0) {
3375 		close(listener);
3376 		ret = syscall(__NR_getppid);
3377 		exit(ret != -1 && errno != ENOSYS);
3378 	}
3379 
3380 	close(listener);
3381 
3382 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3383 	EXPECT_EQ(true, WIFEXITED(status));
3384 	EXPECT_EQ(0, WEXITSTATUS(status));
3385 }
3386 
3387 /*
3388  * Check that a pid in a child namespace still shows up as valid in ours.
3389  */
3390 TEST(user_notification_child_pid_ns)
3391 {
3392 	pid_t pid;
3393 	int status, listener;
3394 	struct seccomp_notif req = {};
3395 	struct seccomp_notif_resp resp = {};
3396 
3397 	ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0);
3398 
3399 	listener = user_trap_syscall(__NR_getppid,
3400 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3401 	ASSERT_GE(listener, 0);
3402 
3403 	pid = fork();
3404 	ASSERT_GE(pid, 0);
3405 
3406 	if (pid == 0)
3407 		exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3408 
3409 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3410 	EXPECT_EQ(req.pid, pid);
3411 
3412 	resp.id = req.id;
3413 	resp.error = 0;
3414 	resp.val = USER_NOTIF_MAGIC;
3415 
3416 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3417 
3418 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3419 	EXPECT_EQ(true, WIFEXITED(status));
3420 	EXPECT_EQ(0, WEXITSTATUS(status));
3421 	close(listener);
3422 }
3423 
3424 /*
3425  * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e.
3426  * invalid.
3427  */
3428 TEST(user_notification_sibling_pid_ns)
3429 {
3430 	pid_t pid, pid2;
3431 	int status, listener;
3432 	struct seccomp_notif req = {};
3433 	struct seccomp_notif_resp resp = {};
3434 
3435 	ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) {
3436 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3437 	}
3438 
3439 	listener = user_trap_syscall(__NR_getppid,
3440 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3441 	ASSERT_GE(listener, 0);
3442 
3443 	pid = fork();
3444 	ASSERT_GE(pid, 0);
3445 
3446 	if (pid == 0) {
3447 		ASSERT_EQ(unshare(CLONE_NEWPID), 0);
3448 
3449 		pid2 = fork();
3450 		ASSERT_GE(pid2, 0);
3451 
3452 		if (pid2 == 0)
3453 			exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3454 
3455 		EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3456 		EXPECT_EQ(true, WIFEXITED(status));
3457 		EXPECT_EQ(0, WEXITSTATUS(status));
3458 		exit(WEXITSTATUS(status));
3459 	}
3460 
3461 	/* Create the sibling ns, and sibling in it. */
3462 	ASSERT_EQ(unshare(CLONE_NEWPID), 0);
3463 	ASSERT_EQ(errno, 0);
3464 
3465 	pid2 = fork();
3466 	ASSERT_GE(pid2, 0);
3467 
3468 	if (pid2 == 0) {
3469 		ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3470 		/*
3471 		 * The pid should be 0, i.e. the task is in some namespace that
3472 		 * we can't "see".
3473 		 */
3474 		EXPECT_EQ(req.pid, 0);
3475 
3476 		resp.id = req.id;
3477 		resp.error = 0;
3478 		resp.val = USER_NOTIF_MAGIC;
3479 
3480 		ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3481 		exit(0);
3482 	}
3483 
3484 	close(listener);
3485 
3486 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3487 	EXPECT_EQ(true, WIFEXITED(status));
3488 	EXPECT_EQ(0, WEXITSTATUS(status));
3489 
3490 	EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3491 	EXPECT_EQ(true, WIFEXITED(status));
3492 	EXPECT_EQ(0, WEXITSTATUS(status));
3493 }
3494 
3495 TEST(user_notification_fault_recv)
3496 {
3497 	pid_t pid;
3498 	int status, listener;
3499 	struct seccomp_notif req = {};
3500 	struct seccomp_notif_resp resp = {};
3501 
3502 	ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
3503 
3504 	listener = user_trap_syscall(__NR_getppid,
3505 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3506 	ASSERT_GE(listener, 0);
3507 
3508 	pid = fork();
3509 	ASSERT_GE(pid, 0);
3510 
3511 	if (pid == 0)
3512 		exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3513 
3514 	/* Do a bad recv() */
3515 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1);
3516 	EXPECT_EQ(errno, EFAULT);
3517 
3518 	/* We should still be able to receive this notification, though. */
3519 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3520 	EXPECT_EQ(req.pid, pid);
3521 
3522 	resp.id = req.id;
3523 	resp.error = 0;
3524 	resp.val = USER_NOTIF_MAGIC;
3525 
3526 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3527 
3528 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3529 	EXPECT_EQ(true, WIFEXITED(status));
3530 	EXPECT_EQ(0, WEXITSTATUS(status));
3531 }
3532 
/*
 * Check that the structure sizes reported by SECCOMP_GET_NOTIF_SIZES match
 * the sizes of the structures this test was compiled against.
 */
TEST(seccomp_get_notif_sizes)
{
	struct seccomp_notif_sizes sizes;

	/* The query itself has to succeed before the sizes mean anything. */
	ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0);

	/* Request and response structure sizes, in that order. */
	EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif));
	EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp));
}
3541 
/*
 * Compare two file descriptors with the kcmp() syscall in KCMP_FILE mode.
 *
 * @pid1/@fd1: process and descriptor on the first side of the comparison.
 * @pid2/@fd2: process and descriptor on the second side of the comparison.
 *
 * Returns the raw syscall result. When the build has no __NR_kcmp, fails
 * with -1 and errno set to ENOSYS.
 */
static int filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
{
#ifndef __NR_kcmp
	/* No syscall number to call: report "not implemented". */
	errno = ENOSYS;
	return -1;
#else
	return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
#endif
}
3551 
3552 TEST(user_notification_continue)
3553 {
3554 	pid_t pid;
3555 	long ret;
3556 	int status, listener;
3557 	struct seccomp_notif req = {};
3558 	struct seccomp_notif_resp resp = {};
3559 	struct pollfd pollfd;
3560 
3561 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3562 	ASSERT_EQ(0, ret) {
3563 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3564 	}
3565 
3566 	listener = user_trap_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
3567 	ASSERT_GE(listener, 0);
3568 
3569 	pid = fork();
3570 	ASSERT_GE(pid, 0);
3571 
3572 	if (pid == 0) {
3573 		int dup_fd, pipe_fds[2];
3574 		pid_t self;
3575 
3576 		ret = pipe(pipe_fds);
3577 		if (ret < 0)
3578 			exit(1);
3579 
3580 		dup_fd = dup(pipe_fds[0]);
3581 		if (dup_fd < 0)
3582 			exit(1);
3583 
3584 		self = getpid();
3585 
3586 		ret = filecmp(self, self, pipe_fds[0], dup_fd);
3587 		if (ret)
3588 			exit(2);
3589 
3590 		exit(0);
3591 	}
3592 
3593 	pollfd.fd = listener;
3594 	pollfd.events = POLLIN | POLLOUT;
3595 
3596 	EXPECT_GT(poll(&pollfd, 1, -1), 0);
3597 	EXPECT_EQ(pollfd.revents, POLLIN);
3598 
3599 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3600 
3601 	pollfd.fd = listener;
3602 	pollfd.events = POLLIN | POLLOUT;
3603 
3604 	EXPECT_GT(poll(&pollfd, 1, -1), 0);
3605 	EXPECT_EQ(pollfd.revents, POLLOUT);
3606 
3607 	EXPECT_EQ(req.data.nr, __NR_dup);
3608 
3609 	resp.id = req.id;
3610 	resp.flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
3611 
3612 	/*
3613 	 * Verify that setting SECCOMP_USER_NOTIF_FLAG_CONTINUE enforces other
3614 	 * args be set to 0.
3615 	 */
3616 	resp.error = 0;
3617 	resp.val = USER_NOTIF_MAGIC;
3618 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3619 	EXPECT_EQ(errno, EINVAL);
3620 
3621 	resp.error = USER_NOTIF_MAGIC;
3622 	resp.val = 0;
3623 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3624 	EXPECT_EQ(errno, EINVAL);
3625 
3626 	resp.error = 0;
3627 	resp.val = 0;
3628 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) {
3629 		if (errno == EINVAL)
3630 			XFAIL(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE");
3631 	}
3632 
3633 skip:
3634 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3635 	EXPECT_EQ(true, WIFEXITED(status));
3636 	EXPECT_EQ(0, WEXITSTATUS(status)) {
3637 		if (WEXITSTATUS(status) == 2) {
3638 			XFAIL(return, "Kernel does not support kcmp() syscall");
3639 			return;
3640 		}
3641 	}
3642 }
3643 #endif
3644 
3645 /*
3646  * TODO:
3647  * - add microbenchmarks
3648  * - expand NNP testing
3649  * - better arch-specific TRACE and TRAP handlers.
3650  * - endianness checking when appropriate
3651  * - 64-bit arg prodding
3652  * - arch value testing (x86 modes especially)
3653  * - verify that FILTER_FLAG_LOG filters generate log messages
3654  * - verify that RET_LOG generates log messages
3655  * - ...
3656  */
3657 
3658 TEST_HARNESS_MAIN
3659