• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
4  *
5  * Test code for seccomp bpf.
6  */
7 
8 #define _GNU_SOURCE
9 #include <sys/types.h>
10 
11 /*
12  * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
13  * we need to use the kernel's siginfo.h file and trick glibc
14  * into accepting it.
15  */
16 #if !__GLIBC_PREREQ(2, 26)
17 # include <asm/siginfo.h>
18 # define __have_siginfo_t 1
19 # define __have_sigval_t 1
20 # define __have_sigevent_t 1
21 #endif
22 
23 #include <errno.h>
24 #include <linux/filter.h>
25 #include <sys/prctl.h>
26 #include <sys/ptrace.h>
27 #include <sys/user.h>
28 #include <linux/prctl.h>
29 #include <linux/ptrace.h>
30 #include <linux/seccomp.h>
31 #include <pthread.h>
32 #include <semaphore.h>
33 #include <signal.h>
34 #include <stddef.h>
35 #include <stdbool.h>
36 #include <string.h>
37 #include <time.h>
38 #include <limits.h>
39 #include <linux/elf.h>
40 #include <sys/uio.h>
41 #include <sys/utsname.h>
42 #include <sys/fcntl.h>
43 #include <sys/mman.h>
44 #include <sys/times.h>
45 #include <sys/socket.h>
46 #include <sys/ioctl.h>
47 
48 #include <unistd.h>
49 #include <sys/syscall.h>
50 #include <poll.h>
51 
52 #include "../kselftest_harness.h"
53 
54 #ifndef PR_SET_PTRACER
55 # define PR_SET_PTRACER 0x59616d61
56 #endif
57 
58 #ifndef PR_SET_NO_NEW_PRIVS
59 #define PR_SET_NO_NEW_PRIVS 38
60 #define PR_GET_NO_NEW_PRIVS 39
61 #endif
62 
63 #ifndef PR_SECCOMP_EXT
64 #define PR_SECCOMP_EXT 43
65 #endif
66 
67 #ifndef SECCOMP_EXT_ACT
68 #define SECCOMP_EXT_ACT 1
69 #endif
70 
71 #ifndef SECCOMP_EXT_ACT_TSYNC
72 #define SECCOMP_EXT_ACT_TSYNC 1
73 #endif
74 
75 #ifndef SECCOMP_MODE_STRICT
76 #define SECCOMP_MODE_STRICT 1
77 #endif
78 
79 #ifndef SECCOMP_MODE_FILTER
80 #define SECCOMP_MODE_FILTER 2
81 #endif
82 
83 #ifndef SECCOMP_RET_ALLOW
/*
 * Fallback layout of the data a seccomp BPF program inspects; only compiled
 * when SECCOMP_RET_ALLOW is not already defined by the included headers.
 */
struct seccomp_data {
	int nr;				/* compared against __NR_* by the filters in this file */
	__u32 arch;
	__u64 instruction_pointer;
	__u64 args[6];			/* addressed through syscall_arg() */
};
90 #endif
91 
92 #ifndef SECCOMP_RET_KILL_PROCESS
93 #define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
94 #define SECCOMP_RET_KILL_THREAD	 0x00000000U /* kill the thread */
95 #endif
96 #ifndef SECCOMP_RET_KILL
97 #define SECCOMP_RET_KILL	 SECCOMP_RET_KILL_THREAD
98 #define SECCOMP_RET_TRAP	 0x00030000U /* disallow and force a SIGSYS */
99 #define SECCOMP_RET_ERRNO	 0x00050000U /* returns an errno */
100 #define SECCOMP_RET_TRACE	 0x7ff00000U /* pass to a tracer or disallow */
101 #define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
102 #endif
103 #ifndef SECCOMP_RET_LOG
104 #define SECCOMP_RET_LOG		 0x7ffc0000U /* allow after logging */
105 #endif
106 
107 #ifndef __NR_seccomp
108 # if defined(__i386__)
109 #  define __NR_seccomp 354
110 # elif defined(__x86_64__)
111 #  define __NR_seccomp 317
112 # elif defined(__arm__)
113 #  define __NR_seccomp 383
114 # elif defined(__aarch64__)
115 #  define __NR_seccomp 277
116 # elif defined(__hppa__)
117 #  define __NR_seccomp 338
118 # elif defined(__powerpc__)
119 #  define __NR_seccomp 358
120 # elif defined(__s390__)
121 #  define __NR_seccomp 348
122 # else
123 #  warning "seccomp syscall number unknown for this architecture"
124 #  define __NR_seccomp 0xffff
125 # endif
126 #endif
127 
128 #ifndef SECCOMP_SET_MODE_STRICT
129 #define SECCOMP_SET_MODE_STRICT 0
130 #endif
131 
132 #ifndef SECCOMP_SET_MODE_FILTER
133 #define SECCOMP_SET_MODE_FILTER 1
134 #endif
135 
136 #ifndef SECCOMP_GET_ACTION_AVAIL
137 #define SECCOMP_GET_ACTION_AVAIL 2
138 #endif
139 
140 #ifndef SECCOMP_GET_NOTIF_SIZES
141 #define SECCOMP_GET_NOTIF_SIZES 3
142 #endif
143 
144 #ifndef SECCOMP_FILTER_FLAG_TSYNC
145 #define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
146 #endif
147 
148 #ifndef SECCOMP_FILTER_FLAG_LOG
149 #define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
150 #endif
151 
152 #ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
153 #define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
154 #endif
155 
156 #ifndef PTRACE_SECCOMP_GET_METADATA
157 #define PTRACE_SECCOMP_GET_METADATA	0x420d
158 
/*
 * Fallback definition, compiled only when PTRACE_SECCOMP_GET_METADATA is
 * not provided by the included headers.
 */
struct seccomp_metadata {
	__u64 filter_off;	/* Input: which filter */
	__u64 flags;		/* Output: filter's flags */
};
163 #endif
164 
165 #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
166 #define SECCOMP_FILTER_FLAG_NEW_LISTENER	(1UL << 3)
167 
168 #define SECCOMP_RET_USER_NOTIF 0x7fc00000U
169 
170 #define SECCOMP_IOC_MAGIC		'!'
171 #define SECCOMP_IO(nr)			_IO(SECCOMP_IOC_MAGIC, nr)
172 #define SECCOMP_IOR(nr, type)		_IOR(SECCOMP_IOC_MAGIC, nr, type)
173 #define SECCOMP_IOW(nr, type)		_IOW(SECCOMP_IOC_MAGIC, nr, type)
174 #define SECCOMP_IOWR(nr, type)		_IOWR(SECCOMP_IOC_MAGIC, nr, type)
175 
176 /* Flags for seccomp notification fd ioctl. */
177 #define SECCOMP_IOCTL_NOTIF_RECV	SECCOMP_IOWR(0, struct seccomp_notif)
178 #define SECCOMP_IOCTL_NOTIF_SEND	SECCOMP_IOWR(1,	\
179 						struct seccomp_notif_resp)
180 #define SECCOMP_IOCTL_NOTIF_ID_VALID	SECCOMP_IOR(2, __u64)
181 
182 struct seccomp_notif {
183 	__u64 id;
184 	__u32 pid;
185 	__u32 flags;
186 	struct seccomp_data data;
187 };
188 
/*
 * Payload type named by SECCOMP_IOCTL_NOTIF_SEND. Fallback definition used
 * when SECCOMP_FILTER_FLAG_NEW_LISTENER is not already defined.
 */
struct seccomp_notif_resp {
	__u64 id;
	__s64 val;
	__s32 error;
	__u32 flags;
};
195 
/*
 * Three 16-bit size fields, one per notification-related structure.
 * NOTE(review): presumably filled in by SECCOMP_GET_NOTIF_SIZES - confirm
 * against the callers, which are outside this part of the file.
 */
struct seccomp_notif_sizes {
	__u16 seccomp_notif;
	__u16 seccomp_notif_resp;
	__u16 seccomp_data;
};
201 #endif
202 
203 #ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
204 #define PTRACE_EVENTMSG_SYSCALL_ENTRY	1
205 #define PTRACE_EVENTMSG_SYSCALL_EXIT	2
206 #endif
207 
208 #ifndef seccomp
/*
 * Issue the seccomp system call directly through syscall().
 *
 * errno is reset to 0 before the call is made. The value returned by
 * syscall() is handed back to the caller converted to int.
 */
int seccomp(unsigned int op, unsigned int flags, void *args)
{
	long result;

	errno = 0;
	result = syscall(__NR_seccomp, op, flags, args);

	return result;
}
214 #endif
215 
/*
 * syscall_arg(_n): byte offset inside struct seccomp_data that the filters
 * in this file use to load argument _n with a 32-bit BPF load. On
 * big-endian builds the offset is advanced by sizeof(__u32) past the start
 * of the 64-bit args[_n] slot.
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
# define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
#elif __BYTE_ORDER == __BIG_ENDIAN
# define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
#else
# error "wut? Unknown __BYTE_ORDER?!"
#endif
223 
224 #define SIBLING_EXIT_UNKILLED	0xbadbeef
225 #define SIBLING_EXIT_FAILURE	0xbadface
226 #define SIBLING_EXIT_NEWPRIVS	0xbadfeed
227 
TEST(mode_strict_support)228 TEST(mode_strict_support)
229 {
230 	long ret;
231 
232 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
233 	ASSERT_EQ(0, ret) {
234 		TH_LOG("Kernel does not support CONFIG_SECCOMP");
235 	}
236 	syscall(__NR_exit, 0);
237 }
238 
TEST_SIGNAL(mode_strict_cannot_call_prctl,SIGKILL)239 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
240 {
241 	long ret;
242 
243 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
244 	ASSERT_EQ(0, ret) {
245 		TH_LOG("Kernel does not support CONFIG_SECCOMP");
246 	}
247 	syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
248 		NULL, NULL, NULL);
249 	EXPECT_FALSE(true) {
250 		TH_LOG("Unreachable!");
251 	}
252 }
253 
254 /* Note! This doesn't test no new privs behavior */
TEST(no_new_privs_support)255 TEST(no_new_privs_support)
256 {
257 	long ret;
258 
259 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
260 	EXPECT_EQ(0, ret) {
261 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
262 	}
263 }
264 
265 /* Tests kernel support by checking for a copy_from_user() fault on NULL. */
TEST(mode_filter_support)266 TEST(mode_filter_support)
267 {
268 	long ret;
269 
270 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
271 	ASSERT_EQ(0, ret) {
272 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
273 	}
274 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
275 	EXPECT_EQ(-1, ret);
276 	EXPECT_EQ(EFAULT, errno) {
277 		TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
278 	}
279 }
280 
TEST(mode_filter_without_nnp)281 TEST(mode_filter_without_nnp)
282 {
283 	struct sock_filter filter[] = {
284 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
285 	};
286 	struct sock_fprog prog = {
287 		.len = (unsigned short)ARRAY_SIZE(filter),
288 		.filter = filter,
289 	};
290 	long ret;
291 
292 	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
293 	ASSERT_LE(0, ret) {
294 		TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
295 	}
296 	errno = 0;
297 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
298 	/* Succeeds with CAP_SYS_ADMIN, fails without */
299 	/* TODO(wad) check caps not euid */
300 	if (geteuid()) {
301 		EXPECT_EQ(-1, ret);
302 		EXPECT_EQ(EACCES, errno);
303 	} else {
304 		EXPECT_EQ(0, ret);
305 	}
306 }
307 
308 #define MAX_INSNS_PER_PATH 32768
309 
TEST(filter_size_limits)310 TEST(filter_size_limits)
311 {
312 	int i;
313 	int count = BPF_MAXINSNS + 1;
314 	struct sock_filter allow[] = {
315 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
316 	};
317 	struct sock_filter *filter;
318 	struct sock_fprog prog = { };
319 	long ret;
320 
321 	filter = calloc(count, sizeof(*filter));
322 	ASSERT_NE(NULL, filter);
323 
324 	for (i = 0; i < count; i++)
325 		filter[i] = allow[0];
326 
327 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
328 	ASSERT_EQ(0, ret);
329 
330 	prog.filter = filter;
331 	prog.len = count;
332 
333 	/* Too many filter instructions in a single filter. */
334 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
335 	ASSERT_NE(0, ret) {
336 		TH_LOG("Installing %d insn filter was allowed", prog.len);
337 	}
338 
339 	/* One less is okay, though. */
340 	prog.len -= 1;
341 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
342 	ASSERT_EQ(0, ret) {
343 		TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
344 	}
345 }
346 
TEST(filter_chain_limits)347 TEST(filter_chain_limits)
348 {
349 	int i;
350 	int count = BPF_MAXINSNS;
351 	struct sock_filter allow[] = {
352 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
353 	};
354 	struct sock_filter *filter;
355 	struct sock_fprog prog = { };
356 	long ret;
357 
358 	filter = calloc(count, sizeof(*filter));
359 	ASSERT_NE(NULL, filter);
360 
361 	for (i = 0; i < count; i++)
362 		filter[i] = allow[0];
363 
364 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
365 	ASSERT_EQ(0, ret);
366 
367 	prog.filter = filter;
368 	prog.len = 1;
369 
370 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
371 	ASSERT_EQ(0, ret);
372 
373 	prog.len = count;
374 
375 	/* Too many total filter instructions. */
376 	for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
377 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
378 		if (ret != 0)
379 			break;
380 	}
381 	ASSERT_NE(0, ret) {
382 		TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
383 		       i, count, i * (count + 4));
384 	}
385 }
386 
TEST(mode_filter_cannot_move_to_strict)387 TEST(mode_filter_cannot_move_to_strict)
388 {
389 	struct sock_filter filter[] = {
390 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
391 	};
392 	struct sock_fprog prog = {
393 		.len = (unsigned short)ARRAY_SIZE(filter),
394 		.filter = filter,
395 	};
396 	long ret;
397 
398 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
399 	ASSERT_EQ(0, ret);
400 
401 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
402 	ASSERT_EQ(0, ret);
403 
404 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
405 	EXPECT_EQ(-1, ret);
406 	EXPECT_EQ(EINVAL, errno);
407 }
408 
409 
TEST(mode_filter_get_seccomp)410 TEST(mode_filter_get_seccomp)
411 {
412 	struct sock_filter filter[] = {
413 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
414 	};
415 	struct sock_fprog prog = {
416 		.len = (unsigned short)ARRAY_SIZE(filter),
417 		.filter = filter,
418 	};
419 	long ret;
420 
421 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
422 	ASSERT_EQ(0, ret);
423 
424 	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
425 	EXPECT_EQ(0, ret);
426 
427 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
428 	ASSERT_EQ(0, ret);
429 
430 	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
431 	EXPECT_EQ(2, ret);
432 }
433 
434 
TEST(ALLOW_all)435 TEST(ALLOW_all)
436 {
437 	struct sock_filter filter[] = {
438 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
439 	};
440 	struct sock_fprog prog = {
441 		.len = (unsigned short)ARRAY_SIZE(filter),
442 		.filter = filter,
443 	};
444 	long ret;
445 
446 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
447 	ASSERT_EQ(0, ret);
448 
449 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
450 	ASSERT_EQ(0, ret);
451 }
452 
TEST(empty_prog)453 TEST(empty_prog)
454 {
455 	struct sock_filter filter[] = {
456 	};
457 	struct sock_fprog prog = {
458 		.len = (unsigned short)ARRAY_SIZE(filter),
459 		.filter = filter,
460 	};
461 	long ret;
462 
463 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
464 	ASSERT_EQ(0, ret);
465 
466 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
467 	EXPECT_EQ(-1, ret);
468 	EXPECT_EQ(EINVAL, errno);
469 }
470 
TEST(log_all)471 TEST(log_all)
472 {
473 	struct sock_filter filter[] = {
474 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
475 	};
476 	struct sock_fprog prog = {
477 		.len = (unsigned short)ARRAY_SIZE(filter),
478 		.filter = filter,
479 	};
480 	long ret;
481 	pid_t parent = getppid();
482 
483 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
484 	ASSERT_EQ(0, ret);
485 
486 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
487 	ASSERT_EQ(0, ret);
488 
489 	/* getppid() should succeed and be logged (no check for logging) */
490 	EXPECT_EQ(parent, syscall(__NR_getppid));
491 }
492 
TEST_SIGNAL(unknown_ret_is_kill_inside,SIGSYS)493 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
494 {
495 	struct sock_filter filter[] = {
496 		BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
497 	};
498 	struct sock_fprog prog = {
499 		.len = (unsigned short)ARRAY_SIZE(filter),
500 		.filter = filter,
501 	};
502 	long ret;
503 
504 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
505 	ASSERT_EQ(0, ret);
506 
507 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
508 	ASSERT_EQ(0, ret);
509 	EXPECT_EQ(0, syscall(__NR_getpid)) {
510 		TH_LOG("getpid() shouldn't ever return");
511 	}
512 }
513 
514 /* return code >= 0x80000000 is unused. */
TEST_SIGNAL(unknown_ret_is_kill_above_allow,SIGSYS)515 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
516 {
517 	struct sock_filter filter[] = {
518 		BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
519 	};
520 	struct sock_fprog prog = {
521 		.len = (unsigned short)ARRAY_SIZE(filter),
522 		.filter = filter,
523 	};
524 	long ret;
525 
526 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
527 	ASSERT_EQ(0, ret);
528 
529 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
530 	ASSERT_EQ(0, ret);
531 	EXPECT_EQ(0, syscall(__NR_getpid)) {
532 		TH_LOG("getpid() shouldn't ever return");
533 	}
534 }
535 
TEST_SIGNAL(KILL_all,SIGSYS)536 TEST_SIGNAL(KILL_all, SIGSYS)
537 {
538 	struct sock_filter filter[] = {
539 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
540 	};
541 	struct sock_fprog prog = {
542 		.len = (unsigned short)ARRAY_SIZE(filter),
543 		.filter = filter,
544 	};
545 	long ret;
546 
547 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
548 	ASSERT_EQ(0, ret);
549 
550 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
551 	ASSERT_EQ(0, ret);
552 }
553 
TEST_SIGNAL(KILL_one,SIGSYS)554 TEST_SIGNAL(KILL_one, SIGSYS)
555 {
556 	struct sock_filter filter[] = {
557 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
558 			offsetof(struct seccomp_data, nr)),
559 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
560 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
561 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
562 	};
563 	struct sock_fprog prog = {
564 		.len = (unsigned short)ARRAY_SIZE(filter),
565 		.filter = filter,
566 	};
567 	long ret;
568 	pid_t parent = getppid();
569 
570 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
571 	ASSERT_EQ(0, ret);
572 
573 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
574 	ASSERT_EQ(0, ret);
575 
576 	EXPECT_EQ(parent, syscall(__NR_getppid));
577 	/* getpid() should never return. */
578 	EXPECT_EQ(0, syscall(__NR_getpid));
579 }
580 
TEST_SIGNAL(KILL_one_arg_one,SIGSYS)581 TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
582 {
583 	void *fatal_address;
584 	struct sock_filter filter[] = {
585 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
586 			offsetof(struct seccomp_data, nr)),
587 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
588 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
589 		/* Only both with lower 32-bit for now. */
590 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
591 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
592 			(unsigned long)&fatal_address, 0, 1),
593 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
594 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
595 	};
596 	struct sock_fprog prog = {
597 		.len = (unsigned short)ARRAY_SIZE(filter),
598 		.filter = filter,
599 	};
600 	long ret;
601 	pid_t parent = getppid();
602 	struct tms timebuf;
603 	clock_t clock = times(&timebuf);
604 
605 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
606 	ASSERT_EQ(0, ret);
607 
608 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
609 	ASSERT_EQ(0, ret);
610 
611 	EXPECT_EQ(parent, syscall(__NR_getppid));
612 	EXPECT_LE(clock, syscall(__NR_times, &timebuf));
613 	/* times() should never return. */
614 	EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
615 }
616 
TEST_SIGNAL(KILL_one_arg_six,SIGSYS)617 TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
618 {
619 #ifndef __NR_mmap2
620 	int sysno = __NR_mmap;
621 #else
622 	int sysno = __NR_mmap2;
623 #endif
624 	struct sock_filter filter[] = {
625 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
626 			offsetof(struct seccomp_data, nr)),
627 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
628 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
629 		/* Only both with lower 32-bit for now. */
630 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
631 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
632 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
633 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
634 	};
635 	struct sock_fprog prog = {
636 		.len = (unsigned short)ARRAY_SIZE(filter),
637 		.filter = filter,
638 	};
639 	long ret;
640 	pid_t parent = getppid();
641 	int fd;
642 	void *map1, *map2;
643 	int page_size = sysconf(_SC_PAGESIZE);
644 
645 	ASSERT_LT(0, page_size);
646 
647 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
648 	ASSERT_EQ(0, ret);
649 
650 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
651 	ASSERT_EQ(0, ret);
652 
653 	fd = open("/dev/zero", O_RDONLY);
654 	ASSERT_NE(-1, fd);
655 
656 	EXPECT_EQ(parent, syscall(__NR_getppid));
657 	map1 = (void *)syscall(sysno,
658 		NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
659 	EXPECT_NE(MAP_FAILED, map1);
660 	/* mmap2() should never return. */
661 	map2 = (void *)syscall(sysno,
662 		 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
663 	EXPECT_EQ(MAP_FAILED, map2);
664 
665 	/* The test failed, so clean up the resources. */
666 	munmap(map1, page_size);
667 	munmap(map2, page_size);
668 	close(fd);
669 }
670 
671 /* This is a thread task to die via seccomp filter violation. */
kill_thread(void * data)672 void *kill_thread(void *data)
673 {
674 	bool die = (bool)data;
675 
676 	if (die) {
677 		prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
678 		return (void *)SIBLING_EXIT_FAILURE;
679 	}
680 
681 	return (void *)SIBLING_EXIT_UNKILLED;
682 }
683 
684 /* Prepare a thread that will kill itself or both of us. */
kill_thread_or_group(struct __test_metadata * _metadata,bool kill_process)685 void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
686 {
687 	pthread_t thread;
688 	void *status;
689 	/* Kill only when calling __NR_prctl. */
690 	struct sock_filter filter_thread[] = {
691 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
692 			offsetof(struct seccomp_data, nr)),
693 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
694 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
695 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
696 	};
697 	struct sock_fprog prog_thread = {
698 		.len = (unsigned short)ARRAY_SIZE(filter_thread),
699 		.filter = filter_thread,
700 	};
701 	struct sock_filter filter_process[] = {
702 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
703 			offsetof(struct seccomp_data, nr)),
704 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
705 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
706 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
707 	};
708 	struct sock_fprog prog_process = {
709 		.len = (unsigned short)ARRAY_SIZE(filter_process),
710 		.filter = filter_process,
711 	};
712 
713 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
714 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
715 	}
716 
717 	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
718 			     kill_process ? &prog_process : &prog_thread));
719 
720 	/*
721 	 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
722 	 * flag cannot be downgraded by a new filter.
723 	 */
724 	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
725 
726 	/* Start a thread that will exit immediately. */
727 	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
728 	ASSERT_EQ(0, pthread_join(thread, &status));
729 	ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
730 
731 	/* Start a thread that will die immediately. */
732 	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
733 	ASSERT_EQ(0, pthread_join(thread, &status));
734 	ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
735 
736 	/*
737 	 * If we get here, only the spawned thread died. Let the parent know
738 	 * the whole process didn't die (i.e. this thread, the spawner,
739 	 * stayed running).
740 	 */
741 	exit(42);
742 }
743 
TEST(KILL_thread)744 TEST(KILL_thread)
745 {
746 	int status;
747 	pid_t child_pid;
748 
749 	child_pid = fork();
750 	ASSERT_LE(0, child_pid);
751 	if (child_pid == 0) {
752 		kill_thread_or_group(_metadata, false);
753 		_exit(38);
754 	}
755 
756 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
757 
758 	/* If only the thread was killed, we'll see exit 42. */
759 	ASSERT_TRUE(WIFEXITED(status));
760 	ASSERT_EQ(42, WEXITSTATUS(status));
761 }
762 
TEST(KILL_process)763 TEST(KILL_process)
764 {
765 	int status;
766 	pid_t child_pid;
767 
768 	child_pid = fork();
769 	ASSERT_LE(0, child_pid);
770 	if (child_pid == 0) {
771 		kill_thread_or_group(_metadata, true);
772 		_exit(38);
773 	}
774 
775 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
776 
777 	/* If the entire process was killed, we'll see SIGSYS. */
778 	ASSERT_TRUE(WIFSIGNALED(status));
779 	ASSERT_EQ(SIGSYS, WTERMSIG(status));
780 }
781 
782 /* TODO(wad) add 64-bit versus 32-bit arg tests. */
TEST(arg_out_of_range)783 TEST(arg_out_of_range)
784 {
785 	struct sock_filter filter[] = {
786 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
787 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
788 	};
789 	struct sock_fprog prog = {
790 		.len = (unsigned short)ARRAY_SIZE(filter),
791 		.filter = filter,
792 	};
793 	long ret;
794 
795 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
796 	ASSERT_EQ(0, ret);
797 
798 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
799 	EXPECT_EQ(-1, ret);
800 	EXPECT_EQ(EINVAL, errno);
801 }
802 
/*
 * ERRNO_FILTER(name, _errno) - declare a filter/program pair in the
 * enclosing scope:
 *
 *   _read_filter_<name>  BPF program returning SECCOMP_RET_ERRNO | _errno
 *                        for __NR_read and SECCOMP_RET_ALLOW otherwise.
 *   prog_<name>          struct sock_fprog wrapping that program.
 *
 * The value parameter is not called "errno", which is a macro from
 * <errno.h>, and it is parenthesized in the expansion so that an arbitrary
 * expression can be passed safely.
 */
#define ERRNO_FILTER(name, _errno)					\
	struct sock_filter _read_filter_##name[] = {			\
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,				\
			offsetof(struct seccomp_data, nr)),		\
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),	\
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | (_errno)),	\
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),		\
	};								\
	struct sock_fprog prog_##name = {				\
		.len = (unsigned short)ARRAY_SIZE(_read_filter_##name),	\
		.filter = _read_filter_##name,				\
	}
815 
816 /* Make sure basic errno values are correctly passed through a filter. */
TEST(ERRNO_valid)817 TEST(ERRNO_valid)
818 {
819 	ERRNO_FILTER(valid, E2BIG);
820 	long ret;
821 	pid_t parent = getppid();
822 
823 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
824 	ASSERT_EQ(0, ret);
825 
826 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
827 	ASSERT_EQ(0, ret);
828 
829 	EXPECT_EQ(parent, syscall(__NR_getppid));
830 	EXPECT_EQ(-1, read(0, NULL, 0));
831 	EXPECT_EQ(E2BIG, errno);
832 }
833 
834 /* Make sure an errno of zero is correctly handled by the arch code. */
TEST(ERRNO_zero)835 TEST(ERRNO_zero)
836 {
837 	ERRNO_FILTER(zero, 0);
838 	long ret;
839 	pid_t parent = getppid();
840 
841 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
842 	ASSERT_EQ(0, ret);
843 
844 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
845 	ASSERT_EQ(0, ret);
846 
847 	EXPECT_EQ(parent, syscall(__NR_getppid));
848 	/* "errno" of 0 is ok. */
849 	EXPECT_EQ(0, read(0, NULL, 0));
850 }
851 
852 /*
853  * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
854  * This tests that the errno value gets capped correctly, fixed by
855  * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
856  */
TEST(ERRNO_capped)857 TEST(ERRNO_capped)
858 {
859 	ERRNO_FILTER(capped, 4096);
860 	long ret;
861 	pid_t parent = getppid();
862 
863 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
864 	ASSERT_EQ(0, ret);
865 
866 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
867 	ASSERT_EQ(0, ret);
868 
869 	EXPECT_EQ(parent, syscall(__NR_getppid));
870 	EXPECT_EQ(-1, read(0, NULL, 0));
871 	EXPECT_EQ(4095, errno);
872 }
873 
874 /*
875  * Filters are processed in reverse order: last applied is executed first.
876  * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
877  * SECCOMP_RET_DATA mask results will follow the most recently applied
878  * matching filter return (and not the lowest or highest value).
879  */
TEST(ERRNO_order)880 TEST(ERRNO_order)
881 {
882 	ERRNO_FILTER(first,  11);
883 	ERRNO_FILTER(second, 13);
884 	ERRNO_FILTER(third,  12);
885 	long ret;
886 	pid_t parent = getppid();
887 
888 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
889 	ASSERT_EQ(0, ret);
890 
891 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
892 	ASSERT_EQ(0, ret);
893 
894 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
895 	ASSERT_EQ(0, ret);
896 
897 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
898 	ASSERT_EQ(0, ret);
899 
900 	EXPECT_EQ(parent, syscall(__NR_getppid));
901 	EXPECT_EQ(-1, read(0, NULL, 0));
902 	EXPECT_EQ(12, errno);
903 }
904 
FIXTURE_DATA(TRAP)905 FIXTURE_DATA(TRAP) {
906 	struct sock_fprog prog;
907 };
908 
FIXTURE_SETUP(TRAP)909 FIXTURE_SETUP(TRAP)
910 {
911 	struct sock_filter filter[] = {
912 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
913 			offsetof(struct seccomp_data, nr)),
914 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
915 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
916 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
917 	};
918 
919 	memset(&self->prog, 0, sizeof(self->prog));
920 	self->prog.filter = malloc(sizeof(filter));
921 	ASSERT_NE(NULL, self->prog.filter);
922 	memcpy(self->prog.filter, filter, sizeof(filter));
923 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
924 }
925 
FIXTURE_TEARDOWN(TRAP)926 FIXTURE_TEARDOWN(TRAP)
927 {
928 	if (self->prog.filter)
929 		free(self->prog.filter);
930 }
931 
TEST_F_SIGNAL(TRAP,dfl,SIGSYS)932 TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
933 {
934 	long ret;
935 
936 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
937 	ASSERT_EQ(0, ret);
938 
939 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
940 	ASSERT_EQ(0, ret);
941 	syscall(__NR_getpid);
942 }
943 
944 /* Ensure that SIGSYS overrides SIG_IGN */
TEST_F_SIGNAL(TRAP,ign,SIGSYS)945 TEST_F_SIGNAL(TRAP, ign, SIGSYS)
946 {
947 	long ret;
948 
949 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
950 	ASSERT_EQ(0, ret);
951 
952 	signal(SIGSYS, SIG_IGN);
953 
954 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
955 	ASSERT_EQ(0, ret);
956 	syscall(__NR_getpid);
957 }
958 
/* Snapshot of the most recent signal delivered to TRAP_action(). */
static siginfo_t TRAP_info;
/* Signal number most recently seen by TRAP_action(). */
static volatile int TRAP_nr;

/*
 * SA_SIGINFO-style handler: records the signal number and a copy of the
 * siginfo_t so the test can inspect them afterwards. The context argument
 * is not used.
 */
static void TRAP_action(int nr, siginfo_t *info, void *void_context)
{
	TRAP_nr = nr;
	memcpy(&TRAP_info, info, sizeof(TRAP_info));
}
966 
TEST_F(TRAP,handler)967 TEST_F(TRAP, handler)
968 {
969 	int ret, test;
970 	struct sigaction act;
971 	sigset_t mask;
972 
973 	memset(&act, 0, sizeof(act));
974 	sigemptyset(&mask);
975 	sigaddset(&mask, SIGSYS);
976 
977 	act.sa_sigaction = &TRAP_action;
978 	act.sa_flags = SA_SIGINFO;
979 	ret = sigaction(SIGSYS, &act, NULL);
980 	ASSERT_EQ(0, ret) {
981 		TH_LOG("sigaction failed");
982 	}
983 	ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
984 	ASSERT_EQ(0, ret) {
985 		TH_LOG("sigprocmask failed");
986 	}
987 
988 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
989 	ASSERT_EQ(0, ret);
990 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
991 	ASSERT_EQ(0, ret);
992 	TRAP_nr = 0;
993 	memset(&TRAP_info, 0, sizeof(TRAP_info));
994 	/* Expect the registers to be rolled back. (nr = error) may vary
995 	 * based on arch. */
996 	ret = syscall(__NR_getpid);
997 	/* Silence gcc warning about volatile. */
998 	test = TRAP_nr;
999 	EXPECT_EQ(SIGSYS, test);
1000 	struct local_sigsys {
1001 		void *_call_addr;	/* calling user insn */
1002 		int _syscall;		/* triggering system call number */
1003 		unsigned int _arch;	/* AUDIT_ARCH_* of syscall */
1004 	} *sigsys = (struct local_sigsys *)
1005 #ifdef si_syscall
1006 		&(TRAP_info.si_call_addr);
1007 #else
1008 		&TRAP_info.si_pid;
1009 #endif
1010 	EXPECT_EQ(__NR_getpid, sigsys->_syscall);
1011 	/* Make sure arch is non-zero. */
1012 	EXPECT_NE(0, sigsys->_arch);
1013 	EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
1014 }
1015 
FIXTURE_DATA(precedence)1016 FIXTURE_DATA(precedence) {
1017 	struct sock_fprog allow;
1018 	struct sock_fprog log;
1019 	struct sock_fprog trace;
1020 	struct sock_fprog error;
1021 	struct sock_fprog trap;
1022 	struct sock_fprog kill;
1023 };
1024 
FIXTURE_SETUP(precedence)1025 FIXTURE_SETUP(precedence)
1026 {
1027 	struct sock_filter allow_insns[] = {
1028 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1029 	};
1030 	struct sock_filter log_insns[] = {
1031 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1032 			offsetof(struct seccomp_data, nr)),
1033 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1034 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1035 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
1036 	};
1037 	struct sock_filter trace_insns[] = {
1038 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1039 			offsetof(struct seccomp_data, nr)),
1040 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1041 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1042 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
1043 	};
1044 	struct sock_filter error_insns[] = {
1045 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1046 			offsetof(struct seccomp_data, nr)),
1047 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1048 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1049 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
1050 	};
1051 	struct sock_filter trap_insns[] = {
1052 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1053 			offsetof(struct seccomp_data, nr)),
1054 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1055 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1056 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
1057 	};
1058 	struct sock_filter kill_insns[] = {
1059 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1060 			offsetof(struct seccomp_data, nr)),
1061 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
1062 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1063 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1064 	};
1065 
1066 	memset(self, 0, sizeof(*self));
1067 #define FILTER_ALLOC(_x) \
1068 	self->_x.filter = malloc(sizeof(_x##_insns)); \
1069 	ASSERT_NE(NULL, self->_x.filter); \
1070 	memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
1071 	self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
1072 	FILTER_ALLOC(allow);
1073 	FILTER_ALLOC(log);
1074 	FILTER_ALLOC(trace);
1075 	FILTER_ALLOC(error);
1076 	FILTER_ALLOC(trap);
1077 	FILTER_ALLOC(kill);
1078 }
1079 
FIXTURE_TEARDOWN(precedence)1080 FIXTURE_TEARDOWN(precedence)
1081 {
1082 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
1083 	FILTER_FREE(allow);
1084 	FILTER_FREE(log);
1085 	FILTER_FREE(trace);
1086 	FILTER_FREE(error);
1087 	FILTER_FREE(trap);
1088 	FILTER_FREE(kill);
1089 }
1090 
TEST_F(precedence,allow_ok)1091 TEST_F(precedence, allow_ok)
1092 {
1093 	pid_t parent, res = 0;
1094 	long ret;
1095 
1096 	parent = getppid();
1097 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1098 	ASSERT_EQ(0, ret);
1099 
1100 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1101 	ASSERT_EQ(0, ret);
1102 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1103 	ASSERT_EQ(0, ret);
1104 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1105 	ASSERT_EQ(0, ret);
1106 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1107 	ASSERT_EQ(0, ret);
1108 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
1109 	ASSERT_EQ(0, ret);
1110 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
1111 	ASSERT_EQ(0, ret);
1112 	/* Should work just fine. */
1113 	res = syscall(__NR_getppid);
1114 	EXPECT_EQ(parent, res);
1115 }
1116 
/*
 * With every filter installed (kill last), getpid() is expected to end the
 * test process with SIGSYS while getppid() keeps working.
 */
TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
{
	struct sock_fprog *filters[] = {
		&self->allow, &self->log, &self->trace,
		&self->error, &self->trap, &self->kill,
	};
	pid_t parent = getppid();
	pid_t res = 0;
	long ret;
	size_t i;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	/* Stack the filters in the order listed above. */
	for (i = 0; i < ARRAY_SIZE(filters); i++) {
		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, filters[i]);
		ASSERT_EQ(0, ret);
	}

	/* Should work just fine. */
	res = syscall(__NR_getppid);
	EXPECT_EQ(parent, res);
	/* getpid() should never return. */
	res = syscall(__NR_getpid);
	EXPECT_EQ(0, res);
}
1145 
/*
 * Same expectation as kill_is_highest, but the kill filter is installed
 * second rather than last.
 */
TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
{
	struct sock_fprog *filters[] = {
		&self->allow, &self->kill, &self->error,
		&self->log, &self->trace, &self->trap,
	};
	pid_t parent = getppid();
	long ret;
	size_t i;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	/* Stack the filters in the order listed above. */
	for (i = 0; i < ARRAY_SIZE(filters); i++) {
		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, filters[i]);
		ASSERT_EQ(0, ret);
	}

	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}
1172 
/*
 * Without the kill filter, the trap filter is expected to win: getpid()
 * must end the test process with SIGSYS.
 */
TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
{
	struct sock_fprog *filters[] = {
		&self->allow, &self->log, &self->trace,
		&self->error, &self->trap,
	};
	pid_t parent = getppid();
	long ret;
	size_t i;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	/* Stack the filters in the order listed above. */
	for (i = 0; i < ARRAY_SIZE(filters); i++) {
		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, filters[i]);
		ASSERT_EQ(0, ret);
	}

	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}
1197 
/*
 * Same expectation as trap_is_second, but the trap filter is installed
 * second rather than last.
 */
TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
{
	struct sock_fprog *filters[] = {
		&self->allow, &self->trap, &self->log,
		&self->trace, &self->error,
	};
	pid_t parent = getppid();
	long ret;
	size_t i;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	/* Stack the filters in the order listed above. */
	for (i = 0; i < ARRAY_SIZE(filters); i++) {
		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, filters[i]);
		ASSERT_EQ(0, ret);
	}

	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}
1222 
TEST_F(precedence,errno_is_third)1223 TEST_F(precedence, errno_is_third)
1224 {
1225 	pid_t parent;
1226 	long ret;
1227 
1228 	parent = getppid();
1229 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1230 	ASSERT_EQ(0, ret);
1231 
1232 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1233 	ASSERT_EQ(0, ret);
1234 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1235 	ASSERT_EQ(0, ret);
1236 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1237 	ASSERT_EQ(0, ret);
1238 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1239 	ASSERT_EQ(0, ret);
1240 	/* Should work just fine. */
1241 	EXPECT_EQ(parent, syscall(__NR_getppid));
1242 	EXPECT_EQ(0, syscall(__NR_getpid));
1243 }
1244 
TEST_F(precedence,errno_is_third_in_any_order)1245 TEST_F(precedence, errno_is_third_in_any_order)
1246 {
1247 	pid_t parent;
1248 	long ret;
1249 
1250 	parent = getppid();
1251 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1252 	ASSERT_EQ(0, ret);
1253 
1254 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1255 	ASSERT_EQ(0, ret);
1256 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
1257 	ASSERT_EQ(0, ret);
1258 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1259 	ASSERT_EQ(0, ret);
1260 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1261 	ASSERT_EQ(0, ret);
1262 	/* Should work just fine. */
1263 	EXPECT_EQ(parent, syscall(__NR_getppid));
1264 	EXPECT_EQ(0, syscall(__NR_getpid));
1265 }
1266 
TEST_F(precedence,trace_is_fourth)1267 TEST_F(precedence, trace_is_fourth)
1268 {
1269 	pid_t parent;
1270 	long ret;
1271 
1272 	parent = getppid();
1273 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1274 	ASSERT_EQ(0, ret);
1275 
1276 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1277 	ASSERT_EQ(0, ret);
1278 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1279 	ASSERT_EQ(0, ret);
1280 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1281 	ASSERT_EQ(0, ret);
1282 	/* Should work just fine. */
1283 	EXPECT_EQ(parent, syscall(__NR_getppid));
1284 	/* No ptracer */
1285 	EXPECT_EQ(-1, syscall(__NR_getpid));
1286 }
1287 
TEST_F(precedence,trace_is_fourth_in_any_order)1288 TEST_F(precedence, trace_is_fourth_in_any_order)
1289 {
1290 	pid_t parent;
1291 	long ret;
1292 
1293 	parent = getppid();
1294 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1295 	ASSERT_EQ(0, ret);
1296 
1297 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1298 	ASSERT_EQ(0, ret);
1299 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1300 	ASSERT_EQ(0, ret);
1301 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1302 	ASSERT_EQ(0, ret);
1303 	/* Should work just fine. */
1304 	EXPECT_EQ(parent, syscall(__NR_getppid));
1305 	/* No ptracer */
1306 	EXPECT_EQ(-1, syscall(__NR_getpid));
1307 }
1308 
TEST_F(precedence,log_is_fifth)1309 TEST_F(precedence, log_is_fifth)
1310 {
1311 	pid_t mypid, parent;
1312 	long ret;
1313 
1314 	mypid = getpid();
1315 	parent = getppid();
1316 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1317 	ASSERT_EQ(0, ret);
1318 
1319 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1320 	ASSERT_EQ(0, ret);
1321 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1322 	ASSERT_EQ(0, ret);
1323 	/* Should work just fine. */
1324 	EXPECT_EQ(parent, syscall(__NR_getppid));
1325 	/* Should also work just fine */
1326 	EXPECT_EQ(mypid, syscall(__NR_getpid));
1327 }
1328 
TEST_F(precedence,log_is_fifth_in_any_order)1329 TEST_F(precedence, log_is_fifth_in_any_order)
1330 {
1331 	pid_t mypid, parent;
1332 	long ret;
1333 
1334 	mypid = getpid();
1335 	parent = getppid();
1336 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1337 	ASSERT_EQ(0, ret);
1338 
1339 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
1340 	ASSERT_EQ(0, ret);
1341 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1342 	ASSERT_EQ(0, ret);
1343 	/* Should work just fine. */
1344 	EXPECT_EQ(parent, syscall(__NR_getppid));
1345 	/* Should also work just fine */
1346 	EXPECT_EQ(mypid, syscall(__NR_getpid));
1347 }
1348 
/* Fallback for headers that predate PTRACE_O_TRACESECCOMP. */
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP	0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

/*
 * True when bits 16 and up of a wait() status equal PTRACE_EVENT_SECCOMP.
 * The argument is parenthesized so that a compound expression such as
 * "a | b" is shifted as a whole.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)
/*
 * Loop flag for start_tracer(). It is written from the tracer_stop()
 * signal handler, so it is declared volatile sig_atomic_t, the type the C
 * standard allows a handler to store to.
 */
volatile sig_atomic_t tracer_running;

/* Signal handler: clear tracer_running. @sig is unused. */
void tracer_stop(int sig)
{
	(void)sig;
	tracer_running = false;
}
1368 
/*
 * Callback type invoked by start_tracer() for each tracee stop it handles.
 * It receives the harness metadata, the tracee pid, the wait() status of
 * the stop, and the opaque @args pointer that was given to start_tracer().
 */
typedef void tracer_func_t(struct __test_metadata *_metadata, pid_t tracee,
			   int status, void *args);
1371 
/*
 * Body of the tracer process: attach to @tracee, release it by writing one
 * byte to the pipe @fd, then pass every stop of the tracee to @tracer_func
 * until the tracee dies or tracer_running is cleared (SIGUSR1).
 *
 * @ptrace_syscall selects PTRACE_SYSCALL stops with PTRACE_O_TRACESYSGOOD;
 * otherwise the tracee is resumed with PTRACE_CONT and
 * PTRACE_O_TRACESECCOMP is set. @args is handed to @tracer_func untouched.
 *
 * Returns only when the tracee has exited or been killed; after an
 * external shutdown the process exits directly with the harness result.
 */
void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
	    tracer_func_t tracer_func, void *args, bool ptrace_syscall)
{
	struct sigaction action = {
		.sa_handler = tracer_stop,
	};
	/* Request used every time the tracee is resumed. */
	int resume = ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT;
	int ret;

	/* Allow external shutdown. */
	tracer_running = true;
	ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));

	/* Retry the attach until it succeeds or fails with EINVAL. */
	errno = 0;
	do {
		ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
	} while (ret == -1 && errno != EINVAL);
	ASSERT_EQ(0, ret) {
		kill(tracee, SIGKILL);
	}
	/* Wait for attach stop */
	wait(NULL);

	if (ptrace_syscall)
		ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL,
			     PTRACE_O_TRACESYSGOOD);
	else
		ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL,
			     PTRACE_O_TRACESECCOMP);
	ASSERT_EQ(0, ret) {
		TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
		kill(tracee, SIGKILL);
	}
	ret = ptrace(resume, tracee, NULL, 0);
	ASSERT_EQ(0, ret);

	/* Unblock the tracee */
	ASSERT_EQ(1, write(fd, "A", 1));
	ASSERT_EQ(0, close(fd));

	/* Run until we're shut down. Must assert to stop execution. */
	while (tracer_running) {
		int status;

		/* Ignore anything that is not a status for our tracee. */
		if (wait(&status) != tracee)
			continue;
		/* Child is dead. Time to go. */
		if (WIFSIGNALED(status) || WIFEXITED(status))
			return;

		/* Check if this is a seccomp event. */
		ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));

		tracer_func(_metadata, tracee, status, args);

		ret = ptrace(resume, tracee, NULL, 0);
		ASSERT_EQ(0, ret);
	}
	/* Directly report the status of our test harness results. */
	syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
}
1430 
1431 /* Common tracer setup/teardown functions. */
/*
 * Signal handler that deliberately does nothing; setup_trace_fixture()
 * installs it for SIGALRM. @num is unused.
 */
void cont_handler(int num)
{
	(void)num;
}
/*
 * Fork a tracer process that attaches to the caller and runs @func (with
 * @args) on each of the caller's stops; see start_tracer() for the meaning
 * of @ptrace_syscall.
 *
 * Blocks until the tracer has written its "attached" byte to the pipe, and
 * returns the tracer's pid. Assertion failures abort the calling test.
 */
pid_t setup_trace_fixture(struct __test_metadata *_metadata,
			  tracer_func_t func, void *args, bool ptrace_syscall)
{
	char sync;
	int pipefd[2];
	pid_t tracer_pid;
	pid_t tracee = getpid();

	/* Setup a pipe for clean synchronization. */
	ASSERT_EQ(0, pipe(pipefd));

	/* Fork a child which we'll promote to tracer */
	tracer_pid = fork();
	ASSERT_LE(0, tracer_pid);
	signal(SIGALRM, cont_handler);
	if (tracer_pid == 0) {
		/* Tracer: keep only the write end of the pipe. */
		close(pipefd[0]);
		start_tracer(_metadata, pipefd[1], tracee, func, args,
			     ptrace_syscall);
		syscall(__NR_exit, 0);
	}

	/* Tracee: keep only the read end of the pipe. */
	close(pipefd[1]);
	/*
	 * The result is intentionally not checked.
	 * NOTE(review): presumably PR_SET_PTRACER is unsupported on some
	 * kernels - confirm before asserting on it.
	 */
	prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
	/*
	 * Wait for the tracer's sync byte. Anything but a one-byte read means
	 * the tracer went away before attaching, so stop here instead of
	 * running the test untraced.
	 */
	ASSERT_EQ(1, read(pipefd[0], &sync, 1)) {
		TH_LOG("Tracer did not signal that it attached");
	}
	close(pipefd[0]);

	return tracer_pid;
}
teardown_trace_fixture(struct __test_metadata * _metadata,pid_t tracer)1462 void teardown_trace_fixture(struct __test_metadata *_metadata,
1463 			    pid_t tracer)
1464 {
1465 	if (tracer) {
1466 		int status;
1467 		/*
1468 		 * Extract the exit code from the other process and
1469 		 * adopt it for ourselves in case its asserts failed.
1470 		 */
1471 		ASSERT_EQ(0, kill(tracer, SIGUSR1));
1472 		ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1473 		if (WEXITSTATUS(status))
1474 			_metadata->passed = 0;
1475 	}
1476 }
1477 
1478 /* "poke" tracer arguments and function. */
1479 struct tracer_args_poke_t {
1480 	unsigned long poke_addr;
1481 };
1482 
tracer_poke(struct __test_metadata * _metadata,pid_t tracee,int status,void * args)1483 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1484 		 void *args)
1485 {
1486 	int ret;
1487 	unsigned long msg;
1488 	struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1489 
1490 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1491 	EXPECT_EQ(0, ret);
1492 	/* If this fails, don't try to recover. */
1493 	ASSERT_EQ(0x1001, msg) {
1494 		kill(tracee, SIGKILL);
1495 	}
1496 	/*
1497 	 * Poke in the message.
1498 	 * Registers are not touched to try to keep this relatively arch
1499 	 * agnostic.
1500 	 */
1501 	ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1502 	EXPECT_EQ(0, ret);
1503 }
1504 
FIXTURE_DATA(TRACE_poke)1505 FIXTURE_DATA(TRACE_poke) {
1506 	struct sock_fprog prog;
1507 	pid_t tracer;
1508 	long poked;
1509 	struct tracer_args_poke_t tracer_args;
1510 };
1511 
FIXTURE_SETUP(TRACE_poke)1512 FIXTURE_SETUP(TRACE_poke)
1513 {
1514 	struct sock_filter filter[] = {
1515 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1516 			offsetof(struct seccomp_data, nr)),
1517 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1518 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1519 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1520 	};
1521 
1522 	self->poked = 0;
1523 	memset(&self->prog, 0, sizeof(self->prog));
1524 	self->prog.filter = malloc(sizeof(filter));
1525 	ASSERT_NE(NULL, self->prog.filter);
1526 	memcpy(self->prog.filter, filter, sizeof(filter));
1527 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1528 
1529 	/* Set up tracer args. */
1530 	self->tracer_args.poke_addr = (unsigned long)&self->poked;
1531 
1532 	/* Launch tracer. */
1533 	self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1534 					   &self->tracer_args, false);
1535 }
1536 
FIXTURE_TEARDOWN(TRACE_poke)1537 FIXTURE_TEARDOWN(TRACE_poke)
1538 {
1539 	teardown_trace_fixture(_metadata, self->tracer);
1540 	if (self->prog.filter)
1541 		free(self->prog.filter);
1542 }
1543 
TEST_F(TRACE_poke,read_has_side_effects)1544 TEST_F(TRACE_poke, read_has_side_effects)
1545 {
1546 	ssize_t ret;
1547 
1548 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1549 	ASSERT_EQ(0, ret);
1550 
1551 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1552 	ASSERT_EQ(0, ret);
1553 
1554 	EXPECT_EQ(0, self->poked);
1555 	ret = read(-1, NULL, 0);
1556 	EXPECT_EQ(-1, ret);
1557 	EXPECT_EQ(0x1001, self->poked);
1558 }
1559 
TEST_F(TRACE_poke,getpid_runs_normally)1560 TEST_F(TRACE_poke, getpid_runs_normally)
1561 {
1562 	long ret;
1563 
1564 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1565 	ASSERT_EQ(0, ret);
1566 
1567 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1568 	ASSERT_EQ(0, ret);
1569 
1570 	EXPECT_EQ(0, self->poked);
1571 	EXPECT_NE(0, syscall(__NR_getpid));
1572 	EXPECT_EQ(0, self->poked);
1573 }
1574 
/*
 * Per-architecture register access used by get_syscall() and
 * change_syscall():
 *   ARCH_REGS    - register structure type fetched from the tracee
 *   SYSCALL_NUM  - member holding the syscall number
 *   SYSCALL_RET  - member holding the syscall return value
 *   SYSCALL_NUM_RET_SHARE_REG - defined when both are the same register,
 *                  so the return value cannot be set on a skipped syscall
 */
#if defined(__x86_64__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_rax
# define SYSCALL_RET	rax
#elif defined(__i386__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_eax
# define SYSCALL_RET	eax
#elif defined(__arm__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	ARM_r7
# define SYSCALL_RET	ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS	struct user_pt_regs
# define SYSCALL_NUM	regs[8]
# define SYSCALL_RET	regs[0]
#elif defined(__hppa__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	gr[20]
# define SYSCALL_RET	gr[28]
#elif defined(__powerpc__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	gpr[0]
# define SYSCALL_RET	gpr[3]
#elif defined(__s390__)
/* gprs[2] carries both the syscall number and the return value. */
# define ARCH_REGS	s390_regs
# define SYSCALL_NUM	gprs[2]
# define SYSCALL_RET	gprs[2]
# define SYSCALL_NUM_RET_SHARE_REG
#elif defined(__mips__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	regs[2]
# define SYSCALL_SYSCALL_NUM regs[4]
# define SYSCALL_RET	regs[2]
# define SYSCALL_NUM_RET_SHARE_REG
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
1612 
/*
 * EXPECT_SYSCALL_RETURN(val, action): evaluate @action and expect it to
 * return @val. A negative @val stands for an errno: @action must then
 * return -1 with errno equal to -@val.
 *
 * When the syscall return can't be changed, stub out the tests for it.
 */
#ifdef SYSCALL_NUM_RET_SHARE_REG
# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action)		\
	do {						\
		errno = 0;				\
		if ((val) < 0) {			\
			EXPECT_EQ(-1, action);		\
			EXPECT_EQ(-(val), errno);	\
		} else {				\
			EXPECT_EQ(val, action);		\
		}					\
	} while (0)
#endif
1628 
/*
 * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
 */
#if defined(__i386__) || defined(__x86_64__) || defined(__mips__)
# define HAVE_GETREGS
#endif
1635 
1636 /* Architecture-specific syscall fetching routine. */
get_syscall(struct __test_metadata * _metadata,pid_t tracee)1637 int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1638 {
1639 	ARCH_REGS regs;
1640 #ifdef HAVE_GETREGS
1641 	EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
1642 		TH_LOG("PTRACE_GETREGS failed");
1643 		return -1;
1644 	}
1645 #else
1646 	struct iovec iov;
1647 
1648 	iov.iov_base = &regs;
1649 	iov.iov_len = sizeof(regs);
1650 	EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1651 		TH_LOG("PTRACE_GETREGSET failed");
1652 		return -1;
1653 	}
1654 #endif
1655 
1656 #if defined(__mips__)
1657 	if (regs.SYSCALL_NUM == __NR_O32_Linux)
1658 		return regs.SYSCALL_SYSCALL_NUM;
1659 #endif
1660 	return regs.SYSCALL_NUM;
1661 }
1662 
1663 /* Architecture-specific syscall changing routine. */
change_syscall(struct __test_metadata * _metadata,pid_t tracee,int syscall,int result)1664 void change_syscall(struct __test_metadata *_metadata,
1665 		    pid_t tracee, int syscall, int result)
1666 {
1667 	int ret;
1668 	ARCH_REGS regs;
1669 #ifdef HAVE_GETREGS
1670 	ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
1671 #else
1672 	struct iovec iov;
1673 	iov.iov_base = &regs;
1674 	iov.iov_len = sizeof(regs);
1675 	ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
1676 #endif
1677 	EXPECT_EQ(0, ret) {}
1678 
1679 #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
1680     defined(__s390__) || defined(__hppa__)
1681 	{
1682 		regs.SYSCALL_NUM = syscall;
1683 	}
1684 #elif defined(__mips__)
1685 	{
1686 		if (regs.SYSCALL_NUM == __NR_O32_Linux)
1687 			regs.SYSCALL_SYSCALL_NUM = syscall;
1688 		else
1689 			regs.SYSCALL_NUM = syscall;
1690 	}
1691 
1692 #elif defined(__arm__)
1693 # ifndef PTRACE_SET_SYSCALL
1694 #  define PTRACE_SET_SYSCALL   23
1695 # endif
1696 	{
1697 		ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
1698 		EXPECT_EQ(0, ret);
1699 	}
1700 
1701 #elif defined(__aarch64__)
1702 # ifndef NT_ARM_SYSTEM_CALL
1703 #  define NT_ARM_SYSTEM_CALL 0x404
1704 # endif
1705 	{
1706 		iov.iov_base = &syscall;
1707 		iov.iov_len = sizeof(syscall);
1708 		ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
1709 			     &iov);
1710 		EXPECT_EQ(0, ret);
1711 	}
1712 
1713 #else
1714 	ASSERT_EQ(1, 0) {
1715 		TH_LOG("How is the syscall changed on this architecture?");
1716 	}
1717 #endif
1718 
1719 	/* If syscall is skipped, change return value. */
1720 	if (syscall == -1)
1721 #ifdef SYSCALL_NUM_RET_SHARE_REG
1722 		TH_LOG("Can't modify syscall return on this architecture");
1723 #else
1724 		regs.SYSCALL_RET = result;
1725 #endif
1726 
1727 #ifdef HAVE_GETREGS
1728 	ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
1729 #else
1730 	iov.iov_base = &regs;
1731 	iov.iov_len = sizeof(regs);
1732 	ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
1733 #endif
1734 	EXPECT_EQ(0, ret);
1735 }
1736 
tracer_syscall(struct __test_metadata * _metadata,pid_t tracee,int status,void * args)1737 void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1738 		    int status, void *args)
1739 {
1740 	int ret;
1741 	unsigned long msg;
1742 
1743 	/* Make sure we got the right message. */
1744 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1745 	EXPECT_EQ(0, ret);
1746 
1747 	/* Validate and take action on expected syscalls. */
1748 	switch (msg) {
1749 	case 0x1002:
1750 		/* change getpid to getppid. */
1751 		EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
1752 		change_syscall(_metadata, tracee, __NR_getppid, 0);
1753 		break;
1754 	case 0x1003:
1755 		/* skip gettid with valid return code. */
1756 		EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
1757 		change_syscall(_metadata, tracee, -1, 45000);
1758 		break;
1759 	case 0x1004:
1760 		/* skip openat with error. */
1761 		EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
1762 		change_syscall(_metadata, tracee, -1, -ESRCH);
1763 		break;
1764 	case 0x1005:
1765 		/* do nothing (allow getppid) */
1766 		EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
1767 		break;
1768 	default:
1769 		EXPECT_EQ(0, msg) {
1770 			TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1771 			kill(tracee, SIGKILL);
1772 		}
1773 	}
1774 
1775 }
1776 
tracer_ptrace(struct __test_metadata * _metadata,pid_t tracee,int status,void * args)1777 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
1778 		   int status, void *args)
1779 {
1780 	int ret, nr;
1781 	unsigned long msg;
1782 	static bool entry;
1783 
1784 	/*
1785 	 * The traditional way to tell PTRACE_SYSCALL entry/exit
1786 	 * is by counting.
1787 	 */
1788 	entry = !entry;
1789 
1790 	/* Make sure we got an appropriate message. */
1791 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1792 	EXPECT_EQ(0, ret);
1793 	EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
1794 			: PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
1795 
1796 	if (!entry)
1797 		return;
1798 
1799 	nr = get_syscall(_metadata, tracee);
1800 
1801 	if (nr == __NR_getpid)
1802 		change_syscall(_metadata, tracee, __NR_getppid, 0);
1803 	if (nr == __NR_gettid)
1804 		change_syscall(_metadata, tracee, -1, 45000);
1805 	if (nr == __NR_openat)
1806 		change_syscall(_metadata, tracee, -1, -ESRCH);
1807 }
1808 
/* Per-test state of the TRACE_syscall fixture. */
FIXTURE_DATA(TRACE_syscall) {
	/* Heap copy of the filter built in FIXTURE_SETUP(TRACE_syscall). */
	struct sock_fprog prog;
	/* Pid returned by setup_trace_fixture(). */
	pid_t tracer;
	/* Results of gettid/getpid/getppid recorded during setup. */
	pid_t mytid;
	pid_t mypid;
	pid_t parent;
};
1813 
FIXTURE_SETUP(TRACE_syscall)1814 FIXTURE_SETUP(TRACE_syscall)
1815 {
1816 	struct sock_filter filter[] = {
1817 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1818 			offsetof(struct seccomp_data, nr)),
1819 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1820 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1821 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1822 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1823 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1),
1824 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1825 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1826 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
1827 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1828 	};
1829 
1830 	memset(&self->prog, 0, sizeof(self->prog));
1831 	self->prog.filter = malloc(sizeof(filter));
1832 	ASSERT_NE(NULL, self->prog.filter);
1833 	memcpy(self->prog.filter, filter, sizeof(filter));
1834 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1835 
1836 	/* Prepare some testable syscall results. */
1837 	self->mytid = syscall(__NR_gettid);
1838 	ASSERT_GT(self->mytid, 0);
1839 	ASSERT_NE(self->mytid, 1) {
1840 		TH_LOG("Running this test as init is not supported. :)");
1841 	}
1842 
1843 	self->mypid = getpid();
1844 	ASSERT_GT(self->mypid, 0);
1845 	ASSERT_EQ(self->mytid, self->mypid);
1846 
1847 	self->parent = getppid();
1848 	ASSERT_GT(self->parent, 0);
1849 	ASSERT_NE(self->parent, self->mypid);
1850 
1851 	/* Launch tracer. */
1852 	self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
1853 					   false);
1854 }
1855 
FIXTURE_TEARDOWN(TRACE_syscall)1856 FIXTURE_TEARDOWN(TRACE_syscall)
1857 {
1858 	teardown_trace_fixture(_metadata, self->tracer);
1859 	if (self->prog.filter)
1860 		free(self->prog.filter);
1861 }
1862 
TEST_F(TRACE_syscall,ptrace_syscall_redirected)1863 TEST_F(TRACE_syscall, ptrace_syscall_redirected)
1864 {
1865 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1866 	teardown_trace_fixture(_metadata, self->tracer);
1867 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1868 					   true);
1869 
1870 	/* Tracer will redirect getpid to getppid. */
1871 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
1872 }
1873 
TEST_F(TRACE_syscall,ptrace_syscall_errno)1874 TEST_F(TRACE_syscall, ptrace_syscall_errno)
1875 {
1876 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1877 	teardown_trace_fixture(_metadata, self->tracer);
1878 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1879 					   true);
1880 
1881 	/* Tracer should skip the open syscall, resulting in ESRCH. */
1882 	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
1883 }
1884 
TEST_F(TRACE_syscall,ptrace_syscall_faked)1885 TEST_F(TRACE_syscall, ptrace_syscall_faked)
1886 {
1887 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1888 	teardown_trace_fixture(_metadata, self->tracer);
1889 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1890 					   true);
1891 
1892 	/* Tracer should skip the gettid syscall, resulting fake pid. */
1893 	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
1894 }
1895 
TEST_F(TRACE_syscall,syscall_allowed)1896 TEST_F(TRACE_syscall, syscall_allowed)
1897 {
1898 	long ret;
1899 
1900 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1901 	ASSERT_EQ(0, ret);
1902 
1903 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1904 	ASSERT_EQ(0, ret);
1905 
1906 	/* getppid works as expected (no changes). */
1907 	EXPECT_EQ(self->parent, syscall(__NR_getppid));
1908 	EXPECT_NE(self->mypid, syscall(__NR_getppid));
1909 }
1910 
TEST_F(TRACE_syscall,syscall_redirected)1911 TEST_F(TRACE_syscall, syscall_redirected)
1912 {
1913 	long ret;
1914 
1915 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1916 	ASSERT_EQ(0, ret);
1917 
1918 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1919 	ASSERT_EQ(0, ret);
1920 
1921 	/* getpid has been redirected to getppid as expected. */
1922 	EXPECT_EQ(self->parent, syscall(__NR_getpid));
1923 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
1924 }
1925 
TEST_F(TRACE_syscall,syscall_errno)1926 TEST_F(TRACE_syscall, syscall_errno)
1927 {
1928 	long ret;
1929 
1930 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1931 	ASSERT_EQ(0, ret);
1932 
1933 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1934 	ASSERT_EQ(0, ret);
1935 
1936 	/* openat has been skipped and an errno return. */
1937 	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
1938 }
1939 
TEST_F(TRACE_syscall,syscall_faked)1940 TEST_F(TRACE_syscall, syscall_faked)
1941 {
1942 	long ret;
1943 
1944 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1945 	ASSERT_EQ(0, ret);
1946 
1947 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1948 	ASSERT_EQ(0, ret);
1949 
1950 	/* gettid has been skipped and an altered return value stored. */
1951 	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
1952 }
1953 
TEST_F(TRACE_syscall,skip_after_RET_TRACE)1954 TEST_F(TRACE_syscall, skip_after_RET_TRACE)
1955 {
1956 	struct sock_filter filter[] = {
1957 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1958 			offsetof(struct seccomp_data, nr)),
1959 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1960 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1961 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1962 	};
1963 	struct sock_fprog prog = {
1964 		.len = (unsigned short)ARRAY_SIZE(filter),
1965 		.filter = filter,
1966 	};
1967 	long ret;
1968 
1969 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1970 	ASSERT_EQ(0, ret);
1971 
1972 	/* Install fixture filter. */
1973 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1974 	ASSERT_EQ(0, ret);
1975 
1976 	/* Install "errno on getppid" filter. */
1977 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1978 	ASSERT_EQ(0, ret);
1979 
1980 	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
1981 	errno = 0;
1982 	EXPECT_EQ(-1, syscall(__NR_getpid));
1983 	EXPECT_EQ(EPERM, errno);
1984 }
1985 
/*
 * Install the fixture filter plus a second filter that kills on
 * getppid(). After the tracer redirects getpid() to getppid(), the test
 * process is expected to die with SIGSYS.
 */
TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.filter = filter,
		.len = (unsigned short)ARRAY_SIZE(filter),
	};
	/* Fixture filter first, then the "death on getppid" filter. */
	struct sock_fprog *stack[] = { &self->prog, &prog };
	long ret;
	size_t i;

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	for (i = 0; i < ARRAY_SIZE(stack); i++) {
		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, stack[i],
			    0, 0);
		ASSERT_EQ(0, ret);
	}

	/* Tracer will redirect getpid to getppid, and we should die. */
	EXPECT_NE(self->mypid, syscall(__NR_getpid));
}
2015 
TEST_F(TRACE_syscall,skip_after_ptrace)2016 TEST_F(TRACE_syscall, skip_after_ptrace)
2017 {
2018 	struct sock_filter filter[] = {
2019 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2020 			offsetof(struct seccomp_data, nr)),
2021 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2022 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
2023 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2024 	};
2025 	struct sock_fprog prog = {
2026 		.len = (unsigned short)ARRAY_SIZE(filter),
2027 		.filter = filter,
2028 	};
2029 	long ret;
2030 
2031 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
2032 	teardown_trace_fixture(_metadata, self->tracer);
2033 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
2034 					   true);
2035 
2036 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2037 	ASSERT_EQ(0, ret);
2038 
2039 	/* Install "errno on getppid" filter. */
2040 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2041 	ASSERT_EQ(0, ret);
2042 
2043 	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
2044 	EXPECT_EQ(-1, syscall(__NR_getpid));
2045 	EXPECT_EQ(EPERM, errno);
2046 }
2047 
TEST_F_SIGNAL(TRACE_syscall,kill_after_ptrace,SIGSYS)2048 TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
2049 {
2050 	struct sock_filter filter[] = {
2051 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2052 			offsetof(struct seccomp_data, nr)),
2053 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
2054 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2055 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2056 	};
2057 	struct sock_fprog prog = {
2058 		.len = (unsigned short)ARRAY_SIZE(filter),
2059 		.filter = filter,
2060 	};
2061 	long ret;
2062 
2063 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
2064 	teardown_trace_fixture(_metadata, self->tracer);
2065 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
2066 					   true);
2067 
2068 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2069 	ASSERT_EQ(0, ret);
2070 
2071 	/* Install "death on getppid" filter. */
2072 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2073 	ASSERT_EQ(0, ret);
2074 
2075 	/* Tracer will redirect getpid to getppid, and we should die. */
2076 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
2077 }
2078 
TEST(seccomp_syscall)2079 TEST(seccomp_syscall)
2080 {
2081 	struct sock_filter filter[] = {
2082 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2083 	};
2084 	struct sock_fprog prog = {
2085 		.len = (unsigned short)ARRAY_SIZE(filter),
2086 		.filter = filter,
2087 	};
2088 	long ret;
2089 
2090 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2091 	ASSERT_EQ(0, ret) {
2092 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2093 	}
2094 
2095 	/* Reject insane operation. */
2096 	ret = seccomp(-1, 0, &prog);
2097 	ASSERT_NE(ENOSYS, errno) {
2098 		TH_LOG("Kernel does not support seccomp syscall!");
2099 	}
2100 	EXPECT_EQ(EINVAL, errno) {
2101 		TH_LOG("Did not reject crazy op value!");
2102 	}
2103 
2104 	/* Reject strict with flags or pointer. */
2105 	ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
2106 	EXPECT_EQ(EINVAL, errno) {
2107 		TH_LOG("Did not reject mode strict with flags!");
2108 	}
2109 	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
2110 	EXPECT_EQ(EINVAL, errno) {
2111 		TH_LOG("Did not reject mode strict with uargs!");
2112 	}
2113 
2114 	/* Reject insane args for filter. */
2115 	ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
2116 	EXPECT_EQ(EINVAL, errno) {
2117 		TH_LOG("Did not reject crazy filter flags!");
2118 	}
2119 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
2120 	EXPECT_EQ(EFAULT, errno) {
2121 		TH_LOG("Did not reject NULL filter!");
2122 	}
2123 
2124 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2125 	EXPECT_EQ(0, errno) {
2126 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
2127 			strerror(errno));
2128 	}
2129 }
2130 
TEST(seccomp_syscall_mode_lock)2131 TEST(seccomp_syscall_mode_lock)
2132 {
2133 	struct sock_filter filter[] = {
2134 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2135 	};
2136 	struct sock_fprog prog = {
2137 		.len = (unsigned short)ARRAY_SIZE(filter),
2138 		.filter = filter,
2139 	};
2140 	long ret;
2141 
2142 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2143 	ASSERT_EQ(0, ret) {
2144 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2145 	}
2146 
2147 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2148 	ASSERT_NE(ENOSYS, errno) {
2149 		TH_LOG("Kernel does not support seccomp syscall!");
2150 	}
2151 	EXPECT_EQ(0, ret) {
2152 		TH_LOG("Could not install filter!");
2153 	}
2154 
2155 	/* Make sure neither entry point will switch to strict. */
2156 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
2157 	EXPECT_EQ(EINVAL, errno) {
2158 		TH_LOG("Switched to mode strict!");
2159 	}
2160 
2161 	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
2162 	EXPECT_EQ(EINVAL, errno) {
2163 		TH_LOG("Switched to mode strict!");
2164 	}
2165 }
2166 
2167 /*
2168  * Test detection of known and unknown filter flags. Userspace needs to be able
2169  * to check if a filter flag is supported by the current kernel and a good way
2170  * of doing that is by attempting to enter filter mode, with the flag bit in
2171  * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
2172  * that the flag is valid and EINVAL indicates that the flag is invalid.
2173  */
TEST(detect_seccomp_filter_flags)2174 TEST(detect_seccomp_filter_flags)
2175 {
2176 	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
2177 				 SECCOMP_FILTER_FLAG_LOG,
2178 				 SECCOMP_FILTER_FLAG_SPEC_ALLOW,
2179 				 SECCOMP_FILTER_FLAG_NEW_LISTENER };
2180 	unsigned int exclusive[] = {
2181 				SECCOMP_FILTER_FLAG_TSYNC,
2182 				SECCOMP_FILTER_FLAG_NEW_LISTENER };
2183 	unsigned int flag, all_flags, exclusive_mask;
2184 	int i;
2185 	long ret;
2186 
2187 	/* Test detection of individual known-good filter flags */
2188 	for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
2189 		int bits = 0;
2190 
2191 		flag = flags[i];
2192 		/* Make sure the flag is a single bit! */
2193 		while (flag) {
2194 			if (flag & 0x1)
2195 				bits ++;
2196 			flag >>= 1;
2197 		}
2198 		ASSERT_EQ(1, bits);
2199 		flag = flags[i];
2200 
2201 		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2202 		ASSERT_NE(ENOSYS, errno) {
2203 			TH_LOG("Kernel does not support seccomp syscall!");
2204 		}
2205 		EXPECT_EQ(-1, ret);
2206 		EXPECT_EQ(EFAULT, errno) {
2207 			TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
2208 			       flag);
2209 		}
2210 
2211 		all_flags |= flag;
2212 	}
2213 
2214 	/*
2215 	 * Test detection of all known-good filter flags combined. But
2216 	 * for the exclusive flags we need to mask them out and try them
2217 	 * individually for the "all flags" testing.
2218 	 */
2219 	exclusive_mask = 0;
2220 	for (i = 0; i < ARRAY_SIZE(exclusive); i++)
2221 		exclusive_mask |= exclusive[i];
2222 	for (i = 0; i < ARRAY_SIZE(exclusive); i++) {
2223 		flag = all_flags & ~exclusive_mask;
2224 		flag |= exclusive[i];
2225 
2226 		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2227 		EXPECT_EQ(-1, ret);
2228 		EXPECT_EQ(EFAULT, errno) {
2229 			TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
2230 			       flag);
2231 		}
2232 	}
2233 
2234 	/* Test detection of an unknown filter flags, without exclusives. */
2235 	flag = -1;
2236 	flag &= ~exclusive_mask;
2237 	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2238 	EXPECT_EQ(-1, ret);
2239 	EXPECT_EQ(EINVAL, errno) {
2240 		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
2241 		       flag);
2242 	}
2243 
2244 	/*
2245 	 * Test detection of an unknown filter flag that may simply need to be
2246 	 * added to this test
2247 	 */
2248 	flag = flags[ARRAY_SIZE(flags) - 1] << 1;
2249 	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
2250 	EXPECT_EQ(-1, ret);
2251 	EXPECT_EQ(EINVAL, errno) {
2252 		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
2253 		       flag);
2254 	}
2255 }
2256 
TEST(TSYNC_first)2257 TEST(TSYNC_first)
2258 {
2259 	struct sock_filter filter[] = {
2260 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2261 	};
2262 	struct sock_fprog prog = {
2263 		.len = (unsigned short)ARRAY_SIZE(filter),
2264 		.filter = filter,
2265 	};
2266 	long ret;
2267 
2268 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
2269 	ASSERT_EQ(0, ret) {
2270 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2271 	}
2272 
2273 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2274 		      &prog);
2275 	ASSERT_NE(ENOSYS, errno) {
2276 		TH_LOG("Kernel does not support seccomp syscall!");
2277 	}
2278 	EXPECT_EQ(0, ret) {
2279 		TH_LOG("Could not install initial filter with TSYNC!");
2280 	}
2281 }
2282 
/* Number of sibling threads the TSYNC fixture keeps track of. */
#define TSYNC_SIBLINGS 2
/* State shared between a TSYNC test and one tsync_sibling() thread. */
struct tsync_sibling {
	/* pthread handle; PTHREAD_JOIN() zeroes it after a successful join. */
	pthread_t tid;
	/* Kernel thread id, stored by the thread itself via gettid. */
	pid_t system_tid;
	/* Semaphore the thread posts once it has started. */
	sem_t *started;
	/* Condition variable the thread waits on, paired with mutex. */
	pthread_cond_t *cond;
	pthread_mutex_t *mutex;
	/* Non-zero: the thread installs prog on itself before waiting. */
	int diverge;
	/* Number of wake-ups on cond the thread consumes before going on. */
	int num_waits;
	/* Filter program used when diverge is set. */
	struct sock_fprog *prog;
	/* Harness metadata of the test that owns this sibling. */
	struct __test_metadata *metadata;
};
2295 
/*
 * Join a thread and, on success, clear its tid.
 *
 * Joining an already-joined thread is not allowed by Bionic, so a tid
 * of 0 marks "already joined" and lets fixture teardown skip it.  Any
 * thread whose tid is still set at teardown is killed there instead.
 *
 * @tid:    modifiable pthread_t lvalue; evaluated more than once
 * @status: passed straight through to pthread_join()
 *
 * The arguments are parenthesized wherever they are expanded so that
 * any expression is safe to pass.
 */
#define PTHREAD_JOIN(tid, status)					\
	do {								\
		int _rc = pthread_join((tid), (status));		\
		if (_rc) {						\
			TH_LOG("pthread_join of tid %u failed: %d\n",	\
				(unsigned int)(tid), _rc);		\
		} else {						\
			(tid) = 0;					\
		}							\
	} while (0)
2312 
/* Per-test state of the TSYNC fixture. */
FIXTURE_DATA(TSYNC) {
	/*
	 * Heap copies of the two filters built in FIXTURE_SETUP(TSYNC):
	 * root_prog allows everything, apply_prog kills on read.
	 */
	struct sock_fprog root_prog, apply_prog;
	struct tsync_sibling sibling[TSYNC_SIBLINGS];
	/* Synchronisation objects every sibling points at. */
	sem_t started;
	pthread_cond_t cond;
	pthread_mutex_t mutex;
	/* Number of siblings seen to have started; bounds the teardown loop. */
	int sibling_count;
};
2321 
FIXTURE_SETUP(TSYNC)2322 FIXTURE_SETUP(TSYNC)
2323 {
2324 	struct sock_filter root_filter[] = {
2325 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2326 	};
2327 	struct sock_filter apply_filter[] = {
2328 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2329 			offsetof(struct seccomp_data, nr)),
2330 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
2331 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2332 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2333 	};
2334 
2335 	memset(&self->root_prog, 0, sizeof(self->root_prog));
2336 	memset(&self->apply_prog, 0, sizeof(self->apply_prog));
2337 	memset(&self->sibling, 0, sizeof(self->sibling));
2338 	self->root_prog.filter = malloc(sizeof(root_filter));
2339 	ASSERT_NE(NULL, self->root_prog.filter);
2340 	memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
2341 	self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
2342 
2343 	self->apply_prog.filter = malloc(sizeof(apply_filter));
2344 	ASSERT_NE(NULL, self->apply_prog.filter);
2345 	memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
2346 	self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
2347 
2348 	self->sibling_count = 0;
2349 	pthread_mutex_init(&self->mutex, NULL);
2350 	pthread_cond_init(&self->cond, NULL);
2351 	sem_init(&self->started, 0, 0);
2352 	self->sibling[0].tid = 0;
2353 	self->sibling[0].cond = &self->cond;
2354 	self->sibling[0].started = &self->started;
2355 	self->sibling[0].mutex = &self->mutex;
2356 	self->sibling[0].diverge = 0;
2357 	self->sibling[0].num_waits = 1;
2358 	self->sibling[0].prog = &self->root_prog;
2359 	self->sibling[0].metadata = _metadata;
2360 	self->sibling[1].tid = 0;
2361 	self->sibling[1].cond = &self->cond;
2362 	self->sibling[1].started = &self->started;
2363 	self->sibling[1].mutex = &self->mutex;
2364 	self->sibling[1].diverge = 0;
2365 	self->sibling[1].prog = &self->root_prog;
2366 	self->sibling[1].num_waits = 1;
2367 	self->sibling[1].metadata = _metadata;
2368 }
2369 
FIXTURE_TEARDOWN(TSYNC)2370 FIXTURE_TEARDOWN(TSYNC)
2371 {
2372 	int sib = 0;
2373 
2374 	if (self->root_prog.filter)
2375 		free(self->root_prog.filter);
2376 	if (self->apply_prog.filter)
2377 		free(self->apply_prog.filter);
2378 
2379 	for ( ; sib < self->sibling_count; ++sib) {
2380 		struct tsync_sibling *s = &self->sibling[sib];
2381 
2382 		if (!s->tid)
2383 			continue;
2384 		/*
2385 		 * If a thread is still running, it may be stuck, so hit
2386 		 * it over the head really hard.
2387 		 */
2388 		pthread_kill(s->tid, 9);
2389 	}
2390 	pthread_mutex_destroy(&self->mutex);
2391 	pthread_cond_destroy(&self->cond);
2392 	sem_destroy(&self->started);
2393 }
2394 
tsync_sibling(void * data)2395 void *tsync_sibling(void *data)
2396 {
2397 	long ret = 0;
2398 	struct tsync_sibling *me = data;
2399 
2400 	me->system_tid = syscall(__NR_gettid);
2401 
2402 	pthread_mutex_lock(me->mutex);
2403 	if (me->diverge) {
2404 		/* Just re-apply the root prog to fork the tree */
2405 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
2406 				me->prog, 0, 0);
2407 	}
2408 	sem_post(me->started);
2409 	/* Return outside of started so parent notices failures. */
2410 	if (ret) {
2411 		pthread_mutex_unlock(me->mutex);
2412 		return (void *)SIBLING_EXIT_FAILURE;
2413 	}
2414 	do {
2415 		pthread_cond_wait(me->cond, me->mutex);
2416 		me->num_waits = me->num_waits - 1;
2417 	} while (me->num_waits);
2418 	pthread_mutex_unlock(me->mutex);
2419 
2420 	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
2421 	if (!ret)
2422 		return (void *)SIBLING_EXIT_NEWPRIVS;
2423 	read(0, NULL, 0);
2424 	return (void *)SIBLING_EXIT_UNKILLED;
2425 }
2426 
tsync_start_sibling(struct tsync_sibling * sibling)2427 void tsync_start_sibling(struct tsync_sibling *sibling)
2428 {
2429 	pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
2430 }
2431 
TEST_F(TSYNC,siblings_fail_prctl)2432 TEST_F(TSYNC, siblings_fail_prctl)
2433 {
2434 	long ret;
2435 	void *status;
2436 	struct sock_filter filter[] = {
2437 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2438 			offsetof(struct seccomp_data, nr)),
2439 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
2440 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
2441 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2442 	};
2443 	struct sock_fprog prog = {
2444 		.len = (unsigned short)ARRAY_SIZE(filter),
2445 		.filter = filter,
2446 	};
2447 
2448 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2449 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2450 	}
2451 
2452 	/* Check prctl failure detection by requesting sib 0 diverge. */
2453 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
2454 	ASSERT_NE(ENOSYS, errno) {
2455 		TH_LOG("Kernel does not support seccomp syscall!");
2456 	}
2457 	ASSERT_EQ(0, ret) {
2458 		TH_LOG("setting filter failed");
2459 	}
2460 
2461 	self->sibling[0].diverge = 1;
2462 	tsync_start_sibling(&self->sibling[0]);
2463 	tsync_start_sibling(&self->sibling[1]);
2464 
2465 	while (self->sibling_count < TSYNC_SIBLINGS) {
2466 		sem_wait(&self->started);
2467 		self->sibling_count++;
2468 	}
2469 
2470 	/* Signal the threads to clean up*/
2471 	pthread_mutex_lock(&self->mutex);
2472 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2473 		TH_LOG("cond broadcast non-zero");
2474 	}
2475 	pthread_mutex_unlock(&self->mutex);
2476 
2477 	/* Ensure diverging sibling failed to call prctl. */
2478 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2479 	EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
2480 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2481 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2482 }
2483 
TEST_F(TSYNC,two_siblings_with_ancestor)2484 TEST_F(TSYNC, two_siblings_with_ancestor)
2485 {
2486 	long ret;
2487 	void *status;
2488 
2489 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2490 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2491 	}
2492 
2493 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2494 	ASSERT_NE(ENOSYS, errno) {
2495 		TH_LOG("Kernel does not support seccomp syscall!");
2496 	}
2497 	ASSERT_EQ(0, ret) {
2498 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2499 	}
2500 	tsync_start_sibling(&self->sibling[0]);
2501 	tsync_start_sibling(&self->sibling[1]);
2502 
2503 	while (self->sibling_count < TSYNC_SIBLINGS) {
2504 		sem_wait(&self->started);
2505 		self->sibling_count++;
2506 	}
2507 
2508 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2509 		      &self->apply_prog);
2510 	ASSERT_EQ(0, ret) {
2511 		TH_LOG("Could install filter on all threads!");
2512 	}
2513 	/* Tell the siblings to test the policy */
2514 	pthread_mutex_lock(&self->mutex);
2515 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2516 		TH_LOG("cond broadcast non-zero");
2517 	}
2518 	pthread_mutex_unlock(&self->mutex);
2519 	/* Ensure they are both killed and don't exit cleanly. */
2520 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2521 	EXPECT_EQ(0x0, (long)status);
2522 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2523 	EXPECT_EQ(0x0, (long)status);
2524 }
2525 
TEST_F(TSYNC,two_sibling_want_nnp)2526 TEST_F(TSYNC, two_sibling_want_nnp)
2527 {
2528 	void *status;
2529 
2530 	/* start siblings before any prctl() operations */
2531 	tsync_start_sibling(&self->sibling[0]);
2532 	tsync_start_sibling(&self->sibling[1]);
2533 	while (self->sibling_count < TSYNC_SIBLINGS) {
2534 		sem_wait(&self->started);
2535 		self->sibling_count++;
2536 	}
2537 
2538 	/* Tell the siblings to test no policy */
2539 	pthread_mutex_lock(&self->mutex);
2540 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2541 		TH_LOG("cond broadcast non-zero");
2542 	}
2543 	pthread_mutex_unlock(&self->mutex);
2544 
2545 	/* Ensure they are both upset about lacking nnp. */
2546 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2547 	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2548 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2549 	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2550 }
2551 
TEST_F(TSYNC,two_siblings_with_no_filter)2552 TEST_F(TSYNC, two_siblings_with_no_filter)
2553 {
2554 	long ret;
2555 	void *status;
2556 
2557 	/* start siblings before any prctl() operations */
2558 	tsync_start_sibling(&self->sibling[0]);
2559 	tsync_start_sibling(&self->sibling[1]);
2560 	while (self->sibling_count < TSYNC_SIBLINGS) {
2561 		sem_wait(&self->started);
2562 		self->sibling_count++;
2563 	}
2564 
2565 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2566 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2567 	}
2568 
2569 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2570 		      &self->apply_prog);
2571 	ASSERT_NE(ENOSYS, errno) {
2572 		TH_LOG("Kernel does not support seccomp syscall!");
2573 	}
2574 	ASSERT_EQ(0, ret) {
2575 		TH_LOG("Could install filter on all threads!");
2576 	}
2577 
2578 	/* Tell the siblings to test the policy */
2579 	pthread_mutex_lock(&self->mutex);
2580 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2581 		TH_LOG("cond broadcast non-zero");
2582 	}
2583 	pthread_mutex_unlock(&self->mutex);
2584 
2585 	/* Ensure they are both killed and don't exit cleanly. */
2586 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2587 	EXPECT_EQ(0x0, (long)status);
2588 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2589 	EXPECT_EQ(0x0, (long)status);
2590 }
2591 
TEST_F(TSYNC,two_siblings_with_one_divergence)2592 TEST_F(TSYNC, two_siblings_with_one_divergence)
2593 {
2594 	long ret;
2595 	void *status;
2596 
2597 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2598 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2599 	}
2600 
2601 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2602 	ASSERT_NE(ENOSYS, errno) {
2603 		TH_LOG("Kernel does not support seccomp syscall!");
2604 	}
2605 	ASSERT_EQ(0, ret) {
2606 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2607 	}
2608 	self->sibling[0].diverge = 1;
2609 	tsync_start_sibling(&self->sibling[0]);
2610 	tsync_start_sibling(&self->sibling[1]);
2611 
2612 	while (self->sibling_count < TSYNC_SIBLINGS) {
2613 		sem_wait(&self->started);
2614 		self->sibling_count++;
2615 	}
2616 
2617 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2618 		      &self->apply_prog);
2619 	ASSERT_EQ(self->sibling[0].system_tid, ret) {
2620 		TH_LOG("Did not fail on diverged sibling.");
2621 	}
2622 
2623 	/* Wake the threads */
2624 	pthread_mutex_lock(&self->mutex);
2625 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2626 		TH_LOG("cond broadcast non-zero");
2627 	}
2628 	pthread_mutex_unlock(&self->mutex);
2629 
2630 	/* Ensure they are both unkilled. */
2631 	PTHREAD_JOIN(self->sibling[0].tid, &status);
2632 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2633 	PTHREAD_JOIN(self->sibling[1].tid, &status);
2634 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2635 }
2636 
TEST_F(TSYNC,two_siblings_not_under_filter)2637 TEST_F(TSYNC, two_siblings_not_under_filter)
2638 {
2639 	long ret, sib;
2640 	void *status;
2641 	struct timespec delay = { .tv_nsec = 100000000 };
2642 
2643 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2644 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2645 	}
2646 
2647 	/*
2648 	 * Sibling 0 will have its own seccomp policy
2649 	 * and Sibling 1 will not be under seccomp at
2650 	 * all. Sibling 1 will enter seccomp and 0
2651 	 * will cause failure.
2652 	 */
2653 	self->sibling[0].diverge = 1;
2654 	tsync_start_sibling(&self->sibling[0]);
2655 	tsync_start_sibling(&self->sibling[1]);
2656 
2657 	while (self->sibling_count < TSYNC_SIBLINGS) {
2658 		sem_wait(&self->started);
2659 		self->sibling_count++;
2660 	}
2661 
2662 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2663 	ASSERT_NE(ENOSYS, errno) {
2664 		TH_LOG("Kernel does not support seccomp syscall!");
2665 	}
2666 	ASSERT_EQ(0, ret) {
2667 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2668 	}
2669 
2670 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2671 		      &self->apply_prog);
2672 	ASSERT_EQ(ret, self->sibling[0].system_tid) {
2673 		TH_LOG("Did not fail on diverged sibling.");
2674 	}
2675 	sib = 1;
2676 	if (ret == self->sibling[0].system_tid)
2677 		sib = 0;
2678 
2679 	pthread_mutex_lock(&self->mutex);
2680 
2681 	/* Increment the other siblings num_waits so we can clean up
2682 	 * the one we just saw.
2683 	 */
2684 	self->sibling[!sib].num_waits += 1;
2685 
2686 	/* Signal the thread to clean up*/
2687 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2688 		TH_LOG("cond broadcast non-zero");
2689 	}
2690 	pthread_mutex_unlock(&self->mutex);
2691 	PTHREAD_JOIN(self->sibling[sib].tid, &status);
2692 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2693 	/* Poll for actual task death. pthread_join doesn't guarantee it. */
2694 	while (!kill(self->sibling[sib].system_tid, 0))
2695 		nanosleep(&delay, NULL);
2696 	/* Switch to the remaining sibling */
2697 	sib = !sib;
2698 
2699 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2700 		      &self->apply_prog);
2701 	ASSERT_EQ(0, ret) {
2702 		TH_LOG("Expected the remaining sibling to sync");
2703 	};
2704 
2705 	pthread_mutex_lock(&self->mutex);
2706 
2707 	/* If remaining sibling didn't have a chance to wake up during
2708 	 * the first broadcast, manually reduce the num_waits now.
2709 	 */
2710 	if (self->sibling[sib].num_waits > 1)
2711 		self->sibling[sib].num_waits = 1;
2712 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2713 		TH_LOG("cond broadcast non-zero");
2714 	}
2715 	pthread_mutex_unlock(&self->mutex);
2716 	PTHREAD_JOIN(self->sibling[sib].tid, &status);
2717 	EXPECT_EQ(0, (long)status);
2718 	/* Poll for actual task death. pthread_join doesn't guarantee it. */
2719 	while (!kill(self->sibling[sib].system_tid, 0))
2720 		nanosleep(&delay, NULL);
2721 
2722 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2723 		      &self->apply_prog);
2724 	ASSERT_EQ(0, ret);  /* just us chickens */
2725 }
2726 
2727 /* Make sure restarted syscalls are seen directly as "restart_syscall". */
TEST(syscall_restart)2728 TEST(syscall_restart)
2729 {
2730 	long ret;
2731 	unsigned long msg;
2732 	pid_t child_pid;
2733 	int pipefd[2];
2734 	int status;
2735 	siginfo_t info = { };
2736 	struct sock_filter filter[] = {
2737 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2738 			 offsetof(struct seccomp_data, nr)),
2739 
2740 #ifdef __NR_sigreturn
2741 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
2742 #endif
2743 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
2744 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
2745 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
2746 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0),
2747 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
2748 
2749 		/* Allow __NR_write for easy logging. */
2750 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
2751 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2752 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2753 		/* The nanosleep jump target. */
2754 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
2755 		/* The restart_syscall jump target. */
2756 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
2757 	};
2758 	struct sock_fprog prog = {
2759 		.len = (unsigned short)ARRAY_SIZE(filter),
2760 		.filter = filter,
2761 	};
2762 #if defined(__arm__)
2763 	struct utsname utsbuf;
2764 #endif
2765 
2766 	ASSERT_EQ(0, pipe(pipefd));
2767 
2768 	child_pid = fork();
2769 	ASSERT_LE(0, child_pid);
2770 	if (child_pid == 0) {
2771 		/* Child uses EXPECT not ASSERT to deliver status correctly. */
2772 		char buf = ' ';
2773 		struct timespec timeout = { };
2774 
2775 		/* Attach parent as tracer and stop. */
2776 		EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
2777 		EXPECT_EQ(0, raise(SIGSTOP));
2778 
2779 		EXPECT_EQ(0, close(pipefd[1]));
2780 
2781 		EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2782 			TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2783 		}
2784 
2785 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2786 		EXPECT_EQ(0, ret) {
2787 			TH_LOG("Failed to install filter!");
2788 		}
2789 
2790 		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2791 			TH_LOG("Failed to read() sync from parent");
2792 		}
2793 		EXPECT_EQ('.', buf) {
2794 			TH_LOG("Failed to get sync data from read()");
2795 		}
2796 
2797 		/* Start nanosleep to be interrupted. */
2798 		timeout.tv_sec = 1;
2799 		errno = 0;
2800 		EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
2801 			TH_LOG("Call to nanosleep() failed (errno %d)", errno);
2802 		}
2803 
2804 		/* Read final sync from parent. */
2805 		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2806 			TH_LOG("Failed final read() from parent");
2807 		}
2808 		EXPECT_EQ('!', buf) {
2809 			TH_LOG("Failed to get final data from read()");
2810 		}
2811 
2812 		/* Directly report the status of our test harness results. */
2813 		syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
2814 						     : EXIT_FAILURE);
2815 	}
2816 	EXPECT_EQ(0, close(pipefd[0]));
2817 
2818 	/* Attach to child, setup options, and release. */
2819 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2820 	ASSERT_EQ(true, WIFSTOPPED(status));
2821 	ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
2822 			    PTRACE_O_TRACESECCOMP));
2823 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2824 	ASSERT_EQ(1, write(pipefd[1], ".", 1));
2825 
2826 	/* Wait for nanosleep() to start. */
2827 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2828 	ASSERT_EQ(true, WIFSTOPPED(status));
2829 	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2830 	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2831 	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2832 	ASSERT_EQ(0x100, msg);
2833 	EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid));
2834 
2835 	/* Might as well check siginfo for sanity while we're here. */
2836 	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2837 	ASSERT_EQ(SIGTRAP, info.si_signo);
2838 	ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
2839 	EXPECT_EQ(0, info.si_errno);
2840 	EXPECT_EQ(getuid(), info.si_uid);
2841 	/* Verify signal delivery came from child (seccomp-triggered). */
2842 	EXPECT_EQ(child_pid, info.si_pid);
2843 
2844 	/* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
2845 	ASSERT_EQ(0, kill(child_pid, SIGSTOP));
2846 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2847 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2848 	ASSERT_EQ(true, WIFSTOPPED(status));
2849 	ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
2850 	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2851 	/*
2852 	 * There is no siginfo on SIGSTOP any more, so we can't verify
2853 	 * signal delivery came from parent now (getpid() == info.si_pid).
2854 	 * https://lkml.kernel.org/r/CAGXu5jJaZAOzP1qFz66tYrtbuywqb+UN2SOA1VLHpCCOiYvYeg@mail.gmail.com
2855 	 * At least verify the SIGSTOP via PTRACE_GETSIGINFO.
2856 	 */
2857 	EXPECT_EQ(SIGSTOP, info.si_signo);
2858 
2859 	/* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
2860 	ASSERT_EQ(0, kill(child_pid, SIGCONT));
2861 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2862 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2863 	ASSERT_EQ(true, WIFSTOPPED(status));
2864 	ASSERT_EQ(SIGCONT, WSTOPSIG(status));
2865 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2866 
2867 	/* Wait for restart_syscall() to start. */
2868 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2869 	ASSERT_EQ(true, WIFSTOPPED(status));
2870 	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2871 	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2872 	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2873 
2874 	ASSERT_EQ(0x200, msg);
2875 	ret = get_syscall(_metadata, child_pid);
2876 #if defined(__arm__)
2877 	/*
2878 	 * FIXME:
2879 	 * - native ARM registers do NOT expose true syscall.
2880 	 * - compat ARM registers on ARM64 DO expose true syscall.
2881 	 */
2882 	ASSERT_EQ(0, uname(&utsbuf));
2883 	if (strncmp(utsbuf.machine, "arm", 3) == 0) {
2884 		EXPECT_EQ(__NR_nanosleep, ret);
2885 	} else
2886 #endif
2887 	{
2888 		EXPECT_EQ(__NR_restart_syscall, ret);
2889 	}
2890 
2891 	/* Write again to end test. */
2892 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2893 	ASSERT_EQ(1, write(pipefd[1], "!", 1));
2894 	EXPECT_EQ(0, close(pipefd[1]));
2895 
2896 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2897 	if (WIFSIGNALED(status) || WEXITSTATUS(status))
2898 		_metadata->passed = 0;
2899 }
2900 
TEST_SIGNAL(filter_flag_log,SIGSYS)2901 TEST_SIGNAL(filter_flag_log, SIGSYS)
2902 {
2903 	struct sock_filter allow_filter[] = {
2904 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2905 	};
2906 	struct sock_filter kill_filter[] = {
2907 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2908 			offsetof(struct seccomp_data, nr)),
2909 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
2910 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2911 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2912 	};
2913 	struct sock_fprog allow_prog = {
2914 		.len = (unsigned short)ARRAY_SIZE(allow_filter),
2915 		.filter = allow_filter,
2916 	};
2917 	struct sock_fprog kill_prog = {
2918 		.len = (unsigned short)ARRAY_SIZE(kill_filter),
2919 		.filter = kill_filter,
2920 	};
2921 	long ret;
2922 	pid_t parent = getppid();
2923 
2924 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
2925 	ASSERT_EQ(0, ret);
2926 
2927 	/* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
2928 	ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
2929 		      &allow_prog);
2930 	ASSERT_NE(ENOSYS, errno) {
2931 		TH_LOG("Kernel does not support seccomp syscall!");
2932 	}
2933 	EXPECT_NE(0, ret) {
2934 		TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
2935 	}
2936 	EXPECT_EQ(EINVAL, errno) {
2937 		TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
2938 	}
2939 
2940 	/* Verify that a simple, permissive filter can be added with no flags */
2941 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
2942 	EXPECT_EQ(0, ret);
2943 
2944 	/* See if the same filter can be added with the FILTER_FLAG_LOG flag */
2945 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
2946 		      &allow_prog);
2947 	ASSERT_NE(EINVAL, errno) {
2948 		TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
2949 	}
2950 	EXPECT_EQ(0, ret);
2951 
2952 	/* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
2953 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
2954 		      &kill_prog);
2955 	EXPECT_EQ(0, ret);
2956 
2957 	EXPECT_EQ(parent, syscall(__NR_getppid));
2958 	/* getpid() should never return. */
2959 	EXPECT_EQ(0, syscall(__NR_getpid));
2960 }
2961 
TEST(get_action_avail)2962 TEST(get_action_avail)
2963 {
2964 	__u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
2965 			    SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
2966 			    SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
2967 	__u32 unknown_action = 0x10000000U;
2968 	int i;
2969 	long ret;
2970 
2971 	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
2972 	ASSERT_NE(ENOSYS, errno) {
2973 		TH_LOG("Kernel does not support seccomp syscall!");
2974 	}
2975 	ASSERT_NE(EINVAL, errno) {
2976 		TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
2977 	}
2978 	EXPECT_EQ(ret, 0);
2979 
2980 	for (i = 0; i < ARRAY_SIZE(actions); i++) {
2981 		ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
2982 		EXPECT_EQ(ret, 0) {
2983 			TH_LOG("Expected action (0x%X) not available!",
2984 			       actions[i]);
2985 		}
2986 	}
2987 
2988 	/* Check that an unknown action is handled properly (EOPNOTSUPP) */
2989 	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
2990 	EXPECT_EQ(ret, -1);
2991 	EXPECT_EQ(errno, EOPNOTSUPP);
2992 }
2993 
TEST(get_metadata)2994 TEST(get_metadata)
2995 {
2996 	pid_t pid;
2997 	int pipefd[2];
2998 	char buf;
2999 	struct seccomp_metadata md;
3000 	long ret;
3001 
3002 	/* Only real root can get metadata. */
3003 	if (geteuid()) {
3004 		XFAIL(return, "get_metadata requires real root");
3005 		return;
3006 	}
3007 
3008 	ASSERT_EQ(0, pipe(pipefd));
3009 
3010 	pid = fork();
3011 	ASSERT_GE(pid, 0);
3012 	if (pid == 0) {
3013 		struct sock_filter filter[] = {
3014 			BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3015 		};
3016 		struct sock_fprog prog = {
3017 			.len = (unsigned short)ARRAY_SIZE(filter),
3018 			.filter = filter,
3019 		};
3020 
3021 		/* one with log, one without */
3022 		EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
3023 				     SECCOMP_FILTER_FLAG_LOG, &prog));
3024 		EXPECT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
3025 
3026 		EXPECT_EQ(0, close(pipefd[0]));
3027 		ASSERT_EQ(1, write(pipefd[1], "1", 1));
3028 		ASSERT_EQ(0, close(pipefd[1]));
3029 
3030 		while (1)
3031 			sleep(100);
3032 	}
3033 
3034 	ASSERT_EQ(0, close(pipefd[1]));
3035 	ASSERT_EQ(1, read(pipefd[0], &buf, 1));
3036 
3037 	ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
3038 	ASSERT_EQ(pid, waitpid(pid, NULL, 0));
3039 
3040 	/* Past here must not use ASSERT or child process is never killed. */
3041 
3042 	md.filter_off = 0;
3043 	errno = 0;
3044 	ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
3045 	EXPECT_EQ(sizeof(md), ret) {
3046 		if (errno == EINVAL)
3047 			XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
3048 	}
3049 
3050 	EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
3051 	EXPECT_EQ(md.filter_off, 0);
3052 
3053 	md.filter_off = 1;
3054 	ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
3055 	EXPECT_EQ(sizeof(md), ret);
3056 	EXPECT_EQ(md.flags, 0);
3057 	EXPECT_EQ(md.filter_off, 1);
3058 
3059 skip:
3060 	ASSERT_EQ(0, kill(pid, SIGKILL));
3061 }
3062 
user_trap_syscall(int nr,unsigned int flags)3063 static int user_trap_syscall(int nr, unsigned int flags)
3064 {
3065 	struct sock_filter filter[] = {
3066 		BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
3067 			offsetof(struct seccomp_data, nr)),
3068 		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1),
3069 		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF),
3070 		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
3071 	};
3072 
3073 	struct sock_fprog prog = {
3074 		.len = (unsigned short)ARRAY_SIZE(filter),
3075 		.filter = filter,
3076 	};
3077 
3078 	return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
3079 }
3080 
/*
 * Sentinel used by the user-notification tests: the supervisor sends it back
 * as the syscall's return value (resp.val) and the trapped child exits 0 only
 * if its syscall returned exactly this value.
 */
#define USER_NOTIF_MAGIC INT_MAX
TEST(user_notification_basic)3082 TEST(user_notification_basic)
3083 {
3084 	pid_t pid;
3085 	long ret;
3086 	int status, listener;
3087 	struct seccomp_notif req = {};
3088 	struct seccomp_notif_resp resp = {};
3089 	struct pollfd pollfd;
3090 
3091 	struct sock_filter filter[] = {
3092 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
3093 	};
3094 	struct sock_fprog prog = {
3095 		.len = (unsigned short)ARRAY_SIZE(filter),
3096 		.filter = filter,
3097 	};
3098 
3099 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3100 	ASSERT_EQ(0, ret) {
3101 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3102 	}
3103 
3104 	pid = fork();
3105 	ASSERT_GE(pid, 0);
3106 
3107 	/* Check that we get -ENOSYS with no listener attached */
3108 	if (pid == 0) {
3109 		if (user_trap_syscall(__NR_getppid, 0) < 0)
3110 			exit(1);
3111 		ret = syscall(__NR_getppid);
3112 		exit(ret >= 0 || errno != ENOSYS);
3113 	}
3114 
3115 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3116 	EXPECT_EQ(true, WIFEXITED(status));
3117 	EXPECT_EQ(0, WEXITSTATUS(status));
3118 
3119 	/* Add some no-op filters for grins. */
3120 	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3121 	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3122 	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3123 	EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
3124 
3125 	/* Check that the basic notification machinery works */
3126 	listener = user_trap_syscall(__NR_getppid,
3127 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3128 	ASSERT_GE(listener, 0);
3129 
3130 	/* Installing a second listener in the chain should EBUSY */
3131 	EXPECT_EQ(user_trap_syscall(__NR_getppid,
3132 				    SECCOMP_FILTER_FLAG_NEW_LISTENER),
3133 		  -1);
3134 	EXPECT_EQ(errno, EBUSY);
3135 
3136 	pid = fork();
3137 	ASSERT_GE(pid, 0);
3138 
3139 	if (pid == 0) {
3140 		ret = syscall(__NR_getppid);
3141 		exit(ret != USER_NOTIF_MAGIC);
3142 	}
3143 
3144 	pollfd.fd = listener;
3145 	pollfd.events = POLLIN | POLLOUT;
3146 
3147 	EXPECT_GT(poll(&pollfd, 1, -1), 0);
3148 	EXPECT_EQ(pollfd.revents, POLLIN);
3149 
3150 	/* Test that we can't pass garbage to the kernel. */
3151 	memset(&req, 0, sizeof(req));
3152 	req.pid = -1;
3153 	errno = 0;
3154 	ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
3155 	EXPECT_EQ(-1, ret);
3156 	EXPECT_EQ(EINVAL, errno);
3157 
3158 	if (ret) {
3159 		req.pid = 0;
3160 		EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3161 	}
3162 
3163 	pollfd.fd = listener;
3164 	pollfd.events = POLLIN | POLLOUT;
3165 
3166 	EXPECT_GT(poll(&pollfd, 1, -1), 0);
3167 	EXPECT_EQ(pollfd.revents, POLLOUT);
3168 
3169 	EXPECT_EQ(req.data.nr,  __NR_getppid);
3170 
3171 	resp.id = req.id;
3172 	resp.error = 0;
3173 	resp.val = USER_NOTIF_MAGIC;
3174 
3175 	/* check that we make sure flags == 0 */
3176 	resp.flags = 1;
3177 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3178 	EXPECT_EQ(errno, EINVAL);
3179 
3180 	resp.flags = 0;
3181 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3182 
3183 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3184 	EXPECT_EQ(true, WIFEXITED(status));
3185 	EXPECT_EQ(0, WEXITSTATUS(status));
3186 }
3187 
TEST(user_notification_kill_in_middle)3188 TEST(user_notification_kill_in_middle)
3189 {
3190 	pid_t pid;
3191 	long ret;
3192 	int listener;
3193 	struct seccomp_notif req = {};
3194 	struct seccomp_notif_resp resp = {};
3195 
3196 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3197 	ASSERT_EQ(0, ret) {
3198 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3199 	}
3200 
3201 	listener = user_trap_syscall(__NR_getppid,
3202 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3203 	ASSERT_GE(listener, 0);
3204 
3205 	/*
3206 	 * Check that nothing bad happens when we kill the task in the middle
3207 	 * of a syscall.
3208 	 */
3209 	pid = fork();
3210 	ASSERT_GE(pid, 0);
3211 
3212 	if (pid == 0) {
3213 		ret = syscall(__NR_getppid);
3214 		exit(ret != USER_NOTIF_MAGIC);
3215 	}
3216 
3217 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3218 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), 0);
3219 
3220 	EXPECT_EQ(kill(pid, SIGKILL), 0);
3221 	EXPECT_EQ(waitpid(pid, NULL, 0), pid);
3222 
3223 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ID_VALID, &req.id), -1);
3224 
3225 	resp.id = req.id;
3226 	ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp);
3227 	EXPECT_EQ(ret, -1);
3228 	EXPECT_EQ(errno, ENOENT);
3229 }
3230 
/*
 * Descriptor that signal_handler() writes to. It stays -1 until a test
 * assigns a real descriptor before installing the handler.
 */
static int handled = -1;

/*
 * Signal handler: write a single 'c' byte to 'handled' so the reader on the
 * other end can tell that the handler ran.
 *
 * Only write(2) is used here because stdio (perror) is not
 * async-signal-safe, and errno is saved/restored so the interrupted code
 * never sees a value set by this handler.
 */
static void signal_handler(int signal)
{
	static const char failure_msg[] = "write from signal: failed\n";
	int saved_errno = errno;

	(void)signal;

	if (write(handled, "c", 1) != 1) {
		/* Best-effort diagnostic; nothing else can be done here. */
		if (write(STDERR_FILENO, failure_msg,
			  sizeof(failure_msg) - 1) < 0) {
			/* ignored on purpose */
		}
	}

	errno = saved_errno;
}
3238 
TEST(user_notification_signal)3239 TEST(user_notification_signal)
3240 {
3241 	pid_t pid;
3242 	long ret;
3243 	int status, listener, sk_pair[2];
3244 	struct seccomp_notif req = {};
3245 	struct seccomp_notif_resp resp = {};
3246 	char c;
3247 
3248 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3249 	ASSERT_EQ(0, ret) {
3250 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3251 	}
3252 
3253 	ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
3254 
3255 	listener = user_trap_syscall(__NR_gettid,
3256 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3257 	ASSERT_GE(listener, 0);
3258 
3259 	pid = fork();
3260 	ASSERT_GE(pid, 0);
3261 
3262 	if (pid == 0) {
3263 		close(sk_pair[0]);
3264 		handled = sk_pair[1];
3265 		if (signal(SIGUSR1, signal_handler) == SIG_ERR) {
3266 			perror("signal");
3267 			exit(1);
3268 		}
3269 		/*
3270 		 * ERESTARTSYS behavior is a bit hard to test, because we need
3271 		 * to rely on a signal that has not yet been handled. Let's at
3272 		 * least check that the error code gets propagated through, and
3273 		 * hope that it doesn't break when there is actually a signal :)
3274 		 */
3275 		ret = syscall(__NR_gettid);
3276 		exit(!(ret == -1 && errno == 512));
3277 	}
3278 
3279 	close(sk_pair[1]);
3280 
3281 	memset(&req, 0, sizeof(req));
3282 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3283 
3284 	EXPECT_EQ(kill(pid, SIGUSR1), 0);
3285 
3286 	/*
3287 	 * Make sure the signal really is delivered, which means we're not
3288 	 * stuck in the user notification code any more and the notification
3289 	 * should be dead.
3290 	 */
3291 	EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
3292 
3293 	resp.id = req.id;
3294 	resp.error = -EPERM;
3295 	resp.val = 0;
3296 
3297 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
3298 	EXPECT_EQ(errno, ENOENT);
3299 
3300 	memset(&req, 0, sizeof(req));
3301 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3302 
3303 	resp.id = req.id;
3304 	resp.error = -512; /* -ERESTARTSYS */
3305 	resp.val = 0;
3306 
3307 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3308 
3309 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3310 	EXPECT_EQ(true, WIFEXITED(status));
3311 	EXPECT_EQ(0, WEXITSTATUS(status));
3312 }
3313 
TEST(user_notification_closed_listener)3314 TEST(user_notification_closed_listener)
3315 {
3316 	pid_t pid;
3317 	long ret;
3318 	int status, listener;
3319 
3320 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
3321 	ASSERT_EQ(0, ret) {
3322 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3323 	}
3324 
3325 	listener = user_trap_syscall(__NR_getppid,
3326 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3327 	ASSERT_GE(listener, 0);
3328 
3329 	/*
3330 	 * Check that we get an ENOSYS when the listener is closed.
3331 	 */
3332 	pid = fork();
3333 	ASSERT_GE(pid, 0);
3334 	if (pid == 0) {
3335 		close(listener);
3336 		ret = syscall(__NR_getppid);
3337 		exit(ret != -1 && errno != ENOSYS);
3338 	}
3339 
3340 	close(listener);
3341 
3342 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3343 	EXPECT_EQ(true, WIFEXITED(status));
3344 	EXPECT_EQ(0, WEXITSTATUS(status));
3345 }
3346 
3347 /*
3348  * Check that a pid in a child namespace still shows up as valid in ours.
3349  */
TEST(user_notification_child_pid_ns)3350 TEST(user_notification_child_pid_ns)
3351 {
3352 	pid_t pid;
3353 	int status, listener;
3354 	struct seccomp_notif req = {};
3355 	struct seccomp_notif_resp resp = {};
3356 
3357 	ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0);
3358 
3359 	listener = user_trap_syscall(__NR_getppid,
3360 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3361 	ASSERT_GE(listener, 0);
3362 
3363 	pid = fork();
3364 	ASSERT_GE(pid, 0);
3365 
3366 	if (pid == 0)
3367 		exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3368 
3369 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3370 	EXPECT_EQ(req.pid, pid);
3371 
3372 	resp.id = req.id;
3373 	resp.error = 0;
3374 	resp.val = USER_NOTIF_MAGIC;
3375 
3376 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3377 
3378 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3379 	EXPECT_EQ(true, WIFEXITED(status));
3380 	EXPECT_EQ(0, WEXITSTATUS(status));
3381 	close(listener);
3382 }
3383 
3384 /*
3385  * Check that a pid in a sibling (i.e. unrelated) namespace shows up as 0, i.e.
3386  * invalid.
3387  */
TEST(user_notification_sibling_pid_ns)3388 TEST(user_notification_sibling_pid_ns)
3389 {
3390 	pid_t pid, pid2;
3391 	int status, listener;
3392 	struct seccomp_notif req = {};
3393 	struct seccomp_notif_resp resp = {};
3394 
3395 	ASSERT_EQ(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0), 0) {
3396 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
3397 	}
3398 
3399 	listener = user_trap_syscall(__NR_getppid,
3400 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3401 	ASSERT_GE(listener, 0);
3402 
3403 	pid = fork();
3404 	ASSERT_GE(pid, 0);
3405 
3406 	if (pid == 0) {
3407 		ASSERT_EQ(unshare(CLONE_NEWPID), 0);
3408 
3409 		pid2 = fork();
3410 		ASSERT_GE(pid2, 0);
3411 
3412 		if (pid2 == 0)
3413 			exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3414 
3415 		EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3416 		EXPECT_EQ(true, WIFEXITED(status));
3417 		EXPECT_EQ(0, WEXITSTATUS(status));
3418 		exit(WEXITSTATUS(status));
3419 	}
3420 
3421 	/* Create the sibling ns, and sibling in it. */
3422 	ASSERT_EQ(unshare(CLONE_NEWPID), 0);
3423 	ASSERT_EQ(errno, 0);
3424 
3425 	pid2 = fork();
3426 	ASSERT_GE(pid2, 0);
3427 
3428 	if (pid2 == 0) {
3429 		ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3430 		/*
3431 		 * The pid should be 0, i.e. the task is in some namespace that
3432 		 * we can't "see".
3433 		 */
3434 		EXPECT_EQ(req.pid, 0);
3435 
3436 		resp.id = req.id;
3437 		resp.error = 0;
3438 		resp.val = USER_NOTIF_MAGIC;
3439 
3440 		ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3441 		exit(0);
3442 	}
3443 
3444 	close(listener);
3445 
3446 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3447 	EXPECT_EQ(true, WIFEXITED(status));
3448 	EXPECT_EQ(0, WEXITSTATUS(status));
3449 
3450 	EXPECT_EQ(waitpid(pid2, &status, 0), pid2);
3451 	EXPECT_EQ(true, WIFEXITED(status));
3452 	EXPECT_EQ(0, WEXITSTATUS(status));
3453 }
3454 
TEST(user_notification_fault_recv)3455 TEST(user_notification_fault_recv)
3456 {
3457 	pid_t pid;
3458 	int status, listener;
3459 	struct seccomp_notif req = {};
3460 	struct seccomp_notif_resp resp = {};
3461 
3462 	ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
3463 
3464 	listener = user_trap_syscall(__NR_getppid,
3465 				     SECCOMP_FILTER_FLAG_NEW_LISTENER);
3466 	ASSERT_GE(listener, 0);
3467 
3468 	pid = fork();
3469 	ASSERT_GE(pid, 0);
3470 
3471 	if (pid == 0)
3472 		exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
3473 
3474 	/* Do a bad recv() */
3475 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, NULL), -1);
3476 	EXPECT_EQ(errno, EFAULT);
3477 
3478 	/* We should still be able to receive this notification, though. */
3479 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
3480 	EXPECT_EQ(req.pid, pid);
3481 
3482 	resp.id = req.id;
3483 	resp.error = 0;
3484 	resp.val = USER_NOTIF_MAGIC;
3485 
3486 	EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
3487 
3488 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
3489 	EXPECT_EQ(true, WIFEXITED(status));
3490 	EXPECT_EQ(0, WEXITSTATUS(status));
3491 }
3492 
/*
 * SECCOMP_GET_NOTIF_SIZES must succeed and report sizes that match the
 * struct seccomp_notif / struct seccomp_notif_resp definitions this test was
 * compiled against.
 */
TEST(seccomp_get_notif_sizes)
{
	struct seccomp_notif_sizes sizes;

	/* Everything below reads 'sizes', so a failed query is fatal. */
	ASSERT_EQ(seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes), 0);
	EXPECT_EQ(sizes.seccomp_notif, sizeof(struct seccomp_notif));
	EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp));
}
3501 
3502 /*
3503  * TODO:
3504  * - add microbenchmarks
3505  * - expand NNP testing
3506  * - better arch-specific TRACE and TRAP handlers.
3507  * - endianness checking when appropriate
3508  * - 64-bit arg prodding
3509  * - arch value testing (x86 modes especially)
3510  * - verify that FILTER_FLAG_LOG filters generate log messages
3511  * - verify that RET_LOG generates log messages
3512  * - ...
3513  */
3514 
/*
 * Macro supplied by ../kselftest_harness.h. NOTE(review): presumably expands
 * to the program's main() that runs the TEST() cases defined in this file;
 * confirm against the harness header.
 */
TEST_HARNESS_MAIN
3516