• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3  * Use of this source code is governed by the GPLv2 license.
4  *
5  * Test code for seccomp bpf.
6  */
7 
/* Feature-test macros only take effect when defined before the first include. */
#define _GNU_SOURCE
#include <sys/types.h>

/*
 * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
 * we need to use the kernel's siginfo.h file and trick glibc
 * into accepting it.
 */
#if !__GLIBC_PREREQ(2, 26)
# include <asm/siginfo.h>
# define __have_siginfo_t 1
# define __have_sigval_t 1
# define __have_sigevent_t 1
#endif

#include <errno.h>
#include <linux/filter.h>
#include <sys/prctl.h>
#include <sys/ptrace.h>
#include <sys/user.h>
#include <linux/prctl.h>
#include <linux/ptrace.h>
#include <linux/seccomp.h>
#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <stddef.h>
#include <stdbool.h>
#include <string.h>
#include <time.h>
#include <linux/elf.h>
#include <sys/uio.h>
#include <sys/utsname.h>
#include <sys/fcntl.h>
#include <sys/mman.h>
#include <sys/times.h>

#include <unistd.h>
#include <sys/syscall.h>
47 
48 #include "test_harness.h"
49 
50 #ifndef PR_SET_PTRACER
51 # define PR_SET_PTRACER 0x59616d61
52 #endif
53 
54 #ifndef PR_SET_NO_NEW_PRIVS
55 #define PR_SET_NO_NEW_PRIVS 38
56 #define PR_GET_NO_NEW_PRIVS 39
57 #endif
58 
59 #ifndef PR_SECCOMP_EXT
60 #define PR_SECCOMP_EXT 43
61 #endif
62 
63 #ifndef SECCOMP_EXT_ACT
64 #define SECCOMP_EXT_ACT 1
65 #endif
66 
67 #ifndef SECCOMP_EXT_ACT_TSYNC
68 #define SECCOMP_EXT_ACT_TSYNC 1
69 #endif
70 
71 #ifndef SECCOMP_MODE_STRICT
72 #define SECCOMP_MODE_STRICT 1
73 #endif
74 
75 #ifndef SECCOMP_MODE_FILTER
76 #define SECCOMP_MODE_FILTER 2
77 #endif
78 
79 #ifndef SECCOMP_RET_KILL
80 #define SECCOMP_RET_KILL        0x00000000U /* kill the task immediately */
81 #define SECCOMP_RET_TRAP        0x00030000U /* disallow and force a SIGSYS */
82 #define SECCOMP_RET_ERRNO       0x00050000U /* returns an errno */
83 #define SECCOMP_RET_TRACE       0x7ff00000U /* pass to a tracer or disallow */
84 #define SECCOMP_RET_ALLOW       0x7fff0000U /* allow */
85 
86 /* Masks for the return value sections. */
87 #define SECCOMP_RET_ACTION      0x7fff0000U
88 #define SECCOMP_RET_DATA        0x0000ffffU
89 
90 struct seccomp_data {
91 	int nr;
92 	__u32 arch;
93 	__u64 instruction_pointer;
94 	__u64 args[6];
95 };
96 #endif
97 
98 #if __BYTE_ORDER == __LITTLE_ENDIAN
99 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
100 #elif __BYTE_ORDER == __BIG_ENDIAN
101 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
102 #else
103 #error "wut? Unknown __BYTE_ORDER?!"
104 #endif
105 
106 #define SIBLING_EXIT_UNKILLED	0xbadbeef
107 #define SIBLING_EXIT_FAILURE	0xbadface
108 #define SIBLING_EXIT_NEWPRIVS	0xbadfeed
109 
TEST(mode_strict_support)110 TEST(mode_strict_support)
111 {
112 	long ret;
113 
114 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
115 	ASSERT_EQ(0, ret) {
116 		TH_LOG("Kernel does not support CONFIG_SECCOMP");
117 	}
118 	syscall(__NR_exit, 1);
119 }
120 
/*
 * After entering strict mode, issues a raw prctl syscall. The test is
 * declared to end with SIGKILL, so the expectation after the syscall only
 * fires if the process survived.
 */
TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
{
	long ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);

	ASSERT_EQ(0, ret) {
		TH_LOG("Kernel does not support CONFIG_SECCOMP");
	}

	syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
		NULL, NULL, NULL);

	/* Deliberately failing expectation: getting here is the failure. */
	EXPECT_FALSE(true) {
		TH_LOG("Unreachable!");
	}
}
135 
136 /* Note! This doesn't test no new privs behavior */
TEST(no_new_privs_support)137 TEST(no_new_privs_support)
138 {
139 	long ret;
140 
141 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
142 	EXPECT_EQ(0, ret) {
143 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
144 	}
145 }
146 
147 /* Tests kernel support by checking for a copy_from_user() fault on * NULL. */
TEST(mode_filter_support)148 TEST(mode_filter_support)
149 {
150 	long ret;
151 
152 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
153 	ASSERT_EQ(0, ret) {
154 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
155 	}
156 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
157 	EXPECT_EQ(-1, ret);
158 	EXPECT_EQ(EFAULT, errno) {
159 		TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
160 	}
161 }
162 
TEST(mode_filter_without_nnp)163 TEST(mode_filter_without_nnp)
164 {
165 	struct sock_filter filter[] = {
166 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
167 	};
168 	struct sock_fprog prog = {
169 		.len = (unsigned short)ARRAY_SIZE(filter),
170 		.filter = filter,
171 	};
172 	long ret;
173 
174 	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
175 	ASSERT_LE(0, ret) {
176 		TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
177 	}
178 	errno = 0;
179 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
180 	/* Succeeds with CAP_SYS_ADMIN, fails without */
181 	/* TODO(wad) check caps not euid */
182 	if (geteuid()) {
183 		EXPECT_EQ(-1, ret);
184 		EXPECT_EQ(EACCES, errno);
185 	} else {
186 		EXPECT_EQ(0, ret);
187 	}
188 }
189 
190 #define MAX_INSNS_PER_PATH 32768
191 
TEST(filter_size_limits)192 TEST(filter_size_limits)
193 {
194 	int i;
195 	int count = BPF_MAXINSNS + 1;
196 	struct sock_filter allow[] = {
197 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
198 	};
199 	struct sock_filter *filter;
200 	struct sock_fprog prog = { };
201 	long ret;
202 
203 	filter = calloc(count, sizeof(*filter));
204 	ASSERT_NE(NULL, filter);
205 
206 	for (i = 0; i < count; i++)
207 		filter[i] = allow[0];
208 
209 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
210 	ASSERT_EQ(0, ret);
211 
212 	prog.filter = filter;
213 	prog.len = count;
214 
215 	/* Too many filter instructions in a single filter. */
216 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
217 	ASSERT_NE(0, ret) {
218 		TH_LOG("Installing %d insn filter was allowed", prog.len);
219 	}
220 
221 	/* One less is okay, though. */
222 	prog.len -= 1;
223 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
224 	ASSERT_EQ(0, ret) {
225 		TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
226 	}
227 }
228 
TEST(filter_chain_limits)229 TEST(filter_chain_limits)
230 {
231 	int i;
232 	int count = BPF_MAXINSNS;
233 	struct sock_filter allow[] = {
234 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
235 	};
236 	struct sock_filter *filter;
237 	struct sock_fprog prog = { };
238 	long ret;
239 
240 	filter = calloc(count, sizeof(*filter));
241 	ASSERT_NE(NULL, filter);
242 
243 	for (i = 0; i < count; i++)
244 		filter[i] = allow[0];
245 
246 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
247 	ASSERT_EQ(0, ret);
248 
249 	prog.filter = filter;
250 	prog.len = 1;
251 
252 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
253 	ASSERT_EQ(0, ret);
254 
255 	prog.len = count;
256 
257 	/* Too many total filter instructions. */
258 	for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
259 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
260 		if (ret != 0)
261 			break;
262 	}
263 	ASSERT_NE(0, ret) {
264 		TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
265 		       i, count, i * (count + 4));
266 	}
267 }
268 
/*
 * With a filter already installed, a request for SECCOMP_MODE_STRICT is
 * expected to be refused with EINVAL.
 */
TEST(mode_filter_cannot_move_to_strict)
{
	struct sock_filter allow_all[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = allow_all,
		.len = (unsigned short)ARRAY_SIZE(allow_all),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog, 0, 0);
	ASSERT_EQ(0, ret);

	/* Switching modes after the filter is in place must not work. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}
290 
291 
/*
 * PR_GET_SECCOMP is expected to report 0 before any filter is installed and
 * SECCOMP_MODE_FILTER afterwards.
 */
TEST(mode_filter_get_seccomp)
{
	struct sock_filter allow_all[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = allow_all,
		.len = (unsigned short)ARRAY_SIZE(allow_all),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	/* Mode before installing anything. */
	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
	EXPECT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog, 0, 0);
	ASSERT_EQ(0, ret);

	/* Mode once the filter is active. */
	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
	EXPECT_EQ(SECCOMP_MODE_FILTER, ret);
}
315 
316 
/* Installs a one-instruction filter that returns SECCOMP_RET_ALLOW for everything. */
TEST(ALLOW_all)
{
	struct sock_filter allow_all[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = allow_all,
		.len = (unsigned short)ARRAY_SIZE(allow_all),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	ASSERT_EQ(0, ret);
}
334 
/* A program with zero instructions is expected to be rejected with EINVAL. */
TEST(empty_prog)
{
	struct sock_filter no_insns[] = {
	};
	struct sock_fprog fprog = {
		.filter = no_insns,
		.len = (unsigned short)ARRAY_SIZE(no_insns),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}
352 
/*
 * The filter returns 0x10000000, which matches none of the SECCOMP_RET_*
 * action values used in this file. The test is declared to end with SIGSYS,
 * so the getpid expectation only fires if the syscall came back.
 */
TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
{
	struct sock_filter unknown_action[] = {
		BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
	};
	struct sock_fprog fprog = {
		.filter = unknown_action,
		.len = (unsigned short)ARRAY_SIZE(unknown_action),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(0, syscall(__NR_getpid)) {
		TH_LOG("getpid() shouldn't ever return");
	}
}
373 
374 /* return code >= 0x80000000 is unused. */
TEST_SIGNAL(unknown_ret_is_kill_above_allow,SIGSYS)375 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
376 {
377 	struct sock_filter filter[] = {
378 		BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
379 	};
380 	struct sock_fprog prog = {
381 		.len = (unsigned short)ARRAY_SIZE(filter),
382 		.filter = filter,
383 	};
384 	long ret;
385 
386 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
387 	ASSERT_EQ(0, ret);
388 
389 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
390 	ASSERT_EQ(0, ret);
391 	EXPECT_EQ(0, syscall(__NR_getpid)) {
392 		TH_LOG("getpid() shouldn't ever return");
393 	}
394 }
395 
/*
 * Installs a filter that returns SECCOMP_RET_KILL unconditionally. The test
 * is declared to end with SIGSYS; it issues no explicit syscall afterwards.
 */
TEST_SIGNAL(KILL_all, SIGSYS)
{
	struct sock_filter kill_all[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
	};
	struct sock_fprog fprog = {
		.filter = kill_all,
		.len = (unsigned short)ARRAY_SIZE(kill_all),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	ASSERT_EQ(0, ret);
}
413 
/*
 * The filter kills on getpid and allows everything else: getppid must still
 * work, and the test is declared to end with SIGSYS on the getpid call.
 */
TEST_SIGNAL(KILL_one, SIGSYS)
{
	struct sock_filter kill_getpid[] = {
		/* A = syscall number */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		/* getpid falls through to KILL, anything else skips to ALLOW */
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = kill_getpid,
		.len = (unsigned short)ARRAY_SIZE(kill_getpid),
	};
	pid_t parent = getppid();
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}
440 
/*
 * The filter kills times() only when its first argument equals the address
 * of fatal_address: an ordinary times() call must still work, and the test
 * is declared to end with SIGSYS on the call that passes &fatal_address.
 */
TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
{
	void *fatal_address;
	struct sock_filter kill_on_arg0[] = {
		/* A = syscall number */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		/* times() skips the ALLOW below, anything else hits it */
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		/* Only bother with the lower 32 bits for now. */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
			(unsigned long)&fatal_address, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = kill_on_arg0,
		.len = (unsigned short)ARRAY_SIZE(kill_on_arg0),
	};
	pid_t parent = getppid();
	struct tms timebuf;
	/* Baseline taken before the filter is installed. */
	clock_t clock = times(&timebuf);
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	EXPECT_LE(clock, syscall(__NR_times, &timebuf));
	/* times() should never return. */
	EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
}
476 
/*
 * The filter kills mmap (mmap2 where that syscall number exists) only when
 * its sixth argument equals 0x0C0FFEE: a mapping with a different sixth
 * argument must still work, and the test is declared to end with SIGSYS on
 * the call that passes the magic value.
 */
TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
{
#ifndef __NR_mmap2
	int sysno = __NR_mmap;
#else
	int sysno = __NR_mmap2;
#endif
	struct sock_filter kill_on_arg5[] = {
		/* A = syscall number */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		/* The mmap variant skips the ALLOW below, anything else hits it */
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		/* Only bother with the lower 32 bits for now. */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = kill_on_arg5,
		.len = (unsigned short)ARRAY_SIZE(kill_on_arg5),
	};
	pid_t parent = getppid();
	int page_size = sysconf(_SC_PAGESIZE);
	void *map1, *map2;
	long ret;
	int fd;

	ASSERT_LT(0, page_size);

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	ASSERT_EQ(0, ret);

	fd = open("/dev/zero", O_RDONLY);
	ASSERT_NE(-1, fd);

	EXPECT_EQ(parent, syscall(__NR_getppid));

	/* Sixth argument differs from the magic value: must be allowed. */
	map1 = (void *)syscall(sysno,
		NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
	EXPECT_NE(MAP_FAILED, map1);

	/* mmap2() should never return. */
	map2 = (void *)syscall(sysno,
		 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
	EXPECT_EQ(MAP_FAILED, map2);

	/* The test failed, so clean up the resources. */
	munmap(map1, page_size);
	munmap(map2, page_size);
	close(fd);
}
530 
/* TODO(wad) add 64-bit versus 32-bit arg tests. */
/*
 * A filter that loads syscall_arg(6), one past the six-entry args array of
 * struct seccomp_data, is expected to be rejected with EINVAL.
 */
TEST(arg_out_of_range)
{
	struct sock_filter bad_load[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = bad_load,
		.len = (unsigned short)ARRAY_SIZE(bad_load),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	EXPECT_EQ(-1, ret);
	EXPECT_EQ(EINVAL, errno);
}
551 
/*
 * The filter answers read() with SECCOMP_RET_ERRNO | E2BIG; the read() call
 * is expected to fail with errno set to E2BIG.
 */
TEST(ERRNO_valid)
{
	struct sock_filter errno_on_read[] = {
		/* A = syscall number */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		/* read falls through to ERRNO, anything else skips to ALLOW */
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = errno_on_read,
		.len = (unsigned short)ARRAY_SIZE(errno_on_read),
	};
	pid_t parent = getppid();
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	EXPECT_EQ(-1, read(0, NULL, 0));
	EXPECT_EQ(E2BIG, errno);
}
578 
/*
 * The filter answers read() with SECCOMP_RET_ERRNO and an errno value of 0;
 * the read() call is expected to return 0.
 */
TEST(ERRNO_zero)
{
	struct sock_filter errno_on_read[] = {
		/* A = syscall number */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		/* read falls through to ERRNO, anything else skips to ALLOW */
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = errno_on_read,
		.len = (unsigned short)ARRAY_SIZE(errno_on_read),
	};
	pid_t parent = getppid();
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* "errno" of 0 is ok. */
	EXPECT_EQ(0, read(0, NULL, 0));
}
605 
/*
 * The filter answers read() with SECCOMP_RET_ERRNO | 4096; the errno seen
 * by the caller is expected to be capped at 4095.
 */
TEST(ERRNO_capped)
{
	struct sock_filter errno_on_read[] = {
		/* A = syscall number */
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		/* read falls through to ERRNO, anything else skips to ALLOW */
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog fprog = {
		.filter = errno_on_read,
		.len = (unsigned short)ARRAY_SIZE(errno_on_read),
	};
	pid_t parent = getppid();
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &fprog);
	ASSERT_EQ(0, ret);

	EXPECT_EQ(parent, syscall(__NR_getppid));
	EXPECT_EQ(-1, read(0, NULL, 0));
	EXPECT_EQ(4095, errno);
}
632 
FIXTURE_DATA(TRAP)633 FIXTURE_DATA(TRAP) {
634 	struct sock_fprog prog;
635 };
636 
FIXTURE_SETUP(TRAP)637 FIXTURE_SETUP(TRAP)
638 {
639 	struct sock_filter filter[] = {
640 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
641 			offsetof(struct seccomp_data, nr)),
642 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
643 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
644 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
645 	};
646 
647 	memset(&self->prog, 0, sizeof(self->prog));
648 	self->prog.filter = malloc(sizeof(filter));
649 	ASSERT_NE(NULL, self->prog.filter);
650 	memcpy(self->prog.filter, filter, sizeof(filter));
651 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
652 }
653 
FIXTURE_TEARDOWN(TRAP)654 FIXTURE_TEARDOWN(TRAP)
655 {
656 	if (self->prog.filter)
657 		free(self->prog.filter);
658 }
659 
/*
 * With SIGSYS left at its default disposition, the trapped getpid is
 * declared to end the test with SIGSYS.
 */
TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
{
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
	ASSERT_EQ(0, ret);

	syscall(__NR_getpid);
}
671 
672 /* Ensure that SIGSYS overrides SIG_IGN */
TEST_F_SIGNAL(TRAP,ign,SIGSYS)673 TEST_F_SIGNAL(TRAP, ign, SIGSYS)
674 {
675 	long ret;
676 
677 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
678 	ASSERT_EQ(0, ret);
679 
680 	signal(SIGSYS, SIG_IGN);
681 
682 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
683 	ASSERT_EQ(0, ret);
684 	syscall(__NR_getpid);
685 }
686 
687 static siginfo_t TRAP_info;
688 static volatile int TRAP_nr;
TRAP_action(int nr,siginfo_t * info,void * void_context)689 static void TRAP_action(int nr, siginfo_t *info, void *void_context)
690 {
691 	memcpy(&TRAP_info, info, sizeof(TRAP_info));
692 	TRAP_nr = nr;
693 }
694 
TEST_F(TRAP,handler)695 TEST_F(TRAP, handler)
696 {
697 	int ret, test;
698 	struct sigaction act;
699 	sigset_t mask;
700 
701 	memset(&act, 0, sizeof(act));
702 	sigemptyset(&mask);
703 	sigaddset(&mask, SIGSYS);
704 
705 	act.sa_sigaction = &TRAP_action;
706 	act.sa_flags = SA_SIGINFO;
707 	ret = sigaction(SIGSYS, &act, NULL);
708 	ASSERT_EQ(0, ret) {
709 		TH_LOG("sigaction failed");
710 	}
711 	ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
712 	ASSERT_EQ(0, ret) {
713 		TH_LOG("sigprocmask failed");
714 	}
715 
716 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
717 	ASSERT_EQ(0, ret);
718 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
719 	ASSERT_EQ(0, ret);
720 	TRAP_nr = 0;
721 	memset(&TRAP_info, 0, sizeof(TRAP_info));
722 	/* Expect the registers to be rolled back. (nr = error) may vary
723 	 * based on arch. */
724 	ret = syscall(__NR_getpid);
725 	/* Silence gcc warning about volatile. */
726 	test = TRAP_nr;
727 	EXPECT_EQ(SIGSYS, test);
728 	struct local_sigsys {
729 		void *_call_addr;	/* calling user insn */
730 		int _syscall;		/* triggering system call number */
731 		unsigned int _arch;	/* AUDIT_ARCH_* of syscall */
732 	} *sigsys = (struct local_sigsys *)
733 #ifdef si_syscall
734 		&(TRAP_info.si_call_addr);
735 #else
736 		&TRAP_info.si_pid;
737 #endif
738 	EXPECT_EQ(__NR_getpid, sigsys->_syscall);
739 	/* Make sure arch is non-zero. */
740 	EXPECT_NE(0, sigsys->_arch);
741 	EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
742 }
743 
FIXTURE_DATA(precedence)744 FIXTURE_DATA(precedence) {
745 	struct sock_fprog allow;
746 	struct sock_fprog trace;
747 	struct sock_fprog error;
748 	struct sock_fprog trap;
749 	struct sock_fprog kill;
750 };
751 
FIXTURE_SETUP(precedence)752 FIXTURE_SETUP(precedence)
753 {
754 	struct sock_filter allow_insns[] = {
755 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
756 	};
757 	struct sock_filter trace_insns[] = {
758 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
759 			offsetof(struct seccomp_data, nr)),
760 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
761 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
762 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
763 	};
764 	struct sock_filter error_insns[] = {
765 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
766 			offsetof(struct seccomp_data, nr)),
767 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
768 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
769 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
770 	};
771 	struct sock_filter trap_insns[] = {
772 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
773 			offsetof(struct seccomp_data, nr)),
774 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
775 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
776 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
777 	};
778 	struct sock_filter kill_insns[] = {
779 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
780 			offsetof(struct seccomp_data, nr)),
781 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
782 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
783 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
784 	};
785 
786 	memset(self, 0, sizeof(*self));
787 #define FILTER_ALLOC(_x) \
788 	self->_x.filter = malloc(sizeof(_x##_insns)); \
789 	ASSERT_NE(NULL, self->_x.filter); \
790 	memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
791 	self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
792 	FILTER_ALLOC(allow);
793 	FILTER_ALLOC(trace);
794 	FILTER_ALLOC(error);
795 	FILTER_ALLOC(trap);
796 	FILTER_ALLOC(kill);
797 }
798 
FIXTURE_TEARDOWN(precedence)799 FIXTURE_TEARDOWN(precedence)
800 {
801 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
802 	FILTER_FREE(allow);
803 	FILTER_FREE(trace);
804 	FILTER_FREE(error);
805 	FILTER_FREE(trap);
806 	FILTER_FREE(kill);
807 }
808 
TEST_F(precedence,allow_ok)809 TEST_F(precedence, allow_ok)
810 {
811 	pid_t parent, res = 0;
812 	long ret;
813 
814 	parent = getppid();
815 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
816 	ASSERT_EQ(0, ret);
817 
818 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
819 	ASSERT_EQ(0, ret);
820 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
821 	ASSERT_EQ(0, ret);
822 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
823 	ASSERT_EQ(0, ret);
824 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
825 	ASSERT_EQ(0, ret);
826 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
827 	ASSERT_EQ(0, ret);
828 	/* Should work just fine. */
829 	res = syscall(__NR_getppid);
830 	EXPECT_EQ(parent, res);
831 }
832 
/*
 * Stacks all five fixture filters with kill installed last. getppid must
 * still work, and the test is declared to end with SIGSYS on getpid.
 */
TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
{
	struct sock_fprog *order[] = {
		&self->allow, &self->trace, &self->error,
		&self->trap, &self->kill,
	};
	pid_t parent, res = 0;
	size_t i;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	/* Install the filters in the listed order. */
	for (i = 0; i < ARRAY_SIZE(order); i++) {
		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, order[i]);
		ASSERT_EQ(0, ret);
	}

	/* Should work just fine. */
	res = syscall(__NR_getppid);
	EXPECT_EQ(parent, res);
	/* getpid() should never return. */
	res = syscall(__NR_getpid);
	EXPECT_EQ(0, res);
}
859 
/*
 * Same check as kill_is_highest, but with the kill filter installed second
 * rather than last.
 */
TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
{
	struct sock_fprog *order[] = {
		&self->allow, &self->kill, &self->error,
		&self->trace, &self->trap,
	};
	pid_t parent;
	size_t i;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	/* Install the filters in the listed order. */
	for (i = 0; i < ARRAY_SIZE(order); i++) {
		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, order[i]);
		ASSERT_EQ(0, ret);
	}

	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}
884 
/*
 * Stacks allow, trace, error and trap (no kill filter). getppid must still
 * work, and the test is declared to end with SIGSYS on getpid.
 */
TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
{
	struct sock_fprog *order[] = {
		&self->allow, &self->trace, &self->error, &self->trap,
	};
	pid_t parent;
	size_t i;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	/* Install the filters in the listed order. */
	for (i = 0; i < ARRAY_SIZE(order); i++) {
		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, order[i]);
		ASSERT_EQ(0, ret);
	}

	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}
907 
/*
 * Same check as trap_is_second, but with the trap filter installed second
 * rather than last.
 */
TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
{
	struct sock_fprog *order[] = {
		&self->allow, &self->trap, &self->trace, &self->error,
	};
	pid_t parent;
	size_t i;
	long ret;

	parent = getppid();
	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	/* Install the filters in the listed order. */
	for (i = 0; i < ARRAY_SIZE(order); i++) {
		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, order[i]);
		ASSERT_EQ(0, ret);
	}

	/* Should work just fine. */
	EXPECT_EQ(parent, syscall(__NR_getppid));
	/* getpid() should never return. */
	EXPECT_EQ(0, syscall(__NR_getpid));
}
930 
TEST_F(precedence,errno_is_third)931 TEST_F(precedence, errno_is_third)
932 {
933 	pid_t parent;
934 	long ret;
935 
936 	parent = getppid();
937 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
938 	ASSERT_EQ(0, ret);
939 
940 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
941 	ASSERT_EQ(0, ret);
942 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
943 	ASSERT_EQ(0, ret);
944 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
945 	ASSERT_EQ(0, ret);
946 	/* Should work just fine. */
947 	EXPECT_EQ(parent, syscall(__NR_getppid));
948 	EXPECT_EQ(0, syscall(__NR_getpid));
949 }
950 
TEST_F(precedence,errno_is_third_in_any_order)951 TEST_F(precedence, errno_is_third_in_any_order)
952 {
953 	pid_t parent;
954 	long ret;
955 
956 	parent = getppid();
957 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
958 	ASSERT_EQ(0, ret);
959 
960 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
961 	ASSERT_EQ(0, ret);
962 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
963 	ASSERT_EQ(0, ret);
964 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
965 	ASSERT_EQ(0, ret);
966 	/* Should work just fine. */
967 	EXPECT_EQ(parent, syscall(__NR_getppid));
968 	EXPECT_EQ(0, syscall(__NR_getpid));
969 }
970 
TEST_F(precedence,trace_is_fourth)971 TEST_F(precedence, trace_is_fourth)
972 {
973 	pid_t parent;
974 	long ret;
975 
976 	parent = getppid();
977 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
978 	ASSERT_EQ(0, ret);
979 
980 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
981 	ASSERT_EQ(0, ret);
982 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
983 	ASSERT_EQ(0, ret);
984 	/* Should work just fine. */
985 	EXPECT_EQ(parent, syscall(__NR_getppid));
986 	/* No ptracer */
987 	EXPECT_EQ(-1, syscall(__NR_getpid));
988 }
989 
TEST_F(precedence,trace_is_fourth_in_any_order)990 TEST_F(precedence, trace_is_fourth_in_any_order)
991 {
992 	pid_t parent;
993 	long ret;
994 
995 	parent = getppid();
996 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
997 	ASSERT_EQ(0, ret);
998 
999 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1000 	ASSERT_EQ(0, ret);
1001 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1002 	ASSERT_EQ(0, ret);
1003 	/* Should work just fine. */
1004 	EXPECT_EQ(parent, syscall(__NR_getppid));
1005 	/* No ptracer */
1006 	EXPECT_EQ(-1, syscall(__NR_getpid));
1007 }
1008 
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP	0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

/*
 * True when the bits of a wait() status above bit 15 equal
 * PTRACE_EVENT_SECCOMP. The argument is parenthesized so that compound
 * expressions such as `a | b` are shifted as a whole.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)

/*
 * Run flag of the tracer loop. It is written from a signal handler, so it
 * uses the one object type that is guaranteed safe for that purpose.
 */
volatile sig_atomic_t tracer_running;

/* Signal handler: asks the tracer loop to stop by clearing the run flag. */
void tracer_stop(int sig)
{
	(void)sig;
	tracer_running = false;
}
1028 
1029 typedef void tracer_func_t(struct __test_metadata *_metadata,
1030 			   pid_t tracee, int status, void *args);
1031 
/*
 * Main routine of the tracer process.
 *
 * Installs tracer_stop() for SIGUSR1, attaches to @tracee with ptrace,
 * writes one byte to @fd to unblock the tracee and then passes every
 * handled stop to @tracer_func (with @args) before resuming the tracee.
 * With @ptrace_syscall set the tracee is driven with
 * PTRACE_O_TRACESYSGOOD/PTRACE_SYSCALL, otherwise with
 * PTRACE_O_TRACESECCOMP/PTRACE_CONT.
 *
 * Returns when the tracee has exited or was killed. If the loop ends
 * because tracer_running was cleared, the process exits directly with a
 * status derived from _metadata->passed.
 */
void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
	    tracer_func_t tracer_func, void *args, bool ptrace_syscall)
{
	/* Request used every time the tracee is resumed. */
	const int resume = ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT;
	const int options = ptrace_syscall ? PTRACE_O_TRACESYSGOOD
					   : PTRACE_O_TRACESECCOMP;
	struct sigaction action = {
		.sa_handler = tracer_stop,
	};
	int ret;

	/* Allow external shutdown. */
	tracer_running = true;
	ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));

	/* Retry the attach until it works; only EINVAL ends the retries. */
	errno = 0;
	do {
		ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
	} while (ret == -1 && errno != EINVAL);
	ASSERT_EQ(0, ret) {
		kill(tracee, SIGKILL);
	}
	/* Wait for attach stop */
	wait(NULL);

	ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, options);
	ASSERT_EQ(0, ret) {
		TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
		kill(tracee, SIGKILL);
	}
	ret = ptrace(resume, tracee, NULL, 0);
	ASSERT_EQ(0, ret);

	/* Unblock the tracee */
	ASSERT_EQ(1, write(fd, "A", 1));
	ASSERT_EQ(0, close(fd));

	/* Run until we're shut down. Must assert to stop execution. */
	while (tracer_running) {
		int status;

		/* Ignore anything that is not a report about the tracee. */
		if (wait(&status) != tracee)
			continue;

		/* Child is dead. Time to go. */
		if (WIFEXITED(status) || WIFSIGNALED(status))
			return;

		/* Check if this is a seccomp event. */
		ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));

		tracer_func(_metadata, tracee, status, args);

		ret = ptrace(resume, tracee, NULL, 0);
		ASSERT_EQ(0, ret);
	}

	/* Directly report the status of our test harness results. */
	syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
}
1090 
1091 /* Common tracer setup/teardown functions. */
/* Intentionally empty handler; setup_trace_fixture() installs it for SIGALRM. */
void cont_handler(int num)
{
	(void)num;
}
/*
 * Fork a tracer for the calling process and block until it signals
 * readiness over a pipe.
 *
 * The child runs start_tracer() with @func, @args and @ptrace_syscall and
 * then exits. The parent names the child via PR_SET_PTRACER, waits for the
 * one-byte handshake and returns the child's pid.
 */
pid_t setup_trace_fixture(struct __test_metadata *_metadata,
			  tracer_func_t func, void *args, bool ptrace_syscall)
{
	int pipefd[2];
	pid_t tracer_pid;
	const pid_t self_pid = getpid();
	char token;

	/* Setup a pipe for clean synchronization. */
	ASSERT_EQ(0, pipe(pipefd));

	/* Fork a child which we'll promote to tracer */
	tracer_pid = fork();
	ASSERT_LE(0, tracer_pid);
	signal(SIGALRM, cont_handler);

	if (tracer_pid == 0) {
		/* Tracer side: keep only the write end of the pipe. */
		close(pipefd[0]);
		start_tracer(_metadata, pipefd[1], self_pid, func, args,
			     ptrace_syscall);
		syscall(__NR_exit, 0);
	}

	/* Tracee side: keep only the read end and wait for the tracer. */
	close(pipefd[1]);
	prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
	read(pipefd[0], &token, 1);
	close(pipefd[0]);

	return tracer_pid;
}
teardown_trace_fixture(struct __test_metadata * _metadata,pid_t tracer)1122 void teardown_trace_fixture(struct __test_metadata *_metadata,
1123 			    pid_t tracer)
1124 {
1125 	if (tracer) {
1126 		int status;
1127 		/*
1128 		 * Extract the exit code from the other process and
1129 		 * adopt it for ourselves in case its asserts failed.
1130 		 */
1131 		ASSERT_EQ(0, kill(tracer, SIGUSR1));
1132 		ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1133 		if (WEXITSTATUS(status))
1134 			_metadata->passed = 0;
1135 	}
1136 }
1137 
1138 /* "poke" tracer arguments and function. */
struct tracer_args_poke_t {
	/* Address that tracer_poke() writes 0x1001 to with PTRACE_POKEDATA. */
	unsigned long poke_addr;
};
1142 
tracer_poke(struct __test_metadata * _metadata,pid_t tracee,int status,void * args)1143 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1144 		 void *args)
1145 {
1146 	int ret;
1147 	unsigned long msg;
1148 	struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1149 
1150 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1151 	EXPECT_EQ(0, ret);
1152 	/* If this fails, don't try to recover. */
1153 	ASSERT_EQ(0x1001, msg) {
1154 		kill(tracee, SIGKILL);
1155 	}
1156 	/*
1157 	 * Poke in the message.
1158 	 * Registers are not touched to try to keep this relatively arch
1159 	 * agnostic.
1160 	 */
1161 	ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1162 	EXPECT_EQ(0, ret);
1163 }
1164 
/* Per-test state of the TRACE_poke fixture. */
FIXTURE_DATA(TRACE_poke) {
	/* Filter program built by the fixture setup and installed by the tests. */
	struct sock_fprog prog;
	/* Pid returned by setup_trace_fixture(). */
	pid_t tracer;
	/* Word the tracer pokes 0x1001 into; reset to 0 in the fixture setup. */
	long poked;
	/* Argument block handed to tracer_poke(); carries the address of poked. */
	struct tracer_args_poke_t tracer_args;
};
1171 
FIXTURE_SETUP(TRACE_poke)1172 FIXTURE_SETUP(TRACE_poke)
1173 {
1174 	struct sock_filter filter[] = {
1175 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1176 			offsetof(struct seccomp_data, nr)),
1177 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1178 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1179 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1180 	};
1181 
1182 	self->poked = 0;
1183 	memset(&self->prog, 0, sizeof(self->prog));
1184 	self->prog.filter = malloc(sizeof(filter));
1185 	ASSERT_NE(NULL, self->prog.filter);
1186 	memcpy(self->prog.filter, filter, sizeof(filter));
1187 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1188 
1189 	/* Set up tracer args. */
1190 	self->tracer_args.poke_addr = (unsigned long)&self->poked;
1191 
1192 	/* Launch tracer. */
1193 	self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1194 					   &self->tracer_args, false);
1195 }
1196 
FIXTURE_TEARDOWN(TRACE_poke)1197 FIXTURE_TEARDOWN(TRACE_poke)
1198 {
1199 	teardown_trace_fixture(_metadata, self->tracer);
1200 	if (self->prog.filter)
1201 		free(self->prog.filter);
1202 }
1203 
TEST_F(TRACE_poke,read_has_side_effects)1204 TEST_F(TRACE_poke, read_has_side_effects)
1205 {
1206 	ssize_t ret;
1207 
1208 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1209 	ASSERT_EQ(0, ret);
1210 
1211 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1212 	ASSERT_EQ(0, ret);
1213 
1214 	EXPECT_EQ(0, self->poked);
1215 	ret = read(-1, NULL, 0);
1216 	EXPECT_EQ(-1, ret);
1217 	EXPECT_EQ(0x1001, self->poked);
1218 }
1219 
TEST_F(TRACE_poke,getpid_runs_normally)1220 TEST_F(TRACE_poke, getpid_runs_normally)
1221 {
1222 	long ret;
1223 
1224 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1225 	ASSERT_EQ(0, ret);
1226 
1227 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1228 	ASSERT_EQ(0, ret);
1229 
1230 	EXPECT_EQ(0, self->poked);
1231 	EXPECT_NE(0, syscall(__NR_getpid));
1232 	EXPECT_EQ(0, self->poked);
1233 }
1234 
/*
 * Per-architecture register layout used by get_syscall() and
 * change_syscall():
 *   ARCH_REGS           - register structure read and written via ptrace
 *   SYSCALL_NUM         - member holding the syscall number
 *   SYSCALL_RET         - member holding the syscall return value
 *   SYSCALL_SYSCALL_NUM - mips only: member used for the syscall number
 *                         when SYSCALL_NUM holds __NR_O32_Linux
 * SYSCALL_NUM_RET_SHARE_REG is defined where SYSCALL_NUM and SYSCALL_RET
 * name the same member; storing a return value there would overwrite the
 * syscall number, so change_syscall() does not rewrite the return value
 * on those architectures.
 */
#if defined(__x86_64__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_rax
# define SYSCALL_RET	rax
#elif defined(__i386__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_eax
# define SYSCALL_RET	eax
#elif defined(__arm__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	ARM_r7
# define SYSCALL_RET	ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS	struct user_pt_regs
# define SYSCALL_NUM	regs[8]
# define SYSCALL_RET	regs[0]
#elif defined(__hppa__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	gr[20]
# define SYSCALL_RET	gr[28]
#elif defined(__powerpc__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	gpr[0]
# define SYSCALL_RET	gpr[3]
#elif defined(__s390__)
# define ARCH_REGS     s390_regs
# define SYSCALL_NUM   gprs[2]
# define SYSCALL_RET   gprs[2]
/* gprs[2] carries both values, exactly like regs[2] on mips below. */
# define SYSCALL_NUM_RET_SHARE_REG
#elif defined(__mips__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	regs[2]
# define SYSCALL_SYSCALL_NUM regs[4]
# define SYSCALL_RET	regs[2]
# define SYSCALL_NUM_RET_SHARE_REG
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif

/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
 */
#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
#define HAVE_GETREGS
#endif
1279 
1280 /* Architecture-specific syscall fetching routine. */
get_syscall(struct __test_metadata * _metadata,pid_t tracee)1281 int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1282 {
1283 	ARCH_REGS regs;
1284 #ifdef HAVE_GETREGS
1285 	EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
1286 		TH_LOG("PTRACE_GETREGS failed");
1287 		return -1;
1288 	}
1289 #else
1290 	struct iovec iov;
1291 
1292 	iov.iov_base = &regs;
1293 	iov.iov_len = sizeof(regs);
1294 	EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1295 		TH_LOG("PTRACE_GETREGSET failed");
1296 		return -1;
1297 	}
1298 #endif
1299 
1300 #if defined(__mips__)
1301 	if (regs.SYSCALL_NUM == __NR_O32_Linux)
1302 		return regs.SYSCALL_SYSCALL_NUM;
1303 #endif
1304 	return regs.SYSCALL_NUM;
1305 }
1306 
/*
 * Architecture-specific syscall changing routine.
 *
 * Reads the registers of the stopped @tracee, replaces the pending syscall
 * number with @syscall using whatever mechanism the architecture needs,
 * and writes the registers back. When @syscall is -1 the return-value
 * register is additionally set to 1, unless SYSCALL_NUM_RET_SHARE_REG is
 * defined, in which case only a log message is emitted.
 *
 * Failures of the individual ptrace requests are reported through
 * EXPECT_EQ; nothing is returned to the caller.
 */
void change_syscall(struct __test_metadata *_metadata,
		    pid_t tracee, int syscall)
{
	int ret;
	ARCH_REGS regs;
	/* Fetch the current register set. */
#ifdef HAVE_GETREGS
	ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
#else
	struct iovec iov;
	iov.iov_base = &regs;
	iov.iov_len = sizeof(regs);
	ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
#endif
	EXPECT_EQ(0, ret) {}

	/* These architectures take the new number through the register set. */
#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
    defined(__s390__) || defined(__hppa__)
	{
		regs.SYSCALL_NUM = syscall;
	}
#elif defined(__mips__)
	{
		/* Mirrors get_syscall(): pick the member that holds the number. */
		if (regs.SYSCALL_NUM == __NR_O32_Linux)
			regs.SYSCALL_SYSCALL_NUM = syscall;
		else
			regs.SYSCALL_NUM = syscall;
	}

#elif defined(__arm__)
# ifndef PTRACE_SET_SYSCALL
#  define PTRACE_SET_SYSCALL   23
# endif
	{
		/* arm sets the number with a dedicated ptrace request. */
		ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
		EXPECT_EQ(0, ret);
	}

#elif defined(__aarch64__)
# ifndef NT_ARM_SYSTEM_CALL
#  define NT_ARM_SYSTEM_CALL 0x404
# endif
	{
		/* aarch64 sets the number through the NT_ARM_SYSTEM_CALL regset. */
		iov.iov_base = &syscall;
		iov.iov_len = sizeof(syscall);
		ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
			     &iov);
		EXPECT_EQ(0, ret);
	}

#else
	ASSERT_EQ(1, 0) {
		TH_LOG("How is the syscall changed on this architecture?");
	}
#endif

	/* If syscall is skipped, change return value. */
	if (syscall == -1)
#ifdef SYSCALL_NUM_RET_SHARE_REG
		TH_LOG("Can't modify syscall return on this architecture");
#else
		regs.SYSCALL_RET = 1;
#endif

	/* Write the (possibly modified) register set back to the tracee. */
#ifdef HAVE_GETREGS
	ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
#else
	iov.iov_base = &regs;
	iov.iov_len = sizeof(regs);
	ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
#endif
	EXPECT_EQ(0, ret);
}
1380 
tracer_syscall(struct __test_metadata * _metadata,pid_t tracee,int status,void * args)1381 void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1382 		    int status, void *args)
1383 {
1384 	int ret;
1385 	unsigned long msg;
1386 
1387 	/* Make sure we got the right message. */
1388 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1389 	EXPECT_EQ(0, ret);
1390 
1391 	/* Validate and take action on expected syscalls. */
1392 	switch (msg) {
1393 	case 0x1002:
1394 		/* change getpid to getppid. */
1395 		EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
1396 		change_syscall(_metadata, tracee, __NR_getppid);
1397 		break;
1398 	case 0x1003:
1399 		/* skip gettid. */
1400 		EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
1401 		change_syscall(_metadata, tracee, -1);
1402 		break;
1403 	case 0x1004:
1404 		/* do nothing (allow getppid) */
1405 		EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
1406 		break;
1407 	default:
1408 		EXPECT_EQ(0, msg) {
1409 			TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1410 			kill(tracee, SIGKILL);
1411 		}
1412 	}
1413 
1414 }
1415 
tracer_ptrace(struct __test_metadata * _metadata,pid_t tracee,int status,void * args)1416 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
1417 		   int status, void *args)
1418 {
1419 	int ret, nr;
1420 	unsigned long msg;
1421 	static bool entry;
1422 
1423 	/* Make sure we got an empty message. */
1424 	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1425 	EXPECT_EQ(0, ret);
1426 	EXPECT_EQ(0, msg);
1427 
1428 	/* The only way to tell PTRACE_SYSCALL entry/exit is by counting. */
1429 	entry = !entry;
1430 	if (!entry)
1431 		return;
1432 
1433 	nr = get_syscall(_metadata, tracee);
1434 
1435 	if (nr == __NR_getpid)
1436 		change_syscall(_metadata, tracee, __NR_getppid);
1437 }
1438 
/* Per-test state of the TRACE_syscall fixture. */
FIXTURE_DATA(TRACE_syscall) {
	/* Filter program built by the fixture setup and installed by the tests. */
	struct sock_fprog prog;
	/*
	 * tracer: pid returned by setup_trace_fixture().
	 * mytid, mypid, parent: results of gettid, getpid and getppid recorded
	 * in the fixture setup, used as reference values by the tests.
	 */
	pid_t tracer, mytid, mypid, parent;
};
1443 
FIXTURE_SETUP(TRACE_syscall)1444 FIXTURE_SETUP(TRACE_syscall)
1445 {
1446 	struct sock_filter filter[] = {
1447 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1448 			offsetof(struct seccomp_data, nr)),
1449 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1450 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1451 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1452 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1453 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1454 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1455 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1456 	};
1457 
1458 	memset(&self->prog, 0, sizeof(self->prog));
1459 	self->prog.filter = malloc(sizeof(filter));
1460 	ASSERT_NE(NULL, self->prog.filter);
1461 	memcpy(self->prog.filter, filter, sizeof(filter));
1462 	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1463 
1464 	/* Prepare some testable syscall results. */
1465 	self->mytid = syscall(__NR_gettid);
1466 	ASSERT_GT(self->mytid, 0);
1467 	ASSERT_NE(self->mytid, 1) {
1468 		TH_LOG("Running this test as init is not supported. :)");
1469 	}
1470 
1471 	self->mypid = getpid();
1472 	ASSERT_GT(self->mypid, 0);
1473 	ASSERT_EQ(self->mytid, self->mypid);
1474 
1475 	self->parent = getppid();
1476 	ASSERT_GT(self->parent, 0);
1477 	ASSERT_NE(self->parent, self->mypid);
1478 
1479 	/* Launch tracer. */
1480 	self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
1481 					   false);
1482 }
1483 
FIXTURE_TEARDOWN(TRACE_syscall)1484 FIXTURE_TEARDOWN(TRACE_syscall)
1485 {
1486 	teardown_trace_fixture(_metadata, self->tracer);
1487 	if (self->prog.filter)
1488 		free(self->prog.filter);
1489 }
1490 
TEST_F(TRACE_syscall,syscall_allowed)1491 TEST_F(TRACE_syscall, syscall_allowed)
1492 {
1493 	long ret;
1494 
1495 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1496 	ASSERT_EQ(0, ret);
1497 
1498 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1499 	ASSERT_EQ(0, ret);
1500 
1501 	/* getppid works as expected (no changes). */
1502 	EXPECT_EQ(self->parent, syscall(__NR_getppid));
1503 	EXPECT_NE(self->mypid, syscall(__NR_getppid));
1504 }
1505 
TEST_F(TRACE_syscall,syscall_redirected)1506 TEST_F(TRACE_syscall, syscall_redirected)
1507 {
1508 	long ret;
1509 
1510 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1511 	ASSERT_EQ(0, ret);
1512 
1513 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1514 	ASSERT_EQ(0, ret);
1515 
1516 	/* getpid has been redirected to getppid as expected. */
1517 	EXPECT_EQ(self->parent, syscall(__NR_getpid));
1518 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
1519 }
1520 
TEST_F(TRACE_syscall,syscall_dropped)1521 TEST_F(TRACE_syscall, syscall_dropped)
1522 {
1523 	long ret;
1524 
1525 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1526 	ASSERT_EQ(0, ret);
1527 
1528 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1529 	ASSERT_EQ(0, ret);
1530 
1531 #ifdef SYSCALL_NUM_RET_SHARE_REG
1532 	/* gettid has been skipped */
1533 	EXPECT_EQ(-1, syscall(__NR_gettid));
1534 #else
1535 	/* gettid has been skipped and an altered return value stored. */
1536 	EXPECT_EQ(1, syscall(__NR_gettid));
1537 #endif
1538 	EXPECT_NE(self->mytid, syscall(__NR_gettid));
1539 }
1540 
TEST_F(TRACE_syscall,skip_after_RET_TRACE)1541 TEST_F(TRACE_syscall, skip_after_RET_TRACE)
1542 {
1543 	struct sock_filter filter[] = {
1544 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1545 			offsetof(struct seccomp_data, nr)),
1546 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1547 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1548 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1549 	};
1550 	struct sock_fprog prog = {
1551 		.len = (unsigned short)ARRAY_SIZE(filter),
1552 		.filter = filter,
1553 	};
1554 	long ret;
1555 
1556 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1557 	ASSERT_EQ(0, ret);
1558 
1559 	/* Install fixture filter. */
1560 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1561 	ASSERT_EQ(0, ret);
1562 
1563 	/* Install "errno on getppid" filter. */
1564 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1565 	ASSERT_EQ(0, ret);
1566 
1567 	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
1568 	EXPECT_EQ(-1, syscall(__NR_getpid));
1569 	EXPECT_EQ(EPERM, errno);
1570 }
1571 
/*
 * With an extra "kill on getppid" filter stacked on the fixture filter,
 * a getpid that the tracer rewrites to getppid must end in SIGSYS.
 */
TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.filter = filter,
		.len = (unsigned short)ARRAY_SIZE(filter),
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	/* Install fixture filter. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* Install "death on getppid" filter. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* Tracer will redirect getpid to getppid, and we should die. */
	EXPECT_NE(self->mypid, syscall(__NR_getpid));
}
1601 
TEST_F(TRACE_syscall,skip_after_ptrace)1602 TEST_F(TRACE_syscall, skip_after_ptrace)
1603 {
1604 	struct sock_filter filter[] = {
1605 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1606 			offsetof(struct seccomp_data, nr)),
1607 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1608 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1609 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1610 	};
1611 	struct sock_fprog prog = {
1612 		.len = (unsigned short)ARRAY_SIZE(filter),
1613 		.filter = filter,
1614 	};
1615 	long ret;
1616 
1617 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1618 	teardown_trace_fixture(_metadata, self->tracer);
1619 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1620 					   true);
1621 
1622 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1623 	ASSERT_EQ(0, ret);
1624 
1625 	/* Install "errno on getppid" filter. */
1626 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1627 	ASSERT_EQ(0, ret);
1628 
1629 	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
1630 	EXPECT_EQ(-1, syscall(__NR_getpid));
1631 	EXPECT_EQ(EPERM, errno);
1632 }
1633 
/*
 * Same expectation as kill_after_RET_TRACE, but the getpid -> getppid
 * rewrite is done by a PTRACE_SYSCALL tracer and only the "kill on
 * getppid" filter is installed.
 */
TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.filter = filter,
		.len = (unsigned short)ARRAY_SIZE(filter),
	};
	long ret;

	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
	teardown_trace_fixture(_metadata, self->tracer);
	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
					   true);

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	/* Install "death on getppid" filter. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* Tracer will redirect getpid to getppid, and we should die. */
	EXPECT_NE(self->mypid, syscall(__NR_getpid));
}
1664 
/*
 * Fallback syscall number for seccomp() when the installed headers do not
 * provide __NR_seccomp. Architectures not listed here get a compile-time
 * warning and the placeholder value 0xffff.
 */
#ifndef __NR_seccomp
# if defined(__i386__)
#  define __NR_seccomp 354
# elif defined(__x86_64__)
#  define __NR_seccomp 317
# elif defined(__arm__)
#  define __NR_seccomp 383
# elif defined(__aarch64__)
#  define __NR_seccomp 277
# elif defined(__hppa__)
#  define __NR_seccomp 338
# elif defined(__powerpc__)
#  define __NR_seccomp 358
# elif defined(__s390__)
#  define __NR_seccomp 348
# else
#  warning "seccomp syscall number unknown for this architecture"
#  define __NR_seccomp 0xffff
# endif
#endif

/* Operation codes passed as the first argument of seccomp(). */
#ifndef SECCOMP_SET_MODE_STRICT
#define SECCOMP_SET_MODE_STRICT 0
#endif

#ifndef SECCOMP_SET_MODE_FILTER
#define SECCOMP_SET_MODE_FILTER 1
#endif

/* Flag passed as the second argument of seccomp() by the TSYNC tests. */
#ifndef SECCOMP_FILTER_FLAG_TSYNC
#define SECCOMP_FILTER_FLAG_TSYNC 1
#endif
1697 
#ifndef seccomp
/*
 * Thin wrapper around the raw seccomp syscall.
 *
 * errno is reset to 0 before the call so callers can inspect it
 * afterwards without seeing a stale value. Returns whatever syscall()
 * returns, narrowed to int.
 */
int seccomp(unsigned int op, unsigned int flags, void *args)
{
	long rc;

	errno = 0;
	rc = syscall(__NR_seccomp, op, flags, args);
	return rc;
}
#endif
1705 
TEST(seccomp_syscall)1706 TEST(seccomp_syscall)
1707 {
1708 	struct sock_filter filter[] = {
1709 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1710 	};
1711 	struct sock_fprog prog = {
1712 		.len = (unsigned short)ARRAY_SIZE(filter),
1713 		.filter = filter,
1714 	};
1715 	long ret;
1716 
1717 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1718 	ASSERT_EQ(0, ret) {
1719 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1720 	}
1721 
1722 	/* Reject insane operation. */
1723 	ret = seccomp(-1, 0, &prog);
1724 	ASSERT_NE(ENOSYS, errno) {
1725 		TH_LOG("Kernel does not support seccomp syscall!");
1726 	}
1727 	EXPECT_EQ(EINVAL, errno) {
1728 		TH_LOG("Did not reject crazy op value!");
1729 	}
1730 
1731 	/* Reject strict with flags or pointer. */
1732 	ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
1733 	EXPECT_EQ(EINVAL, errno) {
1734 		TH_LOG("Did not reject mode strict with flags!");
1735 	}
1736 	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
1737 	EXPECT_EQ(EINVAL, errno) {
1738 		TH_LOG("Did not reject mode strict with uargs!");
1739 	}
1740 
1741 	/* Reject insane args for filter. */
1742 	ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
1743 	EXPECT_EQ(EINVAL, errno) {
1744 		TH_LOG("Did not reject crazy filter flags!");
1745 	}
1746 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
1747 	EXPECT_EQ(EFAULT, errno) {
1748 		TH_LOG("Did not reject NULL filter!");
1749 	}
1750 
1751 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1752 	EXPECT_EQ(0, errno) {
1753 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
1754 			strerror(errno));
1755 	}
1756 }
1757 
TEST(seccomp_syscall_mode_lock)1758 TEST(seccomp_syscall_mode_lock)
1759 {
1760 	struct sock_filter filter[] = {
1761 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1762 	};
1763 	struct sock_fprog prog = {
1764 		.len = (unsigned short)ARRAY_SIZE(filter),
1765 		.filter = filter,
1766 	};
1767 	long ret;
1768 
1769 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
1770 	ASSERT_EQ(0, ret) {
1771 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1772 	}
1773 
1774 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1775 	ASSERT_NE(ENOSYS, errno) {
1776 		TH_LOG("Kernel does not support seccomp syscall!");
1777 	}
1778 	EXPECT_EQ(0, ret) {
1779 		TH_LOG("Could not install filter!");
1780 	}
1781 
1782 	/* Make sure neither entry point will switch to strict. */
1783 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
1784 	EXPECT_EQ(EINVAL, errno) {
1785 		TH_LOG("Switched to mode strict!");
1786 	}
1787 
1788 	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
1789 	EXPECT_EQ(EINVAL, errno) {
1790 		TH_LOG("Switched to mode strict!");
1791 	}
1792 }
1793 
TEST(TSYNC_first)1794 TEST(TSYNC_first)
1795 {
1796 	struct sock_filter filter[] = {
1797 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1798 	};
1799 	struct sock_fprog prog = {
1800 		.len = (unsigned short)ARRAY_SIZE(filter),
1801 		.filter = filter,
1802 	};
1803 	long ret;
1804 
1805 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
1806 	ASSERT_EQ(0, ret) {
1807 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1808 	}
1809 
1810 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
1811 		      &prog);
1812 	ASSERT_NE(ENOSYS, errno) {
1813 		TH_LOG("Kernel does not support seccomp syscall!");
1814 	}
1815 	EXPECT_EQ(0, ret) {
1816 		TH_LOG("Could not install initial filter with TSYNC!");
1817 	}
1818 }
1819 
/* Number of sibling threads managed by the TSYNC fixture. */
#define TSYNC_SIBLINGS 2
/* Per-thread state shared between a TSYNC test and one sibling thread. */
struct tsync_sibling {
	/* pthread handle filled in by pthread_create(). */
	pthread_t tid;
	/* Result of gettid as recorded by the thread itself. */
	pid_t system_tid;
	/* Semaphore the thread posts once it has started. */
	sem_t *started;
	/* Condition variable (with its mutex below) the thread waits on. */
	pthread_cond_t *cond;
	pthread_mutex_t *mutex;
	/* Non-zero: the thread installs prog itself before waiting. */
	int diverge;
	/* Number of condition wake-ups to consume before proceeding. */
	int num_waits;
	/* Filter program installed by the thread when diverge is set. */
	struct sock_fprog *prog;
	/* Harness metadata of the owning test. */
	struct __test_metadata *metadata;
};
1832 
/* Per-test state of the TSYNC fixture. */
FIXTURE_DATA(TSYNC) {
	/*
	 * root_prog: allow-all filter; apply_prog: filter that kills on read().
	 * Both are heap copies created by the fixture setup.
	 */
	struct sock_fprog root_prog, apply_prog;
	/* State of each sibling thread. */
	struct tsync_sibling sibling[TSYNC_SIBLINGS];
	/* Posted by every sibling once it is running. */
	sem_t started;
	/* Condition/mutex pair used by the tests to release the siblings. */
	pthread_cond_t cond;
	pthread_mutex_t mutex;
	/* Number of siblings whose start has been observed by the test. */
	int sibling_count;
};
1841 
FIXTURE_SETUP(TSYNC)1842 FIXTURE_SETUP(TSYNC)
1843 {
1844 	struct sock_filter root_filter[] = {
1845 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1846 	};
1847 	struct sock_filter apply_filter[] = {
1848 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1849 			offsetof(struct seccomp_data, nr)),
1850 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1851 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1852 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1853 	};
1854 
1855 	memset(&self->root_prog, 0, sizeof(self->root_prog));
1856 	memset(&self->apply_prog, 0, sizeof(self->apply_prog));
1857 	memset(&self->sibling, 0, sizeof(self->sibling));
1858 	self->root_prog.filter = malloc(sizeof(root_filter));
1859 	ASSERT_NE(NULL, self->root_prog.filter);
1860 	memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
1861 	self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
1862 
1863 	self->apply_prog.filter = malloc(sizeof(apply_filter));
1864 	ASSERT_NE(NULL, self->apply_prog.filter);
1865 	memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
1866 	self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
1867 
1868 	self->sibling_count = 0;
1869 	pthread_mutex_init(&self->mutex, NULL);
1870 	pthread_cond_init(&self->cond, NULL);
1871 	sem_init(&self->started, 0, 0);
1872 	self->sibling[0].tid = 0;
1873 	self->sibling[0].cond = &self->cond;
1874 	self->sibling[0].started = &self->started;
1875 	self->sibling[0].mutex = &self->mutex;
1876 	self->sibling[0].diverge = 0;
1877 	self->sibling[0].num_waits = 1;
1878 	self->sibling[0].prog = &self->root_prog;
1879 	self->sibling[0].metadata = _metadata;
1880 	self->sibling[1].tid = 0;
1881 	self->sibling[1].cond = &self->cond;
1882 	self->sibling[1].started = &self->started;
1883 	self->sibling[1].mutex = &self->mutex;
1884 	self->sibling[1].diverge = 0;
1885 	self->sibling[1].prog = &self->root_prog;
1886 	self->sibling[1].num_waits = 1;
1887 	self->sibling[1].metadata = _metadata;
1888 }
1889 
/*
 * Release the filter copies, deal with the sibling threads counted in
 * sibling_count and destroy the synchronisation primitives.
 */
FIXTURE_TEARDOWN(TSYNC)
{
	int sib = 0;

	/* Release the filter copies allocated by the fixture setup. */
	if (self->root_prog.filter)
		free(self->root_prog.filter);
	if (self->apply_prog.filter)
		free(self->apply_prog.filter);

	/* Only siblings the test counted in sibling_count are visited. */
	for ( ; sib < self->sibling_count; ++sib) {
		struct tsync_sibling *s = &self->sibling[sib];
		void *status;

		/* tid is zeroed in setup; skip slots that still hold 0. */
		if (!s->tid)
			continue;
		/*
		 * NOTE(review): the cancel/join pair runs only when
		 * pthread_kill(tid, 0) returns non-zero, i.e. when the probe
		 * reports an error. Confirm this polarity is intended, and
		 * whether the tests have already joined these tids by the
		 * time teardown runs.
		 */
		if (pthread_kill(s->tid, 0)) {
			pthread_cancel(s->tid);
			pthread_join(s->tid, &status);
		}
	}
	pthread_mutex_destroy(&self->mutex);
	pthread_cond_destroy(&self->cond);
	sem_destroy(&self->started);
}
1914 
tsync_sibling(void * data)1915 void *tsync_sibling(void *data)
1916 {
1917 	long ret = 0;
1918 	struct tsync_sibling *me = data;
1919 
1920 	me->system_tid = syscall(__NR_gettid);
1921 
1922 	pthread_mutex_lock(me->mutex);
1923 	if (me->diverge) {
1924 		/* Just re-apply the root prog to fork the tree */
1925 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
1926 				me->prog, 0, 0);
1927 	}
1928 	sem_post(me->started);
1929 	/* Return outside of started so parent notices failures. */
1930 	if (ret) {
1931 		pthread_mutex_unlock(me->mutex);
1932 		return (void *)SIBLING_EXIT_FAILURE;
1933 	}
1934 	do {
1935 		pthread_cond_wait(me->cond, me->mutex);
1936 		me->num_waits = me->num_waits - 1;
1937 	} while (me->num_waits);
1938 	pthread_mutex_unlock(me->mutex);
1939 
1940 	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
1941 	if (!ret)
1942 		return (void *)SIBLING_EXIT_NEWPRIVS;
1943 	read(0, NULL, 0);
1944 	return (void *)SIBLING_EXIT_UNKILLED;
1945 }
1946 
tsync_start_sibling(struct tsync_sibling * sibling)1947 void tsync_start_sibling(struct tsync_sibling *sibling)
1948 {
1949 	pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
1950 }
1951 
TEST_F(TSYNC,siblings_fail_prctl)1952 TEST_F(TSYNC, siblings_fail_prctl)
1953 {
1954 	long ret;
1955 	void *status;
1956 	struct sock_filter filter[] = {
1957 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1958 			offsetof(struct seccomp_data, nr)),
1959 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
1960 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
1961 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1962 	};
1963 	struct sock_fprog prog = {
1964 		.len = (unsigned short)ARRAY_SIZE(filter),
1965 		.filter = filter,
1966 	};
1967 
1968 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1969 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1970 	}
1971 
1972 	/* Check prctl failure detection by requesting sib 0 diverge. */
1973 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1974 	ASSERT_NE(ENOSYS, errno) {
1975 		TH_LOG("Kernel does not support seccomp syscall!");
1976 	}
1977 	ASSERT_EQ(0, ret) {
1978 		TH_LOG("setting filter failed");
1979 	}
1980 
1981 	self->sibling[0].diverge = 1;
1982 	tsync_start_sibling(&self->sibling[0]);
1983 	tsync_start_sibling(&self->sibling[1]);
1984 
1985 	while (self->sibling_count < TSYNC_SIBLINGS) {
1986 		sem_wait(&self->started);
1987 		self->sibling_count++;
1988 	}
1989 
1990 	/* Signal the threads to clean up*/
1991 	pthread_mutex_lock(&self->mutex);
1992 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1993 		TH_LOG("cond broadcast non-zero");
1994 	}
1995 	pthread_mutex_unlock(&self->mutex);
1996 
1997 	/* Ensure diverging sibling failed to call prctl. */
1998 	pthread_join(self->sibling[0].tid, &status);
1999 	EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
2000 	pthread_join(self->sibling[1].tid, &status);
2001 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2002 }
2003 
TEST_F(TSYNC,two_siblings_with_ancestor)2004 TEST_F(TSYNC, two_siblings_with_ancestor)
2005 {
2006 	long ret;
2007 	void *status;
2008 
2009 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2010 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2011 	}
2012 
2013 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2014 	ASSERT_NE(ENOSYS, errno) {
2015 		TH_LOG("Kernel does not support seccomp syscall!");
2016 	}
2017 	ASSERT_EQ(0, ret) {
2018 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2019 	}
2020 	tsync_start_sibling(&self->sibling[0]);
2021 	tsync_start_sibling(&self->sibling[1]);
2022 
2023 	while (self->sibling_count < TSYNC_SIBLINGS) {
2024 		sem_wait(&self->started);
2025 		self->sibling_count++;
2026 	}
2027 
2028 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2029 		      &self->apply_prog);
2030 	ASSERT_EQ(0, ret) {
2031 		TH_LOG("Could install filter on all threads!");
2032 	}
2033 	/* Tell the siblings to test the policy */
2034 	pthread_mutex_lock(&self->mutex);
2035 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2036 		TH_LOG("cond broadcast non-zero");
2037 	}
2038 	pthread_mutex_unlock(&self->mutex);
2039 	/* Ensure they are both killed and don't exit cleanly. */
2040 	pthread_join(self->sibling[0].tid, &status);
2041 	EXPECT_EQ(0x0, (long)status);
2042 	pthread_join(self->sibling[1].tid, &status);
2043 	EXPECT_EQ(0x0, (long)status);
2044 }
2045 
TEST_F(TSYNC,two_sibling_want_nnp)2046 TEST_F(TSYNC, two_sibling_want_nnp)
2047 {
2048 	void *status;
2049 
2050 	/* start siblings before any prctl() operations */
2051 	tsync_start_sibling(&self->sibling[0]);
2052 	tsync_start_sibling(&self->sibling[1]);
2053 	while (self->sibling_count < TSYNC_SIBLINGS) {
2054 		sem_wait(&self->started);
2055 		self->sibling_count++;
2056 	}
2057 
2058 	/* Tell the siblings to test no policy */
2059 	pthread_mutex_lock(&self->mutex);
2060 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2061 		TH_LOG("cond broadcast non-zero");
2062 	}
2063 	pthread_mutex_unlock(&self->mutex);
2064 
2065 	/* Ensure they are both upset about lacking nnp. */
2066 	pthread_join(self->sibling[0].tid, &status);
2067 	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2068 	pthread_join(self->sibling[1].tid, &status);
2069 	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2070 }
2071 
TEST_F(TSYNC,two_siblings_with_no_filter)2072 TEST_F(TSYNC, two_siblings_with_no_filter)
2073 {
2074 	long ret;
2075 	void *status;
2076 
2077 	/* start siblings before any prctl() operations */
2078 	tsync_start_sibling(&self->sibling[0]);
2079 	tsync_start_sibling(&self->sibling[1]);
2080 	while (self->sibling_count < TSYNC_SIBLINGS) {
2081 		sem_wait(&self->started);
2082 		self->sibling_count++;
2083 	}
2084 
2085 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2086 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2087 	}
2088 
2089 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2090 		      &self->apply_prog);
2091 	ASSERT_NE(ENOSYS, errno) {
2092 		TH_LOG("Kernel does not support seccomp syscall!");
2093 	}
2094 	ASSERT_EQ(0, ret) {
2095 		TH_LOG("Could install filter on all threads!");
2096 	}
2097 
2098 	/* Tell the siblings to test the policy */
2099 	pthread_mutex_lock(&self->mutex);
2100 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2101 		TH_LOG("cond broadcast non-zero");
2102 	}
2103 	pthread_mutex_unlock(&self->mutex);
2104 
2105 	/* Ensure they are both killed and don't exit cleanly. */
2106 	pthread_join(self->sibling[0].tid, &status);
2107 	EXPECT_EQ(0x0, (long)status);
2108 	pthread_join(self->sibling[1].tid, &status);
2109 	EXPECT_EQ(0x0, (long)status);
2110 }
2111 
TEST_F(TSYNC,two_siblings_with_one_divergence)2112 TEST_F(TSYNC, two_siblings_with_one_divergence)
2113 {
2114 	long ret;
2115 	void *status;
2116 
2117 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2118 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2119 	}
2120 
2121 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2122 	ASSERT_NE(ENOSYS, errno) {
2123 		TH_LOG("Kernel does not support seccomp syscall!");
2124 	}
2125 	ASSERT_EQ(0, ret) {
2126 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2127 	}
2128 	self->sibling[0].diverge = 1;
2129 	tsync_start_sibling(&self->sibling[0]);
2130 	tsync_start_sibling(&self->sibling[1]);
2131 
2132 	while (self->sibling_count < TSYNC_SIBLINGS) {
2133 		sem_wait(&self->started);
2134 		self->sibling_count++;
2135 	}
2136 
2137 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2138 		      &self->apply_prog);
2139 	ASSERT_EQ(self->sibling[0].system_tid, ret) {
2140 		TH_LOG("Did not fail on diverged sibling.");
2141 	}
2142 
2143 	/* Wake the threads */
2144 	pthread_mutex_lock(&self->mutex);
2145 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2146 		TH_LOG("cond broadcast non-zero");
2147 	}
2148 	pthread_mutex_unlock(&self->mutex);
2149 
2150 	/* Ensure they are both unkilled. */
2151 	pthread_join(self->sibling[0].tid, &status);
2152 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2153 	pthread_join(self->sibling[1].tid, &status);
2154 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2155 }
2156 
TEST_F(TSYNC,two_siblings_not_under_filter)2157 TEST_F(TSYNC, two_siblings_not_under_filter)
2158 {
2159 	long ret, sib;
2160 	void *status;
2161 
2162 	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2163 		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2164 	}
2165 
2166 	/*
2167 	 * Sibling 0 will have its own seccomp policy
2168 	 * and Sibling 1 will not be under seccomp at
2169 	 * all. Sibling 1 will enter seccomp and 0
2170 	 * will cause failure.
2171 	 */
2172 	self->sibling[0].diverge = 1;
2173 	tsync_start_sibling(&self->sibling[0]);
2174 	tsync_start_sibling(&self->sibling[1]);
2175 
2176 	while (self->sibling_count < TSYNC_SIBLINGS) {
2177 		sem_wait(&self->started);
2178 		self->sibling_count++;
2179 	}
2180 
2181 	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2182 	ASSERT_NE(ENOSYS, errno) {
2183 		TH_LOG("Kernel does not support seccomp syscall!");
2184 	}
2185 	ASSERT_EQ(0, ret) {
2186 		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2187 	}
2188 
2189 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2190 		      &self->apply_prog);
2191 	ASSERT_EQ(ret, self->sibling[0].system_tid) {
2192 		TH_LOG("Did not fail on diverged sibling.");
2193 	}
2194 	sib = 1;
2195 	if (ret == self->sibling[0].system_tid)
2196 		sib = 0;
2197 
2198 	pthread_mutex_lock(&self->mutex);
2199 
2200 	/* Increment the other siblings num_waits so we can clean up
2201 	 * the one we just saw.
2202 	 */
2203 	self->sibling[!sib].num_waits += 1;
2204 
2205 	/* Signal the thread to clean up*/
2206 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2207 		TH_LOG("cond broadcast non-zero");
2208 	}
2209 	pthread_mutex_unlock(&self->mutex);
2210 	pthread_join(self->sibling[sib].tid, &status);
2211 	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2212 	/* Poll for actual task death. pthread_join doesn't guarantee it. */
2213 	while (!kill(self->sibling[sib].system_tid, 0))
2214 		sleep(0.1);
2215 	/* Switch to the remaining sibling */
2216 	sib = !sib;
2217 
2218 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2219 		      &self->apply_prog);
2220 	ASSERT_EQ(0, ret) {
2221 		TH_LOG("Expected the remaining sibling to sync");
2222 	};
2223 
2224 	pthread_mutex_lock(&self->mutex);
2225 
2226 	/* If remaining sibling didn't have a chance to wake up during
2227 	 * the first broadcast, manually reduce the num_waits now.
2228 	 */
2229 	if (self->sibling[sib].num_waits > 1)
2230 		self->sibling[sib].num_waits = 1;
2231 	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2232 		TH_LOG("cond broadcast non-zero");
2233 	}
2234 	pthread_mutex_unlock(&self->mutex);
2235 	pthread_join(self->sibling[sib].tid, &status);
2236 	EXPECT_EQ(0, (long)status);
2237 	/* Poll for actual task death. pthread_join doesn't guarantee it. */
2238 	while (!kill(self->sibling[sib].system_tid, 0))
2239 		sleep(0.1);
2240 
2241 	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2242 		      &self->apply_prog);
2243 	ASSERT_EQ(0, ret);  /* just us chickens */
2244 }
2245 
2246 /* Make sure restarted syscalls are seen directly as "restart_syscall". */
TEST(syscall_restart)2247 TEST(syscall_restart)
2248 {
2249 	long ret;
2250 	unsigned long msg;
2251 	pid_t child_pid;
2252 	int pipefd[2];
2253 	int status;
2254 	siginfo_t info = { };
2255 	struct sock_filter filter[] = {
2256 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
2257 			 offsetof(struct seccomp_data, nr)),
2258 
2259 #ifdef __NR_sigreturn
2260 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
2261 #endif
2262 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
2263 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
2264 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
2265 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0),
2266 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
2267 
2268 		/* Allow __NR_write for easy logging. */
2269 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
2270 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
2271 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
2272 		/* The nanosleep jump target. */
2273 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
2274 		/* The restart_syscall jump target. */
2275 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
2276 	};
2277 	struct sock_fprog prog = {
2278 		.len = (unsigned short)ARRAY_SIZE(filter),
2279 		.filter = filter,
2280 	};
2281 #if defined(__arm__)
2282 	struct utsname utsbuf;
2283 #endif
2284 
2285 	ASSERT_EQ(0, pipe(pipefd));
2286 
2287 	child_pid = fork();
2288 	ASSERT_LE(0, child_pid);
2289 	if (child_pid == 0) {
2290 		/* Child uses EXPECT not ASSERT to deliver status correctly. */
2291 		char buf = ' ';
2292 		struct timespec timeout = { };
2293 
2294 		/* Attach parent as tracer and stop. */
2295 		EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
2296 		EXPECT_EQ(0, raise(SIGSTOP));
2297 
2298 		EXPECT_EQ(0, close(pipefd[1]));
2299 
2300 		EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2301 			TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2302 		}
2303 
2304 		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
2305 		EXPECT_EQ(0, ret) {
2306 			TH_LOG("Failed to install filter!");
2307 		}
2308 
2309 		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2310 			TH_LOG("Failed to read() sync from parent");
2311 		}
2312 		EXPECT_EQ('.', buf) {
2313 			TH_LOG("Failed to get sync data from read()");
2314 		}
2315 
2316 		/* Start nanosleep to be interrupted. */
2317 		timeout.tv_sec = 1;
2318 		errno = 0;
2319 		EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
2320 			TH_LOG("Call to nanosleep() failed (errno %d)", errno);
2321 		}
2322 
2323 		/* Read final sync from parent. */
2324 		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
2325 			TH_LOG("Failed final read() from parent");
2326 		}
2327 		EXPECT_EQ('!', buf) {
2328 			TH_LOG("Failed to get final data from read()");
2329 		}
2330 
2331 		/* Directly report the status of our test harness results. */
2332 		syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
2333 						     : EXIT_FAILURE);
2334 	}
2335 	EXPECT_EQ(0, close(pipefd[0]));
2336 
2337 	/* Attach to child, setup options, and release. */
2338 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2339 	ASSERT_EQ(true, WIFSTOPPED(status));
2340 	ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
2341 			    PTRACE_O_TRACESECCOMP));
2342 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2343 	ASSERT_EQ(1, write(pipefd[1], ".", 1));
2344 
2345 	/* Wait for nanosleep() to start. */
2346 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2347 	ASSERT_EQ(true, WIFSTOPPED(status));
2348 	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2349 	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2350 	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2351 	ASSERT_EQ(0x100, msg);
2352 	EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid));
2353 
2354 	/* Might as well check siginfo for sanity while we're here. */
2355 	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2356 	ASSERT_EQ(SIGTRAP, info.si_signo);
2357 	ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
2358 	EXPECT_EQ(0, info.si_errno);
2359 	EXPECT_EQ(getuid(), info.si_uid);
2360 	/* Verify signal delivery came from child (seccomp-triggered). */
2361 	EXPECT_EQ(child_pid, info.si_pid);
2362 
2363 	/* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
2364 	ASSERT_EQ(0, kill(child_pid, SIGSTOP));
2365 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2366 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2367 	ASSERT_EQ(true, WIFSTOPPED(status));
2368 	ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
2369 	/* Verify signal delivery came from parent now. */
2370 	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
2371 	EXPECT_EQ(getpid(), info.si_pid);
2372 
2373 	/* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
2374 	ASSERT_EQ(0, kill(child_pid, SIGCONT));
2375 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2376 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2377 	ASSERT_EQ(true, WIFSTOPPED(status));
2378 	ASSERT_EQ(SIGCONT, WSTOPSIG(status));
2379 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2380 
2381 	/* Wait for restart_syscall() to start. */
2382 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2383 	ASSERT_EQ(true, WIFSTOPPED(status));
2384 	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
2385 	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
2386 	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
2387 
2388 	ASSERT_EQ(0x200, msg);
2389 	ret = get_syscall(_metadata, child_pid);
2390 #if defined(__arm__)
2391 	/*
2392 	 * FIXME:
2393 	 * - native ARM registers do NOT expose true syscall.
2394 	 * - compat ARM registers on ARM64 DO expose true syscall.
2395 	 */
2396 	ASSERT_EQ(0, uname(&utsbuf));
2397 	if (strncmp(utsbuf.machine, "arm", 3) == 0) {
2398 		EXPECT_EQ(__NR_nanosleep, ret);
2399 	} else
2400 #endif
2401 	{
2402 		EXPECT_EQ(__NR_restart_syscall, ret);
2403 	}
2404 
2405 	/* Write again to end test. */
2406 	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
2407 	ASSERT_EQ(1, write(pipefd[1], "!", 1));
2408 	EXPECT_EQ(0, close(pipefd[1]));
2409 
2410 	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
2411 	if (WIFSIGNALED(status) || WEXITSTATUS(status))
2412 		_metadata->passed = 0;
2413 }
2414 
2415 /*
2416  * TODO:
2417  * - add microbenchmarks
2418  * - expand NNP testing
2419  * - better arch-specific TRACE and TRAP handlers.
2420  * - endianness checking when appropriate
2421  * - 64-bit arg prodding
2422  * - arch value testing (x86 modes especially)
2423  * - ...
2424  */
2425 
/*
 * Harness entry point.
 * NOTE(review): presumably expands to the main() that runs every TEST()
 * and TEST_F() registered above; the macro is defined in test_harness.h,
 * which is not visible here -- confirm.
 */
TEST_HARNESS_MAIN
2427