1 /*
2 * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
3 * Use of this source code is governed by the GPLv2 license.
4 *
5 * Test code for seccomp bpf.
6 */
7
8 #include <sys/types.h>
9
10 /*
11 * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
12 * we need to use the kernel's siginfo.h file and trick glibc
13 * into accepting it.
14 */
15 #if !__GLIBC_PREREQ(2, 26)
16 # include <asm/siginfo.h>
17 # define __have_siginfo_t 1
18 # define __have_sigval_t 1
19 # define __have_sigevent_t 1
20 #endif
21
22 #include <errno.h>
23 #include <linux/filter.h>
24 #include <sys/prctl.h>
25 #include <sys/ptrace.h>
26 #include <sys/user.h>
27 #include <linux/prctl.h>
28 #include <linux/ptrace.h>
29 #include <linux/seccomp.h>
30 #include <pthread.h>
31 #include <semaphore.h>
32 #include <signal.h>
33 #include <stddef.h>
34 #include <stdbool.h>
35 #include <string.h>
36 #include <time.h>
37 #include <linux/elf.h>
38 #include <sys/uio.h>
39 #include <sys/utsname.h>
40 #include <sys/fcntl.h>
41 #include <sys/mman.h>
42 #include <sys/times.h>
43
44 #define _GNU_SOURCE
45 #include <unistd.h>
46 #include <sys/syscall.h>
47
48 #include "test_harness.h"
49
/*
 * Fallback values for prctl() options and seccomp modes. Each group is
 * only defined when the headers included above did not already provide
 * the guarding name.
 */

/* prctl() option used below to nominate the tracer process. */
#ifndef PR_SET_PTRACER
# define PR_SET_PTRACER			0x59616d61
#endif

/* The "get" option is only supplied together with the "set" option. */
#ifndef PR_SET_NO_NEW_PRIVS
# define PR_SET_NO_NEW_PRIVS		38
# define PR_GET_NO_NEW_PRIVS		39
#endif

#ifndef PR_SECCOMP_EXT
# define PR_SECCOMP_EXT			43
#endif

#ifndef SECCOMP_EXT_ACT
# define SECCOMP_EXT_ACT		1
#endif

#ifndef SECCOMP_EXT_ACT_TSYNC
# define SECCOMP_EXT_ACT_TSYNC		1
#endif

/* Mode arguments for prctl(PR_SET_SECCOMP, ...). */
#ifndef SECCOMP_MODE_STRICT
# define SECCOMP_MODE_STRICT		1
#endif

#ifndef SECCOMP_MODE_FILTER
# define SECCOMP_MODE_FILTER		2
#endif
78
/*
 * Fallback for headers that predate seccomp filter support: the filter
 * return actions, the masks that split a return value into action and
 * data, and the structure a filter program inspects.
 */
#ifndef SECCOMP_RET_KILL
# define SECCOMP_RET_KILL	0x00000000U /* kill the task immediately */
# define SECCOMP_RET_TRAP	0x00030000U /* disallow and force a SIGSYS */
# define SECCOMP_RET_ERRNO	0x00050000U /* returns an errno */
# define SECCOMP_RET_TRACE	0x7ff00000U /* pass to a tracer or disallow */
# define SECCOMP_RET_ALLOW	0x7fff0000U /* allow */

/* Masks for the return value sections. */
# define SECCOMP_RET_ACTION	0x7fff0000U
# define SECCOMP_RET_DATA	0x0000ffffU

/* Data the BPF program loads from (see the offsetof() uses below). */
struct seccomp_data {
	int nr;
	__u32 arch;
	__u64 instruction_pointer;
	__u64 args[6];
};
#endif
97
/*
 * syscall_arg(_n): byte offset inside struct seccomp_data used by the
 * filters below to load argument _n as a 32-bit word. On big-endian
 * builds the offset is advanced by sizeof(__u32) within the 64-bit slot.
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
# define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
#elif __BYTE_ORDER == __BIG_ENDIAN
# define syscall_arg(_n) \
	(offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
#else
# error "wut? Unknown __BYTE_ORDER?!"
#endif
105
/* Distinct marker values named after sibling exit outcomes. */
#define SIBLING_EXIT_UNKILLED	0xbadbeef
#define SIBLING_EXIT_FAILURE	0xbadface
#define SIBLING_EXIT_NEWPRIVS	0xbadfeed
109
TEST(mode_strict_support)110 TEST(mode_strict_support)
111 {
112 long ret;
113
114 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
115 ASSERT_EQ(0, ret) {
116 TH_LOG("Kernel does not support CONFIG_SECCOMP");
117 }
118 syscall(__NR_exit, 1);
119 }
120
TEST_SIGNAL(mode_strict_cannot_call_prctl,SIGKILL)121 TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
122 {
123 long ret;
124
125 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
126 ASSERT_EQ(0, ret) {
127 TH_LOG("Kernel does not support CONFIG_SECCOMP");
128 }
129 syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
130 NULL, NULL, NULL);
131 EXPECT_FALSE(true) {
132 TH_LOG("Unreachable!");
133 }
134 }
135
136 /* Note! This doesn't test no new privs behavior */
TEST(no_new_privs_support)137 TEST(no_new_privs_support)
138 {
139 long ret;
140
141 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
142 EXPECT_EQ(0, ret) {
143 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
144 }
145 }
146
147 /* Tests kernel support by checking for a copy_from_user() fault on * NULL. */
TEST(mode_filter_support)148 TEST(mode_filter_support)
149 {
150 long ret;
151
152 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
153 ASSERT_EQ(0, ret) {
154 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
155 }
156 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
157 EXPECT_EQ(-1, ret);
158 EXPECT_EQ(EFAULT, errno) {
159 TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
160 }
161 }
162
TEST(mode_filter_without_nnp)163 TEST(mode_filter_without_nnp)
164 {
165 struct sock_filter filter[] = {
166 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
167 };
168 struct sock_fprog prog = {
169 .len = (unsigned short)ARRAY_SIZE(filter),
170 .filter = filter,
171 };
172 long ret;
173
174 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
175 ASSERT_LE(0, ret) {
176 TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
177 }
178 errno = 0;
179 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
180 /* Succeeds with CAP_SYS_ADMIN, fails without */
181 /* TODO(wad) check caps not euid */
182 if (geteuid()) {
183 EXPECT_EQ(-1, ret);
184 EXPECT_EQ(EACCES, errno);
185 } else {
186 EXPECT_EQ(0, ret);
187 }
188 }
189
190 #define MAX_INSNS_PER_PATH 32768
191
TEST(filter_size_limits)192 TEST(filter_size_limits)
193 {
194 int i;
195 int count = BPF_MAXINSNS + 1;
196 struct sock_filter allow[] = {
197 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
198 };
199 struct sock_filter *filter;
200 struct sock_fprog prog = { };
201 long ret;
202
203 filter = calloc(count, sizeof(*filter));
204 ASSERT_NE(NULL, filter);
205
206 for (i = 0; i < count; i++)
207 filter[i] = allow[0];
208
209 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
210 ASSERT_EQ(0, ret);
211
212 prog.filter = filter;
213 prog.len = count;
214
215 /* Too many filter instructions in a single filter. */
216 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
217 ASSERT_NE(0, ret) {
218 TH_LOG("Installing %d insn filter was allowed", prog.len);
219 }
220
221 /* One less is okay, though. */
222 prog.len -= 1;
223 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
224 ASSERT_EQ(0, ret) {
225 TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
226 }
227 }
228
TEST(filter_chain_limits)229 TEST(filter_chain_limits)
230 {
231 int i;
232 int count = BPF_MAXINSNS;
233 struct sock_filter allow[] = {
234 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
235 };
236 struct sock_filter *filter;
237 struct sock_fprog prog = { };
238 long ret;
239
240 filter = calloc(count, sizeof(*filter));
241 ASSERT_NE(NULL, filter);
242
243 for (i = 0; i < count; i++)
244 filter[i] = allow[0];
245
246 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
247 ASSERT_EQ(0, ret);
248
249 prog.filter = filter;
250 prog.len = 1;
251
252 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
253 ASSERT_EQ(0, ret);
254
255 prog.len = count;
256
257 /* Too many total filter instructions. */
258 for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
259 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
260 if (ret != 0)
261 break;
262 }
263 ASSERT_NE(0, ret) {
264 TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
265 i, count, i * (count + 4));
266 }
267 }
268
TEST(mode_filter_cannot_move_to_strict)269 TEST(mode_filter_cannot_move_to_strict)
270 {
271 struct sock_filter filter[] = {
272 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
273 };
274 struct sock_fprog prog = {
275 .len = (unsigned short)ARRAY_SIZE(filter),
276 .filter = filter,
277 };
278 long ret;
279
280 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
281 ASSERT_EQ(0, ret);
282
283 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
284 ASSERT_EQ(0, ret);
285
286 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
287 EXPECT_EQ(-1, ret);
288 EXPECT_EQ(EINVAL, errno);
289 }
290
291
TEST(mode_filter_get_seccomp)292 TEST(mode_filter_get_seccomp)
293 {
294 struct sock_filter filter[] = {
295 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
296 };
297 struct sock_fprog prog = {
298 .len = (unsigned short)ARRAY_SIZE(filter),
299 .filter = filter,
300 };
301 long ret;
302
303 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
304 ASSERT_EQ(0, ret);
305
306 ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
307 EXPECT_EQ(0, ret);
308
309 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
310 ASSERT_EQ(0, ret);
311
312 ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
313 EXPECT_EQ(2, ret);
314 }
315
316
TEST(ALLOW_all)317 TEST(ALLOW_all)
318 {
319 struct sock_filter filter[] = {
320 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
321 };
322 struct sock_fprog prog = {
323 .len = (unsigned short)ARRAY_SIZE(filter),
324 .filter = filter,
325 };
326 long ret;
327
328 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
329 ASSERT_EQ(0, ret);
330
331 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
332 ASSERT_EQ(0, ret);
333 }
334
TEST(empty_prog)335 TEST(empty_prog)
336 {
337 struct sock_filter filter[] = {
338 };
339 struct sock_fprog prog = {
340 .len = (unsigned short)ARRAY_SIZE(filter),
341 .filter = filter,
342 };
343 long ret;
344
345 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
346 ASSERT_EQ(0, ret);
347
348 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
349 EXPECT_EQ(-1, ret);
350 EXPECT_EQ(EINVAL, errno);
351 }
352
TEST_SIGNAL(unknown_ret_is_kill_inside,SIGSYS)353 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
354 {
355 struct sock_filter filter[] = {
356 BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
357 };
358 struct sock_fprog prog = {
359 .len = (unsigned short)ARRAY_SIZE(filter),
360 .filter = filter,
361 };
362 long ret;
363
364 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
365 ASSERT_EQ(0, ret);
366
367 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
368 ASSERT_EQ(0, ret);
369 EXPECT_EQ(0, syscall(__NR_getpid)) {
370 TH_LOG("getpid() shouldn't ever return");
371 }
372 }
373
374 /* return code >= 0x80000000 is unused. */
TEST_SIGNAL(unknown_ret_is_kill_above_allow,SIGSYS)375 TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
376 {
377 struct sock_filter filter[] = {
378 BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
379 };
380 struct sock_fprog prog = {
381 .len = (unsigned short)ARRAY_SIZE(filter),
382 .filter = filter,
383 };
384 long ret;
385
386 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
387 ASSERT_EQ(0, ret);
388
389 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
390 ASSERT_EQ(0, ret);
391 EXPECT_EQ(0, syscall(__NR_getpid)) {
392 TH_LOG("getpid() shouldn't ever return");
393 }
394 }
395
TEST_SIGNAL(KILL_all,SIGSYS)396 TEST_SIGNAL(KILL_all, SIGSYS)
397 {
398 struct sock_filter filter[] = {
399 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
400 };
401 struct sock_fprog prog = {
402 .len = (unsigned short)ARRAY_SIZE(filter),
403 .filter = filter,
404 };
405 long ret;
406
407 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
408 ASSERT_EQ(0, ret);
409
410 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
411 ASSERT_EQ(0, ret);
412 }
413
TEST_SIGNAL(KILL_one,SIGSYS)414 TEST_SIGNAL(KILL_one, SIGSYS)
415 {
416 struct sock_filter filter[] = {
417 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
418 offsetof(struct seccomp_data, nr)),
419 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
420 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
421 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
422 };
423 struct sock_fprog prog = {
424 .len = (unsigned short)ARRAY_SIZE(filter),
425 .filter = filter,
426 };
427 long ret;
428 pid_t parent = getppid();
429
430 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
431 ASSERT_EQ(0, ret);
432
433 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
434 ASSERT_EQ(0, ret);
435
436 EXPECT_EQ(parent, syscall(__NR_getppid));
437 /* getpid() should never return. */
438 EXPECT_EQ(0, syscall(__NR_getpid));
439 }
440
TEST_SIGNAL(KILL_one_arg_one,SIGSYS)441 TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
442 {
443 void *fatal_address;
444 struct sock_filter filter[] = {
445 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
446 offsetof(struct seccomp_data, nr)),
447 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
448 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
449 /* Only both with lower 32-bit for now. */
450 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
451 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
452 (unsigned long)&fatal_address, 0, 1),
453 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
454 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
455 };
456 struct sock_fprog prog = {
457 .len = (unsigned short)ARRAY_SIZE(filter),
458 .filter = filter,
459 };
460 long ret;
461 pid_t parent = getppid();
462 struct tms timebuf;
463 clock_t clock = times(&timebuf);
464
465 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
466 ASSERT_EQ(0, ret);
467
468 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
469 ASSERT_EQ(0, ret);
470
471 EXPECT_EQ(parent, syscall(__NR_getppid));
472 EXPECT_LE(clock, syscall(__NR_times, &timebuf));
473 /* times() should never return. */
474 EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
475 }
476
TEST_SIGNAL(KILL_one_arg_six,SIGSYS)477 TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
478 {
479 #ifndef __NR_mmap2
480 int sysno = __NR_mmap;
481 #else
482 int sysno = __NR_mmap2;
483 #endif
484 struct sock_filter filter[] = {
485 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
486 offsetof(struct seccomp_data, nr)),
487 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
488 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
489 /* Only both with lower 32-bit for now. */
490 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
491 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
492 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
493 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
494 };
495 struct sock_fprog prog = {
496 .len = (unsigned short)ARRAY_SIZE(filter),
497 .filter = filter,
498 };
499 long ret;
500 pid_t parent = getppid();
501 int fd;
502 void *map1, *map2;
503 int page_size = sysconf(_SC_PAGESIZE);
504
505 ASSERT_LT(0, page_size);
506
507 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
508 ASSERT_EQ(0, ret);
509
510 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
511 ASSERT_EQ(0, ret);
512
513 fd = open("/dev/zero", O_RDONLY);
514 ASSERT_NE(-1, fd);
515
516 EXPECT_EQ(parent, syscall(__NR_getppid));
517 map1 = (void *)syscall(sysno,
518 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
519 EXPECT_NE(MAP_FAILED, map1);
520 /* mmap2() should never return. */
521 map2 = (void *)syscall(sysno,
522 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
523 EXPECT_EQ(MAP_FAILED, map2);
524
525 /* The test failed, so clean up the resources. */
526 munmap(map1, page_size);
527 munmap(map2, page_size);
528 close(fd);
529 }
530
531 /* TODO(wad) add 64-bit versus 32-bit arg tests. */
TEST(arg_out_of_range)532 TEST(arg_out_of_range)
533 {
534 struct sock_filter filter[] = {
535 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
536 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
537 };
538 struct sock_fprog prog = {
539 .len = (unsigned short)ARRAY_SIZE(filter),
540 .filter = filter,
541 };
542 long ret;
543
544 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
545 ASSERT_EQ(0, ret);
546
547 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
548 EXPECT_EQ(-1, ret);
549 EXPECT_EQ(EINVAL, errno);
550 }
551
TEST(ERRNO_valid)552 TEST(ERRNO_valid)
553 {
554 struct sock_filter filter[] = {
555 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
556 offsetof(struct seccomp_data, nr)),
557 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
558 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG),
559 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
560 };
561 struct sock_fprog prog = {
562 .len = (unsigned short)ARRAY_SIZE(filter),
563 .filter = filter,
564 };
565 long ret;
566 pid_t parent = getppid();
567
568 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
569 ASSERT_EQ(0, ret);
570
571 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
572 ASSERT_EQ(0, ret);
573
574 EXPECT_EQ(parent, syscall(__NR_getppid));
575 EXPECT_EQ(-1, read(0, NULL, 0));
576 EXPECT_EQ(E2BIG, errno);
577 }
578
TEST(ERRNO_zero)579 TEST(ERRNO_zero)
580 {
581 struct sock_filter filter[] = {
582 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
583 offsetof(struct seccomp_data, nr)),
584 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
585 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0),
586 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
587 };
588 struct sock_fprog prog = {
589 .len = (unsigned short)ARRAY_SIZE(filter),
590 .filter = filter,
591 };
592 long ret;
593 pid_t parent = getppid();
594
595 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
596 ASSERT_EQ(0, ret);
597
598 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
599 ASSERT_EQ(0, ret);
600
601 EXPECT_EQ(parent, syscall(__NR_getppid));
602 /* "errno" of 0 is ok. */
603 EXPECT_EQ(0, read(0, NULL, 0));
604 }
605
TEST(ERRNO_capped)606 TEST(ERRNO_capped)
607 {
608 struct sock_filter filter[] = {
609 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
610 offsetof(struct seccomp_data, nr)),
611 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
612 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096),
613 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
614 };
615 struct sock_fprog prog = {
616 .len = (unsigned short)ARRAY_SIZE(filter),
617 .filter = filter,
618 };
619 long ret;
620 pid_t parent = getppid();
621
622 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
623 ASSERT_EQ(0, ret);
624
625 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
626 ASSERT_EQ(0, ret);
627
628 EXPECT_EQ(parent, syscall(__NR_getppid));
629 EXPECT_EQ(-1, read(0, NULL, 0));
630 EXPECT_EQ(4095, errno);
631 }
632
/* Per-test state for the TRAP fixture: the filter program to install. */
FIXTURE_DATA(TRAP) {
	struct sock_fprog prog;
};
636
FIXTURE_SETUP(TRAP)637 FIXTURE_SETUP(TRAP)
638 {
639 struct sock_filter filter[] = {
640 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
641 offsetof(struct seccomp_data, nr)),
642 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
643 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
644 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
645 };
646
647 memset(&self->prog, 0, sizeof(self->prog));
648 self->prog.filter = malloc(sizeof(filter));
649 ASSERT_NE(NULL, self->prog.filter);
650 memcpy(self->prog.filter, filter, sizeof(filter));
651 self->prog.len = (unsigned short)ARRAY_SIZE(filter);
652 }
653
FIXTURE_TEARDOWN(TRAP)654 FIXTURE_TEARDOWN(TRAP)
655 {
656 if (self->prog.filter)
657 free(self->prog.filter);
658 }
659
TEST_F_SIGNAL(TRAP,dfl,SIGSYS)660 TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
661 {
662 long ret;
663
664 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
665 ASSERT_EQ(0, ret);
666
667 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
668 ASSERT_EQ(0, ret);
669 syscall(__NR_getpid);
670 }
671
672 /* Ensure that SIGSYS overrides SIG_IGN */
TEST_F_SIGNAL(TRAP,ign,SIGSYS)673 TEST_F_SIGNAL(TRAP, ign, SIGSYS)
674 {
675 long ret;
676
677 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
678 ASSERT_EQ(0, ret);
679
680 signal(SIGSYS, SIG_IGN);
681
682 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
683 ASSERT_EQ(0, ret);
684 syscall(__NR_getpid);
685 }
686
/* Last signal details captured by TRAP_action() for the handler test. */
static siginfo_t TRAP_info;
static volatile int TRAP_nr;

/*
 * SA_SIGINFO-style handler: records the siginfo contents and the
 * signal number in the file-scope variables above.
 *
 * @nr:           signal number delivered
 * @info:         signal details, copied into TRAP_info
 * @void_context: unused
 */
static void TRAP_action(int nr, siginfo_t *info, void *void_context)
{
	memcpy(&TRAP_info, info, sizeof(TRAP_info));
	TRAP_nr = nr;
}
694
TEST_F(TRAP,handler)695 TEST_F(TRAP, handler)
696 {
697 int ret, test;
698 struct sigaction act;
699 sigset_t mask;
700
701 memset(&act, 0, sizeof(act));
702 sigemptyset(&mask);
703 sigaddset(&mask, SIGSYS);
704
705 act.sa_sigaction = &TRAP_action;
706 act.sa_flags = SA_SIGINFO;
707 ret = sigaction(SIGSYS, &act, NULL);
708 ASSERT_EQ(0, ret) {
709 TH_LOG("sigaction failed");
710 }
711 ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
712 ASSERT_EQ(0, ret) {
713 TH_LOG("sigprocmask failed");
714 }
715
716 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
717 ASSERT_EQ(0, ret);
718 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
719 ASSERT_EQ(0, ret);
720 TRAP_nr = 0;
721 memset(&TRAP_info, 0, sizeof(TRAP_info));
722 /* Expect the registers to be rolled back. (nr = error) may vary
723 * based on arch. */
724 ret = syscall(__NR_getpid);
725 /* Silence gcc warning about volatile. */
726 test = TRAP_nr;
727 EXPECT_EQ(SIGSYS, test);
728 struct local_sigsys {
729 void *_call_addr; /* calling user insn */
730 int _syscall; /* triggering system call number */
731 unsigned int _arch; /* AUDIT_ARCH_* of syscall */
732 } *sigsys = (struct local_sigsys *)
733 #ifdef si_syscall
734 &(TRAP_info.si_call_addr);
735 #else
736 &TRAP_info.si_pid;
737 #endif
738 EXPECT_EQ(__NR_getpid, sigsys->_syscall);
739 /* Make sure arch is non-zero. */
740 EXPECT_NE(0, sigsys->_arch);
741 EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
742 }
743
/*
 * Per-test state for the precedence fixture: one filter program per
 * seccomp return action exercised by the tests.
 */
FIXTURE_DATA(precedence) {
	struct sock_fprog allow;
	struct sock_fprog trace;
	struct sock_fprog error;
	struct sock_fprog trap;
	struct sock_fprog kill;
};
751
FIXTURE_SETUP(precedence)752 FIXTURE_SETUP(precedence)
753 {
754 struct sock_filter allow_insns[] = {
755 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
756 };
757 struct sock_filter trace_insns[] = {
758 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
759 offsetof(struct seccomp_data, nr)),
760 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
761 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
762 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
763 };
764 struct sock_filter error_insns[] = {
765 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
766 offsetof(struct seccomp_data, nr)),
767 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
768 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
769 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
770 };
771 struct sock_filter trap_insns[] = {
772 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
773 offsetof(struct seccomp_data, nr)),
774 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
775 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
776 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
777 };
778 struct sock_filter kill_insns[] = {
779 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
780 offsetof(struct seccomp_data, nr)),
781 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
782 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
783 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
784 };
785
786 memset(self, 0, sizeof(*self));
787 #define FILTER_ALLOC(_x) \
788 self->_x.filter = malloc(sizeof(_x##_insns)); \
789 ASSERT_NE(NULL, self->_x.filter); \
790 memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
791 self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
792 FILTER_ALLOC(allow);
793 FILTER_ALLOC(trace);
794 FILTER_ALLOC(error);
795 FILTER_ALLOC(trap);
796 FILTER_ALLOC(kill);
797 }
798
FIXTURE_TEARDOWN(precedence)799 FIXTURE_TEARDOWN(precedence)
800 {
801 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
802 FILTER_FREE(allow);
803 FILTER_FREE(trace);
804 FILTER_FREE(error);
805 FILTER_FREE(trap);
806 FILTER_FREE(kill);
807 }
808
TEST_F(precedence,allow_ok)809 TEST_F(precedence, allow_ok)
810 {
811 pid_t parent, res = 0;
812 long ret;
813
814 parent = getppid();
815 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
816 ASSERT_EQ(0, ret);
817
818 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
819 ASSERT_EQ(0, ret);
820 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
821 ASSERT_EQ(0, ret);
822 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
823 ASSERT_EQ(0, ret);
824 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
825 ASSERT_EQ(0, ret);
826 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
827 ASSERT_EQ(0, ret);
828 /* Should work just fine. */
829 res = syscall(__NR_getppid);
830 EXPECT_EQ(parent, res);
831 }
832
TEST_F_SIGNAL(precedence,kill_is_highest,SIGSYS)833 TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
834 {
835 pid_t parent, res = 0;
836 long ret;
837
838 parent = getppid();
839 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
840 ASSERT_EQ(0, ret);
841
842 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
843 ASSERT_EQ(0, ret);
844 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
845 ASSERT_EQ(0, ret);
846 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
847 ASSERT_EQ(0, ret);
848 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
849 ASSERT_EQ(0, ret);
850 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
851 ASSERT_EQ(0, ret);
852 /* Should work just fine. */
853 res = syscall(__NR_getppid);
854 EXPECT_EQ(parent, res);
855 /* getpid() should never return. */
856 res = syscall(__NR_getpid);
857 EXPECT_EQ(0, res);
858 }
859
TEST_F_SIGNAL(precedence,kill_is_highest_in_any_order,SIGSYS)860 TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
861 {
862 pid_t parent;
863 long ret;
864
865 parent = getppid();
866 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
867 ASSERT_EQ(0, ret);
868
869 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
870 ASSERT_EQ(0, ret);
871 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
872 ASSERT_EQ(0, ret);
873 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
874 ASSERT_EQ(0, ret);
875 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
876 ASSERT_EQ(0, ret);
877 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
878 ASSERT_EQ(0, ret);
879 /* Should work just fine. */
880 EXPECT_EQ(parent, syscall(__NR_getppid));
881 /* getpid() should never return. */
882 EXPECT_EQ(0, syscall(__NR_getpid));
883 }
884
TEST_F_SIGNAL(precedence,trap_is_second,SIGSYS)885 TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
886 {
887 pid_t parent;
888 long ret;
889
890 parent = getppid();
891 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
892 ASSERT_EQ(0, ret);
893
894 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
895 ASSERT_EQ(0, ret);
896 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
897 ASSERT_EQ(0, ret);
898 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
899 ASSERT_EQ(0, ret);
900 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
901 ASSERT_EQ(0, ret);
902 /* Should work just fine. */
903 EXPECT_EQ(parent, syscall(__NR_getppid));
904 /* getpid() should never return. */
905 EXPECT_EQ(0, syscall(__NR_getpid));
906 }
907
TEST_F_SIGNAL(precedence,trap_is_second_in_any_order,SIGSYS)908 TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
909 {
910 pid_t parent;
911 long ret;
912
913 parent = getppid();
914 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
915 ASSERT_EQ(0, ret);
916
917 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
918 ASSERT_EQ(0, ret);
919 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
920 ASSERT_EQ(0, ret);
921 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
922 ASSERT_EQ(0, ret);
923 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
924 ASSERT_EQ(0, ret);
925 /* Should work just fine. */
926 EXPECT_EQ(parent, syscall(__NR_getppid));
927 /* getpid() should never return. */
928 EXPECT_EQ(0, syscall(__NR_getpid));
929 }
930
TEST_F(precedence,errno_is_third)931 TEST_F(precedence, errno_is_third)
932 {
933 pid_t parent;
934 long ret;
935
936 parent = getppid();
937 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
938 ASSERT_EQ(0, ret);
939
940 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
941 ASSERT_EQ(0, ret);
942 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
943 ASSERT_EQ(0, ret);
944 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
945 ASSERT_EQ(0, ret);
946 /* Should work just fine. */
947 EXPECT_EQ(parent, syscall(__NR_getppid));
948 EXPECT_EQ(0, syscall(__NR_getpid));
949 }
950
TEST_F(precedence,errno_is_third_in_any_order)951 TEST_F(precedence, errno_is_third_in_any_order)
952 {
953 pid_t parent;
954 long ret;
955
956 parent = getppid();
957 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
958 ASSERT_EQ(0, ret);
959
960 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
961 ASSERT_EQ(0, ret);
962 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
963 ASSERT_EQ(0, ret);
964 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
965 ASSERT_EQ(0, ret);
966 /* Should work just fine. */
967 EXPECT_EQ(parent, syscall(__NR_getppid));
968 EXPECT_EQ(0, syscall(__NR_getpid));
969 }
970
TEST_F(precedence,trace_is_fourth)971 TEST_F(precedence, trace_is_fourth)
972 {
973 pid_t parent;
974 long ret;
975
976 parent = getppid();
977 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
978 ASSERT_EQ(0, ret);
979
980 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
981 ASSERT_EQ(0, ret);
982 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
983 ASSERT_EQ(0, ret);
984 /* Should work just fine. */
985 EXPECT_EQ(parent, syscall(__NR_getppid));
986 /* No ptracer */
987 EXPECT_EQ(-1, syscall(__NR_getpid));
988 }
989
TEST_F(precedence,trace_is_fourth_in_any_order)990 TEST_F(precedence, trace_is_fourth_in_any_order)
991 {
992 pid_t parent;
993 long ret;
994
995 parent = getppid();
996 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
997 ASSERT_EQ(0, ret);
998
999 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
1000 ASSERT_EQ(0, ret);
1001 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
1002 ASSERT_EQ(0, ret);
1003 /* Should work just fine. */
1004 EXPECT_EQ(parent, syscall(__NR_getppid));
1005 /* No ptracer */
1006 EXPECT_EQ(-1, syscall(__NR_getpid));
1007 }
1008
/* Fallback for headers that do not provide the seccomp ptrace option. */
#ifndef PTRACE_O_TRACESECCOMP
#define PTRACE_O_TRACESECCOMP 0x00000080
#endif

/* Catch the Ubuntu 12.04 value error. */
#if PTRACE_EVENT_SECCOMP != 7
#undef PTRACE_EVENT_SECCOMP
#endif

#ifndef PTRACE_EVENT_SECCOMP
#define PTRACE_EVENT_SECCOMP 7
#endif

/*
 * True when the event code stored above bit 16 of a wait status is
 * PTRACE_EVENT_SECCOMP. The argument is parenthesized so that an
 * expression such as (a | b) is shifted as a whole.
 */
#define IS_SECCOMP_EVENT(status) (((status) >> 16) == PTRACE_EVENT_SECCOMP)
/*
 * Run flag for the tracer loop. It is cleared from a signal handler,
 * so it uses the type C guarantees is safe to write there.
 */
volatile sig_atomic_t tracer_running;

/*
 * Signal handler that asks the tracer loop to stop.
 *
 * @sig: signal number (unused)
 */
void tracer_stop(int sig)
{
	(void)sig;
	tracer_running = false;
}
1028
/*
 * Callback invoked by the tracer loop for each stop it handles.
 *
 * @_metadata: test harness state, for assertions inside the callback
 * @tracee:    pid of the traced process
 * @status:    wait status of the stop
 * @args:      opaque pointer supplied when the tracer was started
 */
typedef void tracer_func_t(struct __test_metadata *_metadata,
			   pid_t tracee, int status, void *args);
1031
/*
 * Tracer main routine: attach to @tracee, configure ptrace, signal
 * readiness over @fd, then hand every stop to @tracer_func until told
 * to stop via SIGUSR1 or the tracee dies.
 *
 * @_metadata:      test harness state used by the assertions
 * @fd:             write end of the sync pipe; one byte is written and
 *                  the descriptor is closed once tracing is set up
 * @tracee:         pid to attach to
 * @tracer_func:    callback run for each stop
 * @args:           opaque pointer passed through to @tracer_func
 * @ptrace_syscall: true to stop on every syscall (PTRACE_SYSCALL with
 *                  PTRACE_O_TRACESYSGOOD); false to use PTRACE_CONT
 *                  with PTRACE_O_TRACESECCOMP
 *
 * Returns only when the tracee has exited or been killed; when stopped
 * by SIGUSR1 it exits the process with the harness pass/fail state.
 */
void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
		  tracer_func_t tracer_func, void *args, bool ptrace_syscall)
{
	int ret = -1;
	struct sigaction action = {
		.sa_handler = tracer_stop,
	};

	/* Allow external shutdown. */
	tracer_running = true;
	ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));

	/* Keep retrying the attach until it works or fails with EINVAL. */
	errno = 0;
	do {
		ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
	} while (ret == -1 && errno != EINVAL);
	ASSERT_EQ(0, ret) {
		kill(tracee, SIGKILL);
	}

	/* Wait for attach stop */
	wait(NULL);

	ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
						      PTRACE_O_TRACESYSGOOD :
						      PTRACE_O_TRACESECCOMP);
	ASSERT_EQ(0, ret) {
		TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
		kill(tracee, SIGKILL);
	}

	/* Let the tracee run again in the requested mode. */
	ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
		     tracee, NULL, 0);
	ASSERT_EQ(0, ret);

	/* Unblock the tracee */
	ASSERT_EQ(1, write(fd, "A", 1));
	ASSERT_EQ(0, close(fd));

	/* Run until we're shut down. Must assert to stop execution. */
	while (tracer_running) {
		int status;

		/* Ignore anything that is not a status from the tracee. */
		if (wait(&status) != tracee)
			continue;

		/* Child is dead. Time to go. */
		if (WIFSIGNALED(status) || WIFEXITED(status))
			return;

		/* Check if this is a seccomp event. */
		ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));

		tracer_func(_metadata, tracee, status, args);

		ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
			     tracee, NULL, 0);
		ASSERT_EQ(0, ret);
	}

	/* Directly report the status of our test harness results. */
	syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
}
1090
1091 /* Common tracer setup/teardown functions. */
/*
 * Signal handler that intentionally does nothing.
 *
 * @num: signal number (unused)
 */
void cont_handler(int num)
{
}
/*
 * Fork a child that becomes the tracer of the calling process and wait
 * on a pipe until the child has written its readiness byte (or closed
 * its end).
 *
 * @_metadata:      test harness state used by the assertions
 * @func:           callback the tracer runs for each stop
 * @args:           opaque pointer passed through to @func
 * @ptrace_syscall: forwarded to start_tracer()
 *
 * Returns the pid of the tracer child.
 */
pid_t setup_trace_fixture(struct __test_metadata *_metadata,
			  tracer_func_t func, void *args, bool ptrace_syscall)
{
	pid_t tracee = getpid();
	pid_t tracer_pid;
	int pipefd[2];
	char ready;

	/* Setup a pipe for clean synchronization. */
	ASSERT_EQ(0, pipe(pipefd));

	/* Fork a child which we'll promote to tracer */
	tracer_pid = fork();
	ASSERT_LE(0, tracer_pid);
	signal(SIGALRM, cont_handler);

	if (tracer_pid == 0) {
		/* Child: keep only the write end and start tracing. */
		close(pipefd[0]);
		start_tracer(_metadata, pipefd[1], tracee, func, args,
			     ptrace_syscall);
		syscall(__NR_exit, 0);
	}

	/* Parent: keep only the read end. */
	close(pipefd[1]);
	prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);

	/* Block until the tracer has written its byte. */
	read(pipefd[0], &ready, 1);
	close(pipefd[0]);

	return tracer_pid;
}
teardown_trace_fixture(struct __test_metadata * _metadata,pid_t tracer)1122 void teardown_trace_fixture(struct __test_metadata *_metadata,
1123 pid_t tracer)
1124 {
1125 if (tracer) {
1126 int status;
1127 /*
1128 * Extract the exit code from the other process and
1129 * adopt it for ourselves in case its asserts failed.
1130 */
1131 ASSERT_EQ(0, kill(tracer, SIGUSR1));
1132 ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
1133 if (WEXITSTATUS(status))
1134 _metadata->passed = 0;
1135 }
1136 }
1137
1138 /* "poke" tracer arguments and function. */
/* Argument block handed to tracer_poke() through its void *args parameter. */
struct tracer_args_poke_t {
	/* Address in the tracee that tracer_poke() writes 0x1001 to. */
	unsigned long poke_addr;
};
1142
tracer_poke(struct __test_metadata * _metadata,pid_t tracee,int status,void * args)1143 void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
1144 void *args)
1145 {
1146 int ret;
1147 unsigned long msg;
1148 struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
1149
1150 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1151 EXPECT_EQ(0, ret);
1152 /* If this fails, don't try to recover. */
1153 ASSERT_EQ(0x1001, msg) {
1154 kill(tracee, SIGKILL);
1155 }
1156 /*
1157 * Poke in the message.
1158 * Registers are not touched to try to keep this relatively arch
1159 * agnostic.
1160 */
1161 ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
1162 EXPECT_EQ(0, ret);
1163 }
1164
/* Per-test state of the TRACE_poke fixture. */
FIXTURE_DATA(TRACE_poke) {
	/* Heap copy of the seccomp filter, built in FIXTURE_SETUP. */
	struct sock_fprog prog;
	/* Pid returned by setup_trace_fixture(). */
	pid_t tracer;
	/* Word whose address is handed to the tracer as poke_addr. */
	long poked;
	/* Arguments passed to tracer_poke(). */
	struct tracer_args_poke_t tracer_args;
};
1171
FIXTURE_SETUP(TRACE_poke)1172 FIXTURE_SETUP(TRACE_poke)
1173 {
1174 struct sock_filter filter[] = {
1175 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1176 offsetof(struct seccomp_data, nr)),
1177 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1178 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
1179 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1180 };
1181
1182 self->poked = 0;
1183 memset(&self->prog, 0, sizeof(self->prog));
1184 self->prog.filter = malloc(sizeof(filter));
1185 ASSERT_NE(NULL, self->prog.filter);
1186 memcpy(self->prog.filter, filter, sizeof(filter));
1187 self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1188
1189 /* Set up tracer args. */
1190 self->tracer_args.poke_addr = (unsigned long)&self->poked;
1191
1192 /* Launch tracer. */
1193 self->tracer = setup_trace_fixture(_metadata, tracer_poke,
1194 &self->tracer_args, false);
1195 }
1196
FIXTURE_TEARDOWN(TRACE_poke)1197 FIXTURE_TEARDOWN(TRACE_poke)
1198 {
1199 teardown_trace_fixture(_metadata, self->tracer);
1200 if (self->prog.filter)
1201 free(self->prog.filter);
1202 }
1203
TEST_F(TRACE_poke,read_has_side_effects)1204 TEST_F(TRACE_poke, read_has_side_effects)
1205 {
1206 ssize_t ret;
1207
1208 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1209 ASSERT_EQ(0, ret);
1210
1211 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1212 ASSERT_EQ(0, ret);
1213
1214 EXPECT_EQ(0, self->poked);
1215 ret = read(-1, NULL, 0);
1216 EXPECT_EQ(-1, ret);
1217 EXPECT_EQ(0x1001, self->poked);
1218 }
1219
TEST_F(TRACE_poke,getpid_runs_normally)1220 TEST_F(TRACE_poke, getpid_runs_normally)
1221 {
1222 long ret;
1223
1224 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1225 ASSERT_EQ(0, ret);
1226
1227 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1228 ASSERT_EQ(0, ret);
1229
1230 EXPECT_EQ(0, self->poked);
1231 EXPECT_NE(0, syscall(__NR_getpid));
1232 EXPECT_EQ(0, self->poked);
1233 }
1234
/*
 * Per-architecture register access used by get_syscall()/change_syscall():
 *   ARCH_REGS   - register structure fetched from the tracee
 *   SYSCALL_NUM - member holding the syscall number
 *   SYSCALL_RET - member holding the syscall return value
 * SYSCALL_NUM_RET_SHARE_REG is defined when both values live in the same
 * register; the return value of a skipped syscall cannot be overridden then.
 */
#if defined(__x86_64__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_rax
# define SYSCALL_RET	rax
#elif defined(__i386__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	orig_eax
# define SYSCALL_RET	eax
#elif defined(__arm__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	ARM_r7
# define SYSCALL_RET	ARM_r0
#elif defined(__aarch64__)
# define ARCH_REGS	struct user_pt_regs
# define SYSCALL_NUM	regs[8]
# define SYSCALL_RET	regs[0]
#elif defined(__hppa__)
# define ARCH_REGS	struct user_regs_struct
# define SYSCALL_NUM	gr[20]
# define SYSCALL_RET	gr[28]
#elif defined(__powerpc__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	gpr[0]
# define SYSCALL_RET	gpr[3]
#elif defined(__s390__)
# define ARCH_REGS	s390_regs
# define SYSCALL_NUM	gprs[2]
# define SYSCALL_RET	gprs[2]
/*
 * Number and return value are both gprs[2]: writing a return value after
 * setting the number to -1 would turn the "skip" into syscall 1.
 */
# define SYSCALL_NUM_RET_SHARE_REG
#elif defined(__mips__)
# define ARCH_REGS	struct pt_regs
# define SYSCALL_NUM	regs[2]
# define SYSCALL_SYSCALL_NUM regs[4]
# define SYSCALL_RET	regs[2]
# define SYSCALL_NUM_RET_SHARE_REG
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
1272
/*
 * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
 * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
 * When HAVE_GETREGS is not defined, get_syscall() and change_syscall() use
 * PTRACE_GETREGSET/PTRACE_SETREGSET with NT_PRSTATUS instead.
 */
#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
#define HAVE_GETREGS
#endif
1279
1280 /* Architecture-specific syscall fetching routine. */
get_syscall(struct __test_metadata * _metadata,pid_t tracee)1281 int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
1282 {
1283 ARCH_REGS regs;
1284 #ifdef HAVE_GETREGS
1285 EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, ®s)) {
1286 TH_LOG("PTRACE_GETREGS failed");
1287 return -1;
1288 }
1289 #else
1290 struct iovec iov;
1291
1292 iov.iov_base = ®s;
1293 iov.iov_len = sizeof(regs);
1294 EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
1295 TH_LOG("PTRACE_GETREGSET failed");
1296 return -1;
1297 }
1298 #endif
1299
1300 #if defined(__mips__)
1301 if (regs.SYSCALL_NUM == __NR_O32_Linux)
1302 return regs.SYSCALL_SYSCALL_NUM;
1303 #endif
1304 return regs.SYSCALL_NUM;
1305 }
1306
1307 /* Architecture-specific syscall changing routine. */
change_syscall(struct __test_metadata * _metadata,pid_t tracee,int syscall)1308 void change_syscall(struct __test_metadata *_metadata,
1309 pid_t tracee, int syscall)
1310 {
1311 int ret;
1312 ARCH_REGS regs;
1313 #ifdef HAVE_GETREGS
1314 ret = ptrace(PTRACE_GETREGS, tracee, 0, ®s);
1315 #else
1316 struct iovec iov;
1317 iov.iov_base = ®s;
1318 iov.iov_len = sizeof(regs);
1319 ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
1320 #endif
1321 EXPECT_EQ(0, ret) {}
1322
1323 #if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
1324 defined(__s390__) || defined(__hppa__)
1325 {
1326 regs.SYSCALL_NUM = syscall;
1327 }
1328 #elif defined(__mips__)
1329 {
1330 if (regs.SYSCALL_NUM == __NR_O32_Linux)
1331 regs.SYSCALL_SYSCALL_NUM = syscall;
1332 else
1333 regs.SYSCALL_NUM = syscall;
1334 }
1335
1336 #elif defined(__arm__)
1337 # ifndef PTRACE_SET_SYSCALL
1338 # define PTRACE_SET_SYSCALL 23
1339 # endif
1340 {
1341 ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
1342 EXPECT_EQ(0, ret);
1343 }
1344
1345 #elif defined(__aarch64__)
1346 # ifndef NT_ARM_SYSTEM_CALL
1347 # define NT_ARM_SYSTEM_CALL 0x404
1348 # endif
1349 {
1350 iov.iov_base = &syscall;
1351 iov.iov_len = sizeof(syscall);
1352 ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
1353 &iov);
1354 EXPECT_EQ(0, ret);
1355 }
1356
1357 #else
1358 ASSERT_EQ(1, 0) {
1359 TH_LOG("How is the syscall changed on this architecture?");
1360 }
1361 #endif
1362
1363 /* If syscall is skipped, change return value. */
1364 if (syscall == -1)
1365 #ifdef SYSCALL_NUM_RET_SHARE_REG
1366 TH_LOG("Can't modify syscall return on this architecture");
1367 #else
1368 regs.SYSCALL_RET = 1;
1369 #endif
1370
1371 #ifdef HAVE_GETREGS
1372 ret = ptrace(PTRACE_SETREGS, tracee, 0, ®s);
1373 #else
1374 iov.iov_base = ®s;
1375 iov.iov_len = sizeof(regs);
1376 ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
1377 #endif
1378 EXPECT_EQ(0, ret);
1379 }
1380
tracer_syscall(struct __test_metadata * _metadata,pid_t tracee,int status,void * args)1381 void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
1382 int status, void *args)
1383 {
1384 int ret;
1385 unsigned long msg;
1386
1387 /* Make sure we got the right message. */
1388 ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
1389 EXPECT_EQ(0, ret);
1390
1391 /* Validate and take action on expected syscalls. */
1392 switch (msg) {
1393 case 0x1002:
1394 /* change getpid to getppid. */
1395 EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
1396 change_syscall(_metadata, tracee, __NR_getppid);
1397 break;
1398 case 0x1003:
1399 /* skip gettid. */
1400 EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
1401 change_syscall(_metadata, tracee, -1);
1402 break;
1403 case 0x1004:
1404 /* do nothing (allow getppid) */
1405 EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
1406 break;
1407 default:
1408 EXPECT_EQ(0, msg) {
1409 TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
1410 kill(tracee, SIGKILL);
1411 }
1412 }
1413
1414 }
1415
/*
 * Tracer callback for PTRACE_SYSCALL based tracing.
 *
 * Expects an empty event message on every stop. Stops alternate between
 * syscall entry and exit; on entry stops a getpid is rewritten to getppid.
 * @status and @args are not used.
 */
void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
		   int status, void *args)
{
	/* Flipped on every call; true while handling an entry stop. */
	static bool entry;
	unsigned long msg;
	int ret, syscall_nr;

	/* No seccomp message is expected in this mode. */
	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
	EXPECT_EQ(0, ret);
	EXPECT_EQ(0, msg);

	/* Entry and exit stops can only be told apart by counting. */
	entry = !entry;
	if (entry) {
		syscall_nr = get_syscall(_metadata, tracee);
		if (syscall_nr == __NR_getpid)
			change_syscall(_metadata, tracee, __NR_getppid);
	}
}
1438
/* Per-test state of the TRACE_syscall fixture. */
FIXTURE_DATA(TRACE_syscall) {
	/* Heap copy of the seccomp filter, built in FIXTURE_SETUP. */
	struct sock_fprog prog;
	/*
	 * tracer: pid returned by setup_trace_fixture().
	 * mytid, mypid, parent: gettid/getpid/getppid results recorded in
	 * FIXTURE_SETUP, used as reference values by the tests.
	 */
	pid_t tracer, mytid, mypid, parent;
};
1443
FIXTURE_SETUP(TRACE_syscall)1444 FIXTURE_SETUP(TRACE_syscall)
1445 {
1446 struct sock_filter filter[] = {
1447 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1448 offsetof(struct seccomp_data, nr)),
1449 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
1450 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
1451 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
1452 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
1453 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1454 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
1455 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1456 };
1457
1458 memset(&self->prog, 0, sizeof(self->prog));
1459 self->prog.filter = malloc(sizeof(filter));
1460 ASSERT_NE(NULL, self->prog.filter);
1461 memcpy(self->prog.filter, filter, sizeof(filter));
1462 self->prog.len = (unsigned short)ARRAY_SIZE(filter);
1463
1464 /* Prepare some testable syscall results. */
1465 self->mytid = syscall(__NR_gettid);
1466 ASSERT_GT(self->mytid, 0);
1467 ASSERT_NE(self->mytid, 1) {
1468 TH_LOG("Running this test as init is not supported. :)");
1469 }
1470
1471 self->mypid = getpid();
1472 ASSERT_GT(self->mypid, 0);
1473 ASSERT_EQ(self->mytid, self->mypid);
1474
1475 self->parent = getppid();
1476 ASSERT_GT(self->parent, 0);
1477 ASSERT_NE(self->parent, self->mypid);
1478
1479 /* Launch tracer. */
1480 self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
1481 false);
1482 }
1483
FIXTURE_TEARDOWN(TRACE_syscall)1484 FIXTURE_TEARDOWN(TRACE_syscall)
1485 {
1486 teardown_trace_fixture(_metadata, self->tracer);
1487 if (self->prog.filter)
1488 free(self->prog.filter);
1489 }
1490
TEST_F(TRACE_syscall,syscall_allowed)1491 TEST_F(TRACE_syscall, syscall_allowed)
1492 {
1493 long ret;
1494
1495 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1496 ASSERT_EQ(0, ret);
1497
1498 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1499 ASSERT_EQ(0, ret);
1500
1501 /* getppid works as expected (no changes). */
1502 EXPECT_EQ(self->parent, syscall(__NR_getppid));
1503 EXPECT_NE(self->mypid, syscall(__NR_getppid));
1504 }
1505
TEST_F(TRACE_syscall,syscall_redirected)1506 TEST_F(TRACE_syscall, syscall_redirected)
1507 {
1508 long ret;
1509
1510 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1511 ASSERT_EQ(0, ret);
1512
1513 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1514 ASSERT_EQ(0, ret);
1515
1516 /* getpid has been redirected to getppid as expected. */
1517 EXPECT_EQ(self->parent, syscall(__NR_getpid));
1518 EXPECT_NE(self->mypid, syscall(__NR_getpid));
1519 }
1520
TEST_F(TRACE_syscall,syscall_dropped)1521 TEST_F(TRACE_syscall, syscall_dropped)
1522 {
1523 long ret;
1524
1525 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1526 ASSERT_EQ(0, ret);
1527
1528 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1529 ASSERT_EQ(0, ret);
1530
1531 #ifdef SYSCALL_NUM_RET_SHARE_REG
1532 /* gettid has been skipped */
1533 EXPECT_EQ(-1, syscall(__NR_gettid));
1534 #else
1535 /* gettid has been skipped and an altered return value stored. */
1536 EXPECT_EQ(1, syscall(__NR_gettid));
1537 #endif
1538 EXPECT_NE(self->mytid, syscall(__NR_gettid));
1539 }
1540
TEST_F(TRACE_syscall,skip_after_RET_TRACE)1541 TEST_F(TRACE_syscall, skip_after_RET_TRACE)
1542 {
1543 struct sock_filter filter[] = {
1544 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1545 offsetof(struct seccomp_data, nr)),
1546 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1547 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1548 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1549 };
1550 struct sock_fprog prog = {
1551 .len = (unsigned short)ARRAY_SIZE(filter),
1552 .filter = filter,
1553 };
1554 long ret;
1555
1556 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1557 ASSERT_EQ(0, ret);
1558
1559 /* Install fixture filter. */
1560 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
1561 ASSERT_EQ(0, ret);
1562
1563 /* Install "errno on getppid" filter. */
1564 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1565 ASSERT_EQ(0, ret);
1566
1567 /* Tracer will redirect getpid to getppid, and we should see EPERM. */
1568 EXPECT_EQ(-1, syscall(__NR_getpid));
1569 EXPECT_EQ(EPERM, errno);
1570 }
1571
/*
 * After the tracer turns getpid into getppid, a second filter that kills on
 * getppid must still apply; the test is declared to end with SIGSYS.
 */
TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
{
	struct sock_filter kill_on_getppid[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog kill_prog = {
		.len = (unsigned short)ARRAY_SIZE(kill_on_getppid),
		.filter = kill_on_getppid,
	};
	long ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ASSERT_EQ(0, ret);

	/* First the fixture's tracing filter... */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* ...then the filter that kills on getppid. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &kill_prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* getpid is redirected to getppid, which should be fatal. */
	EXPECT_NE(self->mypid, syscall(__NR_getpid));
}
1601
TEST_F(TRACE_syscall,skip_after_ptrace)1602 TEST_F(TRACE_syscall, skip_after_ptrace)
1603 {
1604 struct sock_filter filter[] = {
1605 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1606 offsetof(struct seccomp_data, nr)),
1607 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
1608 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
1609 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1610 };
1611 struct sock_fprog prog = {
1612 .len = (unsigned short)ARRAY_SIZE(filter),
1613 .filter = filter,
1614 };
1615 long ret;
1616
1617 /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
1618 teardown_trace_fixture(_metadata, self->tracer);
1619 self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
1620 true);
1621
1622 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1623 ASSERT_EQ(0, ret);
1624
1625 /* Install "errno on getppid" filter. */
1626 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
1627 ASSERT_EQ(0, ret);
1628
1629 /* Tracer will redirect getpid to getppid, and we should see EPERM. */
1630 EXPECT_EQ(-1, syscall(__NR_getpid));
1631 EXPECT_EQ(EPERM, errno);
1632 }
1633
/*
 * Same as kill_after_RET_TRACE, but the syscall is rewritten by a
 * PTRACE_SYSCALL tracer; the test is declared to end with SIGSYS.
 */
TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
{
	struct sock_filter kill_on_getppid[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			offsetof(struct seccomp_data, nr)),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog kill_prog = {
		.len = (unsigned short)ARRAY_SIZE(kill_on_getppid),
		.filter = kill_on_getppid,
	};
	long ret;

	/* Replace the fixture's tracer with a PTRACE_SYSCALL one. */
	teardown_trace_fixture(_metadata, self->tracer);
	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
					   true);

	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
	ASSERT_EQ(0, ret);

	/* Only the "kill on getppid" filter is installed here. */
	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &kill_prog, 0, 0);
	ASSERT_EQ(0, ret);

	/* getpid is redirected to getppid, which should be fatal. */
	EXPECT_NE(self->mypid, syscall(__NR_getpid));
}
1664
/*
 * Fallback definitions for toolchain headers that do not know about the
 * seccomp() syscall yet. Each one is only used when the headers included
 * above did not already provide it.
 */
#ifndef __NR_seccomp
# if defined(__i386__)
#  define __NR_seccomp 354
# elif defined(__x86_64__)
#  define __NR_seccomp 317
# elif defined(__arm__)
#  define __NR_seccomp 383
# elif defined(__aarch64__)
#  define __NR_seccomp 277
# elif defined(__hppa__)
#  define __NR_seccomp 338
# elif defined(__powerpc__)
#  define __NR_seccomp 358
# elif defined(__s390__)
#  define __NR_seccomp 348
# else
/* Unknown architecture: warn at build time and use a placeholder number. */
#  warning "seccomp syscall number unknown for this architecture"
#  define __NR_seccomp 0xffff
# endif
#endif

/* Operation codes passed as the first argument of seccomp(). */
#ifndef SECCOMP_SET_MODE_STRICT
#define SECCOMP_SET_MODE_STRICT 0
#endif

#ifndef SECCOMP_SET_MODE_FILTER
#define SECCOMP_SET_MODE_FILTER 1
#endif

/* Flag passed as the second argument of seccomp() by the TSYNC tests. */
#ifndef SECCOMP_FILTER_FLAG_TSYNC
#define SECCOMP_FILTER_FLAG_TSYNC 1
#endif
1697
#ifndef seccomp
/*
 * Thin wrapper around the raw seccomp syscall.
 *
 * errno is cleared before the call so that callers can inspect errno alone
 * afterwards. Returns whatever syscall() returned, narrowed to int.
 */
int seccomp(unsigned int op, unsigned int flags, void *args)
{
	long rc;

	errno = 0;
	rc = syscall(__NR_seccomp, op, flags, args);
	return rc;
}
#endif
1705
TEST(seccomp_syscall)1706 TEST(seccomp_syscall)
1707 {
1708 struct sock_filter filter[] = {
1709 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1710 };
1711 struct sock_fprog prog = {
1712 .len = (unsigned short)ARRAY_SIZE(filter),
1713 .filter = filter,
1714 };
1715 long ret;
1716
1717 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
1718 ASSERT_EQ(0, ret) {
1719 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1720 }
1721
1722 /* Reject insane operation. */
1723 ret = seccomp(-1, 0, &prog);
1724 ASSERT_NE(ENOSYS, errno) {
1725 TH_LOG("Kernel does not support seccomp syscall!");
1726 }
1727 EXPECT_EQ(EINVAL, errno) {
1728 TH_LOG("Did not reject crazy op value!");
1729 }
1730
1731 /* Reject strict with flags or pointer. */
1732 ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
1733 EXPECT_EQ(EINVAL, errno) {
1734 TH_LOG("Did not reject mode strict with flags!");
1735 }
1736 ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
1737 EXPECT_EQ(EINVAL, errno) {
1738 TH_LOG("Did not reject mode strict with uargs!");
1739 }
1740
1741 /* Reject insane args for filter. */
1742 ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
1743 EXPECT_EQ(EINVAL, errno) {
1744 TH_LOG("Did not reject crazy filter flags!");
1745 }
1746 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
1747 EXPECT_EQ(EFAULT, errno) {
1748 TH_LOG("Did not reject NULL filter!");
1749 }
1750
1751 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1752 EXPECT_EQ(0, errno) {
1753 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
1754 strerror(errno));
1755 }
1756 }
1757
TEST(seccomp_syscall_mode_lock)1758 TEST(seccomp_syscall_mode_lock)
1759 {
1760 struct sock_filter filter[] = {
1761 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1762 };
1763 struct sock_fprog prog = {
1764 .len = (unsigned short)ARRAY_SIZE(filter),
1765 .filter = filter,
1766 };
1767 long ret;
1768
1769 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
1770 ASSERT_EQ(0, ret) {
1771 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1772 }
1773
1774 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1775 ASSERT_NE(ENOSYS, errno) {
1776 TH_LOG("Kernel does not support seccomp syscall!");
1777 }
1778 EXPECT_EQ(0, ret) {
1779 TH_LOG("Could not install filter!");
1780 }
1781
1782 /* Make sure neither entry point will switch to strict. */
1783 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
1784 EXPECT_EQ(EINVAL, errno) {
1785 TH_LOG("Switched to mode strict!");
1786 }
1787
1788 ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
1789 EXPECT_EQ(EINVAL, errno) {
1790 TH_LOG("Switched to mode strict!");
1791 }
1792 }
1793
TEST(TSYNC_first)1794 TEST(TSYNC_first)
1795 {
1796 struct sock_filter filter[] = {
1797 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1798 };
1799 struct sock_fprog prog = {
1800 .len = (unsigned short)ARRAY_SIZE(filter),
1801 .filter = filter,
1802 };
1803 long ret;
1804
1805 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
1806 ASSERT_EQ(0, ret) {
1807 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1808 }
1809
1810 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
1811 &prog);
1812 ASSERT_NE(ENOSYS, errno) {
1813 TH_LOG("Kernel does not support seccomp syscall!");
1814 }
1815 EXPECT_EQ(0, ret) {
1816 TH_LOG("Could not install initial filter with TSYNC!");
1817 }
1818 }
1819
/* Number of sibling threads used by the TSYNC fixture. */
#define TSYNC_SIBLINGS 2
/* State shared between a TSYNC test and one of its sibling threads. */
struct tsync_sibling {
	/* Thread handle, filled in by tsync_start_sibling(). */
	pthread_t tid;
	/* Kernel thread id, recorded by the thread itself via gettid. */
	pid_t system_tid;
	/* Semaphore the thread posts once it has started. */
	sem_t *started;
	/* Condition variable (and its mutex) the thread waits on. */
	pthread_cond_t *cond;
	pthread_mutex_t *mutex;
	/* Non-zero: the thread installs 'prog' on itself via prctl(). */
	int diverge;
	/* How many condition wake-ups the thread consumes before going on. */
	int num_waits;
	/* Filter program applied when 'diverge' is set. */
	struct sock_fprog *prog;
	/* Test metadata of the owning test. */
	struct __test_metadata *metadata;
};
1832
/* Per-test state of the TSYNC fixture. */
FIXTURE_DATA(TSYNC) {
	/*
	 * root_prog: allow-all program; apply_prog: program that kills on
	 * read(). Both are heap copies built in FIXTURE_SETUP.
	 */
	struct sock_fprog root_prog, apply_prog;
	/* One entry per sibling thread. */
	struct tsync_sibling sibling[TSYNC_SIBLINGS];
	/* Posted by each sibling once it is running. */
	sem_t started;
	/* Condition variable and mutex used to release the siblings. */
	pthread_cond_t cond;
	pthread_mutex_t mutex;
	/* Number of siblings whose start has been observed by the test. */
	int sibling_count;
};
1841
FIXTURE_SETUP(TSYNC)1842 FIXTURE_SETUP(TSYNC)
1843 {
1844 struct sock_filter root_filter[] = {
1845 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1846 };
1847 struct sock_filter apply_filter[] = {
1848 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1849 offsetof(struct seccomp_data, nr)),
1850 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
1851 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
1852 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1853 };
1854
1855 memset(&self->root_prog, 0, sizeof(self->root_prog));
1856 memset(&self->apply_prog, 0, sizeof(self->apply_prog));
1857 memset(&self->sibling, 0, sizeof(self->sibling));
1858 self->root_prog.filter = malloc(sizeof(root_filter));
1859 ASSERT_NE(NULL, self->root_prog.filter);
1860 memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
1861 self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
1862
1863 self->apply_prog.filter = malloc(sizeof(apply_filter));
1864 ASSERT_NE(NULL, self->apply_prog.filter);
1865 memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
1866 self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
1867
1868 self->sibling_count = 0;
1869 pthread_mutex_init(&self->mutex, NULL);
1870 pthread_cond_init(&self->cond, NULL);
1871 sem_init(&self->started, 0, 0);
1872 self->sibling[0].tid = 0;
1873 self->sibling[0].cond = &self->cond;
1874 self->sibling[0].started = &self->started;
1875 self->sibling[0].mutex = &self->mutex;
1876 self->sibling[0].diverge = 0;
1877 self->sibling[0].num_waits = 1;
1878 self->sibling[0].prog = &self->root_prog;
1879 self->sibling[0].metadata = _metadata;
1880 self->sibling[1].tid = 0;
1881 self->sibling[1].cond = &self->cond;
1882 self->sibling[1].started = &self->started;
1883 self->sibling[1].mutex = &self->mutex;
1884 self->sibling[1].diverge = 0;
1885 self->sibling[1].prog = &self->root_prog;
1886 self->sibling[1].num_waits = 1;
1887 self->sibling[1].metadata = _metadata;
1888 }
1889
FIXTURE_TEARDOWN(TSYNC)1890 FIXTURE_TEARDOWN(TSYNC)
1891 {
1892 int sib = 0;
1893
1894 if (self->root_prog.filter)
1895 free(self->root_prog.filter);
1896 if (self->apply_prog.filter)
1897 free(self->apply_prog.filter);
1898
1899 for ( ; sib < self->sibling_count; ++sib) {
1900 struct tsync_sibling *s = &self->sibling[sib];
1901 void *status;
1902
1903 if (!s->tid)
1904 continue;
1905 if (pthread_kill(s->tid, 0)) {
1906 pthread_cancel(s->tid);
1907 pthread_join(s->tid, &status);
1908 }
1909 }
1910 pthread_mutex_destroy(&self->mutex);
1911 pthread_cond_destroy(&self->cond);
1912 sem_destroy(&self->started);
1913 }
1914
tsync_sibling(void * data)1915 void *tsync_sibling(void *data)
1916 {
1917 long ret = 0;
1918 struct tsync_sibling *me = data;
1919
1920 me->system_tid = syscall(__NR_gettid);
1921
1922 pthread_mutex_lock(me->mutex);
1923 if (me->diverge) {
1924 /* Just re-apply the root prog to fork the tree */
1925 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
1926 me->prog, 0, 0);
1927 }
1928 sem_post(me->started);
1929 /* Return outside of started so parent notices failures. */
1930 if (ret) {
1931 pthread_mutex_unlock(me->mutex);
1932 return (void *)SIBLING_EXIT_FAILURE;
1933 }
1934 do {
1935 pthread_cond_wait(me->cond, me->mutex);
1936 me->num_waits = me->num_waits - 1;
1937 } while (me->num_waits);
1938 pthread_mutex_unlock(me->mutex);
1939
1940 ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
1941 if (!ret)
1942 return (void *)SIBLING_EXIT_NEWPRIVS;
1943 read(0, NULL, 0);
1944 return (void *)SIBLING_EXIT_UNKILLED;
1945 }
1946
tsync_start_sibling(struct tsync_sibling * sibling)1947 void tsync_start_sibling(struct tsync_sibling *sibling)
1948 {
1949 pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
1950 }
1951
TEST_F(TSYNC,siblings_fail_prctl)1952 TEST_F(TSYNC, siblings_fail_prctl)
1953 {
1954 long ret;
1955 void *status;
1956 struct sock_filter filter[] = {
1957 BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
1958 offsetof(struct seccomp_data, nr)),
1959 BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
1960 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
1961 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
1962 };
1963 struct sock_fprog prog = {
1964 .len = (unsigned short)ARRAY_SIZE(filter),
1965 .filter = filter,
1966 };
1967
1968 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
1969 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
1970 }
1971
1972 /* Check prctl failure detection by requesting sib 0 diverge. */
1973 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
1974 ASSERT_NE(ENOSYS, errno) {
1975 TH_LOG("Kernel does not support seccomp syscall!");
1976 }
1977 ASSERT_EQ(0, ret) {
1978 TH_LOG("setting filter failed");
1979 }
1980
1981 self->sibling[0].diverge = 1;
1982 tsync_start_sibling(&self->sibling[0]);
1983 tsync_start_sibling(&self->sibling[1]);
1984
1985 while (self->sibling_count < TSYNC_SIBLINGS) {
1986 sem_wait(&self->started);
1987 self->sibling_count++;
1988 }
1989
1990 /* Signal the threads to clean up*/
1991 pthread_mutex_lock(&self->mutex);
1992 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
1993 TH_LOG("cond broadcast non-zero");
1994 }
1995 pthread_mutex_unlock(&self->mutex);
1996
1997 /* Ensure diverging sibling failed to call prctl. */
1998 pthread_join(self->sibling[0].tid, &status);
1999 EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
2000 pthread_join(self->sibling[1].tid, &status);
2001 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2002 }
2003
TEST_F(TSYNC,two_siblings_with_ancestor)2004 TEST_F(TSYNC, two_siblings_with_ancestor)
2005 {
2006 long ret;
2007 void *status;
2008
2009 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2010 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2011 }
2012
2013 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2014 ASSERT_NE(ENOSYS, errno) {
2015 TH_LOG("Kernel does not support seccomp syscall!");
2016 }
2017 ASSERT_EQ(0, ret) {
2018 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2019 }
2020 tsync_start_sibling(&self->sibling[0]);
2021 tsync_start_sibling(&self->sibling[1]);
2022
2023 while (self->sibling_count < TSYNC_SIBLINGS) {
2024 sem_wait(&self->started);
2025 self->sibling_count++;
2026 }
2027
2028 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2029 &self->apply_prog);
2030 ASSERT_EQ(0, ret) {
2031 TH_LOG("Could install filter on all threads!");
2032 }
2033 /* Tell the siblings to test the policy */
2034 pthread_mutex_lock(&self->mutex);
2035 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2036 TH_LOG("cond broadcast non-zero");
2037 }
2038 pthread_mutex_unlock(&self->mutex);
2039 /* Ensure they are both killed and don't exit cleanly. */
2040 pthread_join(self->sibling[0].tid, &status);
2041 EXPECT_EQ(0x0, (long)status);
2042 pthread_join(self->sibling[1].tid, &status);
2043 EXPECT_EQ(0x0, (long)status);
2044 }
2045
TEST_F(TSYNC,two_sibling_want_nnp)2046 TEST_F(TSYNC, two_sibling_want_nnp)
2047 {
2048 void *status;
2049
2050 /* start siblings before any prctl() operations */
2051 tsync_start_sibling(&self->sibling[0]);
2052 tsync_start_sibling(&self->sibling[1]);
2053 while (self->sibling_count < TSYNC_SIBLINGS) {
2054 sem_wait(&self->started);
2055 self->sibling_count++;
2056 }
2057
2058 /* Tell the siblings to test no policy */
2059 pthread_mutex_lock(&self->mutex);
2060 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2061 TH_LOG("cond broadcast non-zero");
2062 }
2063 pthread_mutex_unlock(&self->mutex);
2064
2065 /* Ensure they are both upset about lacking nnp. */
2066 pthread_join(self->sibling[0].tid, &status);
2067 EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2068 pthread_join(self->sibling[1].tid, &status);
2069 EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
2070 }
2071
TEST_F(TSYNC,two_siblings_with_no_filter)2072 TEST_F(TSYNC, two_siblings_with_no_filter)
2073 {
2074 long ret;
2075 void *status;
2076
2077 /* start siblings before any prctl() operations */
2078 tsync_start_sibling(&self->sibling[0]);
2079 tsync_start_sibling(&self->sibling[1]);
2080 while (self->sibling_count < TSYNC_SIBLINGS) {
2081 sem_wait(&self->started);
2082 self->sibling_count++;
2083 }
2084
2085 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2086 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2087 }
2088
2089 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2090 &self->apply_prog);
2091 ASSERT_NE(ENOSYS, errno) {
2092 TH_LOG("Kernel does not support seccomp syscall!");
2093 }
2094 ASSERT_EQ(0, ret) {
2095 TH_LOG("Could install filter on all threads!");
2096 }
2097
2098 /* Tell the siblings to test the policy */
2099 pthread_mutex_lock(&self->mutex);
2100 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2101 TH_LOG("cond broadcast non-zero");
2102 }
2103 pthread_mutex_unlock(&self->mutex);
2104
2105 /* Ensure they are both killed and don't exit cleanly. */
2106 pthread_join(self->sibling[0].tid, &status);
2107 EXPECT_EQ(0x0, (long)status);
2108 pthread_join(self->sibling[1].tid, &status);
2109 EXPECT_EQ(0x0, (long)status);
2110 }
2111
TEST_F(TSYNC,two_siblings_with_one_divergence)2112 TEST_F(TSYNC, two_siblings_with_one_divergence)
2113 {
2114 long ret;
2115 void *status;
2116
2117 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2118 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2119 }
2120
2121 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2122 ASSERT_NE(ENOSYS, errno) {
2123 TH_LOG("Kernel does not support seccomp syscall!");
2124 }
2125 ASSERT_EQ(0, ret) {
2126 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2127 }
2128 self->sibling[0].diverge = 1;
2129 tsync_start_sibling(&self->sibling[0]);
2130 tsync_start_sibling(&self->sibling[1]);
2131
2132 while (self->sibling_count < TSYNC_SIBLINGS) {
2133 sem_wait(&self->started);
2134 self->sibling_count++;
2135 }
2136
2137 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2138 &self->apply_prog);
2139 ASSERT_EQ(self->sibling[0].system_tid, ret) {
2140 TH_LOG("Did not fail on diverged sibling.");
2141 }
2142
2143 /* Wake the threads */
2144 pthread_mutex_lock(&self->mutex);
2145 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2146 TH_LOG("cond broadcast non-zero");
2147 }
2148 pthread_mutex_unlock(&self->mutex);
2149
2150 /* Ensure they are both unkilled. */
2151 pthread_join(self->sibling[0].tid, &status);
2152 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2153 pthread_join(self->sibling[1].tid, &status);
2154 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2155 }
2156
TEST_F(TSYNC,two_siblings_not_under_filter)2157 TEST_F(TSYNC, two_siblings_not_under_filter)
2158 {
2159 long ret, sib;
2160 void *status;
2161
2162 ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
2163 TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
2164 }
2165
2166 /*
2167 * Sibling 0 will have its own seccomp policy
2168 * and Sibling 1 will not be under seccomp at
2169 * all. Sibling 1 will enter seccomp and 0
2170 * will cause failure.
2171 */
2172 self->sibling[0].diverge = 1;
2173 tsync_start_sibling(&self->sibling[0]);
2174 tsync_start_sibling(&self->sibling[1]);
2175
2176 while (self->sibling_count < TSYNC_SIBLINGS) {
2177 sem_wait(&self->started);
2178 self->sibling_count++;
2179 }
2180
2181 ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
2182 ASSERT_NE(ENOSYS, errno) {
2183 TH_LOG("Kernel does not support seccomp syscall!");
2184 }
2185 ASSERT_EQ(0, ret) {
2186 TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
2187 }
2188
2189 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2190 &self->apply_prog);
2191 ASSERT_EQ(ret, self->sibling[0].system_tid) {
2192 TH_LOG("Did not fail on diverged sibling.");
2193 }
2194 sib = 1;
2195 if (ret == self->sibling[0].system_tid)
2196 sib = 0;
2197
2198 pthread_mutex_lock(&self->mutex);
2199
2200 /* Increment the other siblings num_waits so we can clean up
2201 * the one we just saw.
2202 */
2203 self->sibling[!sib].num_waits += 1;
2204
2205 /* Signal the thread to clean up*/
2206 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2207 TH_LOG("cond broadcast non-zero");
2208 }
2209 pthread_mutex_unlock(&self->mutex);
2210 pthread_join(self->sibling[sib].tid, &status);
2211 EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
2212 /* Poll for actual task death. pthread_join doesn't guarantee it. */
2213 while (!kill(self->sibling[sib].system_tid, 0))
2214 sleep(0.1);
2215 /* Switch to the remaining sibling */
2216 sib = !sib;
2217
2218 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2219 &self->apply_prog);
2220 ASSERT_EQ(0, ret) {
2221 TH_LOG("Expected the remaining sibling to sync");
2222 };
2223
2224 pthread_mutex_lock(&self->mutex);
2225
2226 /* If remaining sibling didn't have a chance to wake up during
2227 * the first broadcast, manually reduce the num_waits now.
2228 */
2229 if (self->sibling[sib].num_waits > 1)
2230 self->sibling[sib].num_waits = 1;
2231 ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
2232 TH_LOG("cond broadcast non-zero");
2233 }
2234 pthread_mutex_unlock(&self->mutex);
2235 pthread_join(self->sibling[sib].tid, &status);
2236 EXPECT_EQ(0, (long)status);
2237 /* Poll for actual task death. pthread_join doesn't guarantee it. */
2238 while (!kill(self->sibling[sib].system_tid, 0))
2239 sleep(0.1);
2240
2241 ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
2242 &self->apply_prog);
2243 ASSERT_EQ(0, ret); /* just us chickens */
2244 }
2245
/*
 * Make sure restarted syscalls are seen directly as "restart_syscall".
 *
 * A forked child puts itself under ptrace, installs the filter below and
 * calls nanosleep().  The parent, acting as tracer, interrupts the sleep
 * with SIGSTOP/SIGCONT and checks that the resumed call raises a second
 * seccomp trace event tagged 0x200 (the restart_syscall target) rather
 * than 0x100 (the nanosleep target).  A pipe carries two one-byte sync
 * messages from parent to child: '.' to begin and '!' to finish.
 */
TEST(syscall_restart)
{
	long ret;
	unsigned long msg;
	pid_t child_pid;
	int pipefd[2];
	int status;
	siginfo_t info = { };
	/*
	 * Filter layout: load the syscall number, then compare it against
	 * each known syscall.  The jump offsets are relative to the next
	 * instruction, so the order of these entries must not change.
	 */
	struct sock_filter filter[] = {
		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
			 offsetof(struct seccomp_data, nr)),

		/* sigreturn, read, exit and rt_sigreturn all land on ALLOW. */
#ifdef __NR_sigreturn
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
#endif
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0),
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),

		/* Allow __NR_write for easy logging. */
		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
		/* Anything not matched above is killed. */
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
		/* The nanosleep jump target. */
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
		/* The restart_syscall jump target. */
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)ARRAY_SIZE(filter),
		.filter = filter,
	};
#if defined(__arm__)
	struct utsname utsbuf;
#endif

	ASSERT_EQ(0, pipe(pipefd));

	child_pid = fork();
	ASSERT_LE(0, child_pid);
	if (child_pid == 0) {
		/* Child uses EXPECT not ASSERT to deliver status correctly. */
		char buf = ' ';
		struct timespec timeout = { };

		/* Attach parent as tracer and stop. */
		EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
		EXPECT_EQ(0, raise(SIGSTOP));

		/* The child only reads from the pipe. */
		EXPECT_EQ(0, close(pipefd[1]));

		EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
			TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
		}

		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
		EXPECT_EQ(0, ret) {
			TH_LOG("Failed to install filter!");
		}

		/* Block until the parent has set its ptrace options. */
		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
			TH_LOG("Failed to read() sync from parent");
		}
		EXPECT_EQ('.', buf) {
			TH_LOG("Failed to get sync data from read()");
		}

		/* Start nanosleep to be interrupted. */
		timeout.tv_sec = 1;
		errno = 0;
		EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
			TH_LOG("Call to nanosleep() failed (errno %d)", errno);
		}

		/* Read final sync from parent. */
		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
			TH_LOG("Failed final read() from parent");
		}
		EXPECT_EQ('!', buf) {
			TH_LOG("Failed to get final data from read()");
		}

		/*
		 * Directly report the status of our test harness results.
		 * The raw __NR_exit syscall is used because it is the exit
		 * call on the filter's allow list.
		 */
		syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
						     : EXIT_FAILURE);
	}
	/* The parent only writes to the pipe. */
	EXPECT_EQ(0, close(pipefd[0]));

	/* Attach to child, setup options, and release. */
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
			    PTRACE_O_TRACESECCOMP));
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
	ASSERT_EQ(1, write(pipefd[1], ".", 1));

	/* Wait for nanosleep() to start. */
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
	/* The ptrace event number sits above the low 16 status bits. */
	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
	/* 0x100 is the data value attached to the nanosleep target. */
	ASSERT_EQ(0x100, msg);
	EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid));

	/* Might as well check siginfo for sanity while we're here. */
	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
	ASSERT_EQ(SIGTRAP, info.si_signo);
	ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
	EXPECT_EQ(0, info.si_errno);
	EXPECT_EQ(getuid(), info.si_uid);
	/* Verify signal delivery came from child (seccomp-triggered). */
	EXPECT_EQ(child_pid, info.si_pid);

	/* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
	ASSERT_EQ(0, kill(child_pid, SIGSTOP));
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
	/* Verify signal delivery came from parent now. */
	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
	EXPECT_EQ(getpid(), info.si_pid);

	/* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
	ASSERT_EQ(0, kill(child_pid, SIGCONT));
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(SIGCONT, WSTOPSIG(status));
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));

	/* Wait for restart_syscall() to start. */
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	ASSERT_EQ(true, WIFSTOPPED(status));
	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));

	/* 0x200 is the data value attached to the restart_syscall target. */
	ASSERT_EQ(0x200, msg);
	ret = get_syscall(_metadata, child_pid);
#if defined(__arm__)
	/*
	 * FIXME:
	 * - native ARM registers do NOT expose true syscall.
	 * - compat ARM registers on ARM64 DO expose true syscall.
	 */
	ASSERT_EQ(0, uname(&utsbuf));
	/* A machine name starting with "arm" is treated as native ARM. */
	if (strncmp(utsbuf.machine, "arm", 3) == 0) {
		EXPECT_EQ(__NR_nanosleep, ret);
	} else
#endif
	{
		EXPECT_EQ(__NR_restart_syscall, ret);
	}

	/* Write again to end test. */
	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
	ASSERT_EQ(1, write(pipefd[1], "!", 1));
	EXPECT_EQ(0, close(pipefd[1]));

	/* Fold the child's result into ours: killed or non-zero exit fails. */
	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
	if (WIFSIGNALED(status) || WEXITSTATUS(status))
		_metadata->passed = 0;
}
2414
2415 /*
2416 * TODO:
2417 * - add microbenchmarks
2418 * - expand NNP testing
2419 * - better arch-specific TRACE and TRAP handlers.
2420 * - endianness checking when appropriate
2421 * - 64-bit arg prodding
2422 * - arch value testing (x86 modes especially)
2423 * - ...
2424 */
2425
/* Harness-provided entry point; presumably defined by test_harness.h (not visible here). */
TEST_HARNESS_MAIN
2427