1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <errno.h>
6 #include <fcntl.h>
7 #include <linux/unistd.h>
8 #include <netinet/in.h>
9 #include <netinet/tcp.h>
10 #include <netinet/udp.h>
11 #include <pthread.h>
12 #include <signal.h>
13 #include <stdarg.h>
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <sys/ioctl.h>
18 #include <sys/ipc.h>
19 #include <sys/mman.h>
20 #include <sys/prctl.h>
21 #include <sys/resource.h>
22 #include <sys/shm.h>
23 #include <sys/socket.h>
24 #include <sys/time.h>
25 #include <sys/types.h>
26 #include <time.h>
27 #include <unistd.h>
28
29 #include "base/macros.h"
30 #include "base/posix/eintr_wrapper.h"
31 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
32 #include "sandbox/linux/seccomp-bpf/sandbox_bpf_policy.h"
33 #include "sandbox/linux/services/linux_syscalls.h"
34
35 using sandbox::ErrorCode;
36 using sandbox::SandboxBPF;
37 using sandbox::SandboxBPFPolicy;
38 using sandbox::arch_seccomp_data;
39
40 #define ERR EPERM
41
42 // We don't expect our sandbox to do anything useful yet. So, we will fail
43 // almost immediately. For now, force the code to continue running. The
44 // following line should be removed as soon as the sandbox is starting to
45 // actually enforce restrictions in a meaningful way:
46 #define _exit(x) do { } while (0)
47
48 namespace {
49
// Sends one or more file descriptors over the socket "transport" in a single
// sendmsg() call, using an SCM_RIGHTS control message.
//
// The variadic arguments are the descriptors to send, passed as "int" and
// terminated by a negative value, e.g. SendFds(s, buf, len, fd0, fd1, -1).
//
// The message payload always starts with an "int" set to zero (GetFds() in
// this file treats a non-zero leading integer as an errno value from the
// sender). If "buf" is non-NULL and "len" is non-zero, the "len" bytes at
// "buf" follow that integer.
//
// Returns true if sendmsg() reports that the complete payload was written.
// Returns false if no descriptor was supplied, or if sendmsg() failed or
// wrote fewer bytes than requested.
bool SendFds(int transport, const void *buf, size_t len, ...) {
  // First pass over the variadic arguments: count the descriptors.
  int count = 0;
  va_list ap;
  va_start(ap, len);
  while (va_arg(ap, int) >= 0) {
    ++count;
  }
  va_end(ap);
  if (!count) {
    return false;
  }

  const bool has_payload = buf && len > 0;

  // Note: a runtime-sized array is a compiler extension in C++; it is kept
  // here so that this function does not need to allocate.
  char cmsg_buf[CMSG_SPACE(count*sizeof(int))];
  memset(cmsg_buf, 0, sizeof(cmsg_buf));
  struct iovec iov[2] = { { 0 } };
  struct msghdr msg = { 0 };
  int dummy = 0;
  iov[0].iov_base = &dummy;
  iov[0].iov_len = sizeof(dummy);
  if (has_payload) {
    // sendmsg() does not modify the data; the cast only satisfies iovec.
    iov[1].iov_base = const_cast<void *>(buf);
    iov[1].iov_len = len;
  }
  msg.msg_iov = iov;
  msg.msg_iovlen = has_payload ? 2 : 1;
  msg.msg_control = cmsg_buf;
  msg.msg_controllen = CMSG_LEN(count*sizeof(int));
  struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
  cmsg->cmsg_level = SOL_SOCKET;
  cmsg->cmsg_type = SCM_RIGHTS;
  cmsg->cmsg_len = CMSG_LEN(count*sizeof(int));

  // Second pass: copy the descriptors into the control message. Every
  // va_start() must be paired with a va_end() before the function returns.
  va_start(ap, len);
  for (int i = 0, fd; (fd = va_arg(ap, int)) >= 0; ++i) {
    (reinterpret_cast<int *>(CMSG_DATA(cmsg)))[i] = fd;
  }
  va_end(ap);

  return sendmsg(transport, &msg, 0) ==
      static_cast<ssize_t>(sizeof(dummy) + (has_payload ? len : 0));
}
87
// Receives file descriptors (and optionally a payload) from the socket
// "transport" with a single recvmsg() call.
//
// The variadic arguments are "int *" output slots, terminated by NULL. Every
// slot is set to -1 up front and is only overwritten on success.
//
// The incoming payload is expected to start with an "int". A non-zero value
// is treated as an errno sent by the peer: it is stored in errno and the call
// fails. If "buf" and "len" are non-NULL and "*len" is non-zero, up to "*len"
// further payload bytes are read into "buf".
//
// If "len" is non-NULL, "*len" is updated to the number of payload bytes
// received after the leading integer (zero on error or short read).
//
// Returns true only if the full expected payload arrived, the leading integer
// was zero, and exactly one SCM_RIGHTS control message carrying the expected
// number of descriptors was received without truncation. When the byte count
// is wrong but recvmsg() itself did not fail, errno is cleared to zero; a
// malformed control message is reported as EBADF.
bool GetFds(int transport, void *buf, size_t *len, ...) {
  va_list args;

  // Pass 1: count the output slots and mark each of them as "no descriptor".
  int num_fds = 0;
  va_start(args, len);
  for (;;) {
    int *slot = va_arg(args, int *);
    if (slot == NULL) {
      break;
    }
    *slot = -1;
    ++num_fds;
  }
  va_end(args);
  if (num_fds == 0) {
    return false;
  }

  const bool want_payload = buf && len && *len > 0;

  char control[CMSG_SPACE(num_fds*sizeof(int))];
  memset(control, 0, sizeof(control));

  // The first iovec receives the leading integer, the second one (if any)
  // receives the caller's payload.
  int sender_errno;
  struct iovec vec[2] = { { 0 } };
  vec[0].iov_base = &sender_errno;
  vec[0].iov_len = sizeof(int);
  if (want_payload) {
    vec[1].iov_base = buf;
    vec[1].iov_len = *len;
  }

  struct msghdr hdr = { 0 };
  hdr.msg_iov = vec;
  hdr.msg_iovlen = want_payload ? 2 : 1;
  hdr.msg_control = control;
  hdr.msg_controllen = CMSG_LEN(num_fds*sizeof(int));

  const ssize_t received = recvmsg(transport, &hdr, 0);

  // Report how much payload followed the leading integer.
  if (len) {
    if (received > static_cast<int>(sizeof(int))) {
      *len = received - sizeof(int);
    } else {
      *len = 0;
    }
  }

  // Anything other than the exact expected byte count is a failure.
  if (received != static_cast<ssize_t>(sizeof(int) + vec[1].iov_len)) {
    if (received >= 0) {
      errno = 0;
    }
    return false;
  }

  // A non-zero leading integer is an errno value from the sender; no file
  // handles are expected in that case.
  if (sender_errno) {
    errno = sender_errno;
    return false;
  }

  // Validate the control message before trusting its contents.
  struct cmsghdr *ctl = CMSG_FIRSTHDR(&hdr);
  const bool well_formed =
      !(hdr.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) &&
      ctl != NULL &&
      ctl->cmsg_level == SOL_SOCKET &&
      ctl->cmsg_type == SCM_RIGHTS &&
      ctl->cmsg_len == CMSG_LEN(num_fds*sizeof(int));
  if (!well_formed) {
    errno = EBADF;
    return false;
  }

  // Pass 2: hand the received descriptors out to the caller's slots.
  const int *incoming = reinterpret_cast<int *>(CMSG_DATA(ctl));
  int index = 0;
  va_start(args, len);
  while (int *slot = va_arg(args, int *)) {
    *slot = incoming[index++];
  }
  va_end(args);
  return true;
}
147
148
// POSIX doesn't define any async-signal safe function for converting
// an integer to ASCII. We'll have to define our own version.
// itoa_r() converts a (signed) integer to ASCII. It returns "buf", if the
// conversion was successful or NULL otherwise. It never writes more than "sz"
// bytes. Output will be truncated as needed, and a NUL character is always
// appended (provided "buf" is non-NULL and "sz" is at least one).
// When the output has to be truncated, the digits that are kept are the
// least-significant ones, and NULL is returned.
char *itoa_r(int i, char *buf, size_t sz) {
  // Make sure we have a buffer and can write at least one NUL byte.
  if (buf == NULL || sz < 1) {
    return NULL;
  }
  size_t n = 1;

  // Handle negative numbers.
  char *start = buf;
  unsigned int magnitude = static_cast<unsigned int>(i);
  if (i < 0) {
    // Make sure we can write the '-' character.
    if (++n > sz) {
      *start = '\000';
      return NULL;
    }
    *start++ = '-';

    // Negate in unsigned arithmetic. Unlike "-i", this is well-defined for
    // every value, including the lowest-most negative integer.
    magnitude = 0u - magnitude;
  }

  // Loop until we have converted the entire number. Output at least one
  // character (i.e. '0').
  char *ptr = start;
  do {
    // Make sure there is still enough space left in our output buffer.
    if (++n > sz) {
      buf = NULL;
      break;
    }
    *ptr++ = static_cast<char>('0' + magnitude % 10);
    magnitude /= 10;
  } while (magnitude);

  // Terminate the output with a NUL character.
  *ptr = '\000';

  // Conversion to ASCII actually resulted in the digits being in reverse
  // order. We can't easily generate them in forward order, as we can't tell
  // the number of characters needed until we are done converting.
  // So, now, we reverse the string (except for the possible "-" sign). The
  // loop never moves a pointer outside of [start, ptr].
  for (char *lo = start, *hi = ptr; hi - lo > 1; ++lo) {
    --hi;
    const char ch = *hi;
    *hi = *lo;
    *lo = ch;
  }
  return buf;
}
216
217 // This handler gets called, whenever we encounter a system call that we
218 // don't recognize explicitly. For the purposes of this program, we just
219 // log the system call and then deny it. More elaborate sandbox policies
220 // might try to evaluate the system call in user-space, instead.
221 // The only notable complication is that this function must be async-signal
222 // safe. This restricts the libary functions that we can call.
DefaultHandler(const struct arch_seccomp_data & data,void *)223 intptr_t DefaultHandler(const struct arch_seccomp_data& data, void *) {
224 static const char msg0[] = "Disallowed system call #";
225 static const char msg1[] = "\n";
226 char buf[sizeof(msg0) - 1 + 25 + sizeof(msg1)];
227
228 *buf = '\000';
229 strncat(buf, msg0, sizeof(buf) - 1);
230
231 char *ptr = strrchr(buf, '\000');
232 itoa_r(data.nr, ptr, sizeof(buf) - (ptr - buf));
233
234 ptr = strrchr(ptr, '\000');
235 strncat(ptr, msg1, sizeof(buf) - (ptr - buf));
236
237 ptr = strrchr(ptr, '\000');
238 if (HANDLE_EINTR(write(2, buf, ptr - buf))) { }
239
240 return -ERR;
241 }
242
243 class DemoPolicy : public SandboxBPFPolicy {
244 public:
DemoPolicy()245 DemoPolicy() {}
246 virtual ErrorCode EvaluateSyscall(SandboxBPF* sandbox,
247 int sysno) const OVERRIDE;
248
249 private:
250 DISALLOW_COPY_AND_ASSIGN(DemoPolicy);
251 };
252
EvaluateSyscall(SandboxBPF * sandbox,int sysno) const253 ErrorCode DemoPolicy::EvaluateSyscall(SandboxBPF* sandbox, int sysno) const {
254 switch (sysno) {
255 #if defined(__NR_accept)
256 case __NR_accept: case __NR_accept4:
257 #endif
258 case __NR_alarm:
259 case __NR_brk:
260 case __NR_clock_gettime:
261 case __NR_close:
262 case __NR_dup: case __NR_dup2:
263 case __NR_epoll_create: case __NR_epoll_ctl: case __NR_epoll_wait:
264 case __NR_exit: case __NR_exit_group:
265 case __NR_fcntl:
266 #if defined(__NR_fcntl64)
267 case __NR_fcntl64:
268 #endif
269 case __NR_fdatasync:
270 case __NR_fstat:
271 #if defined(__NR_fstat64)
272 case __NR_fstat64:
273 #endif
274 case __NR_ftruncate:
275 case __NR_futex:
276 case __NR_getdents: case __NR_getdents64:
277 case __NR_getegid:
278 #if defined(__NR_getegid32)
279 case __NR_getegid32:
280 #endif
281 case __NR_geteuid:
282 #if defined(__NR_geteuid32)
283 case __NR_geteuid32:
284 #endif
285 case __NR_getgid:
286 #if defined(__NR_getgid32)
287 case __NR_getgid32:
288 #endif
289 case __NR_getitimer: case __NR_setitimer:
290 #if defined(__NR_getpeername)
291 case __NR_getpeername:
292 #endif
293 case __NR_getpid: case __NR_gettid:
294 #if defined(__NR_getsockname)
295 case __NR_getsockname:
296 #endif
297 case __NR_gettimeofday:
298 case __NR_getuid:
299 #if defined(__NR_getuid32)
300 case __NR_getuid32:
301 #endif
302 #if defined(__NR__llseek)
303 case __NR__llseek:
304 #endif
305 case __NR_lseek:
306 case __NR_nanosleep:
307 case __NR_pipe: case __NR_pipe2:
308 case __NR_poll:
309 case __NR_pread64: case __NR_preadv:
310 case __NR_pwrite64: case __NR_pwritev:
311 case __NR_read: case __NR_readv:
312 case __NR_restart_syscall:
313 case __NR_set_robust_list:
314 case __NR_rt_sigaction:
315 #if defined(__NR_sigaction)
316 case __NR_sigaction:
317 #endif
318 #if defined(__NR_signal)
319 case __NR_signal:
320 #endif
321 case __NR_rt_sigprocmask:
322 #if defined(__NR_sigprocmask)
323 case __NR_sigprocmask:
324 #endif
325 #if defined(__NR_shutdown)
326 case __NR_shutdown:
327 #endif
328 case __NR_rt_sigreturn:
329 #if defined(__NR_sigreturn)
330 case __NR_sigreturn:
331 #endif
332 #if defined(__NR_socketpair)
333 case __NR_socketpair:
334 #endif
335 case __NR_time:
336 case __NR_uname:
337 case __NR_write: case __NR_writev:
338 return ErrorCode(ErrorCode::ERR_ALLOWED);
339
340 case __NR_prctl:
341 // Allow PR_SET_DUMPABLE and PR_GET_DUMPABLE. Do not allow anything else.
342 return sandbox->Cond(1, ErrorCode::TP_32BIT, ErrorCode::OP_EQUAL,
343 PR_SET_DUMPABLE,
344 ErrorCode(ErrorCode::ERR_ALLOWED),
345 sandbox->Cond(1, ErrorCode::TP_32BIT, ErrorCode::OP_EQUAL,
346 PR_GET_DUMPABLE,
347 ErrorCode(ErrorCode::ERR_ALLOWED),
348 sandbox->Trap(DefaultHandler, NULL)));
349
350 // The following system calls are temporarily permitted. This must be
351 // tightened later. But we currently don't implement enough of the sandboxing
352 // API to do so.
353 // As is, this sandbox isn't exactly safe :-/
354 #if defined(__NR_sendmsg)
355 case __NR_sendmsg: case __NR_sendto:
356 case __NR_recvmsg: case __NR_recvfrom:
357 case __NR_getsockopt: case __NR_setsockopt:
358 #elif defined(__NR_socketcall)
359 case __NR_socketcall:
360 #endif
361 #if defined(__NR_shmat)
362 case __NR_shmat: case __NR_shmctl: case __NR_shmdt: case __NR_shmget:
363 #elif defined(__NR_ipc)
364 case __NR_ipc:
365 #endif
366 #if defined(__NR_mmap2)
367 case __NR_mmap2:
368 #else
369 case __NR_mmap:
370 #endif
371 #if defined(__NR_ugetrlimit)
372 case __NR_ugetrlimit:
373 #endif
374 case __NR_getrlimit:
375 case __NR_ioctl:
376 case __NR_clone:
377 case __NR_munmap: case __NR_mprotect: case __NR_madvise:
378 case __NR_remap_file_pages:
379 return ErrorCode(ErrorCode::ERR_ALLOWED);
380
381 // Everything that isn't explicitly allowed is denied.
382 default:
383 return sandbox->Trap(DefaultHandler, NULL);
384 }
385 }
386
// Minimal thread entry point: returns its argument unchanged, so that the
// value passed to pthread_create() can be compared with the one obtained
// from pthread_join().
void *ThreadFnc(void *arg) {
  void *const result = arg;
  return result;
}
390
SendmsgStressThreadFnc(void * arg)391 void *SendmsgStressThreadFnc(void *arg) {
392 if (arg) { }
393 static const int repetitions = 100;
394 static const int kNumFds = 3;
395 for (int rep = 0; rep < repetitions; ++rep) {
396 int fds[2 + kNumFds];
397 if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds)) {
398 perror("socketpair()");
399 _exit(1);
400 }
401 size_t len = 4;
402 char buf[4];
403 if (!SendFds(fds[0], "test", 4, fds[1], fds[1], fds[1], -1) ||
404 !GetFds(fds[1], buf, &len, fds+2, fds+3, fds+4, NULL) ||
405 len != 4 ||
406 memcmp(buf, "test", len) ||
407 write(fds[2], "demo", 4) != 4 ||
408 read(fds[0], buf, 4) != 4 ||
409 memcmp(buf, "demo", 4)) {
410 perror("sending/receiving of fds");
411 _exit(1);
412 }
413 for (int i = 0; i < 2+kNumFds; ++i) {
414 if (close(fds[i])) {
415 perror("close");
416 _exit(1);
417 }
418 }
419 }
420 return NULL;
421 }
422
423 } // namespace
424
main(int argc,char * argv[])425 int main(int argc, char *argv[]) {
426 if (argc) { }
427 if (argv) { }
428 int proc_fd = open("/proc", O_RDONLY|O_DIRECTORY);
429 if (SandboxBPF::SupportsSeccompSandbox(proc_fd) !=
430 SandboxBPF::STATUS_AVAILABLE) {
431 perror("sandbox");
432 _exit(1);
433 }
434 SandboxBPF sandbox;
435 sandbox.set_proc_fd(proc_fd);
436 sandbox.SetSandboxPolicy(new DemoPolicy());
437 if (!sandbox.StartSandbox(SandboxBPF::PROCESS_SINGLE_THREADED)) {
438 fprintf(stderr, "StartSandbox() failed");
439 _exit(1);
440 }
441
442 // Check that we can create threads
443 pthread_t thr;
444 if (!pthread_create(&thr, NULL, ThreadFnc,
445 reinterpret_cast<void *>(0x1234))) {
446 void *ret;
447 pthread_join(thr, &ret);
448 if (ret != reinterpret_cast<void *>(0x1234)) {
449 perror("clone() failed");
450 _exit(1);
451 }
452 } else {
453 perror("clone() failed");
454 _exit(1);
455 }
456
457 // Check that we handle restart_syscall() without dieing. This is a little
458 // tricky to trigger. And I can't think of a good way to verify whether it
459 // actually executed.
460 signal(SIGALRM, SIG_IGN);
461 const struct itimerval tv = { { 0, 0 }, { 0, 5*1000 } };
462 const struct timespec tmo = { 0, 100*1000*1000 };
463 setitimer(ITIMER_REAL, &tv, NULL);
464 nanosleep(&tmo, NULL);
465
466 // Check that we can query the size of the stack, but that all other
467 // calls to getrlimit() fail.
468 if (((errno = 0), !getrlimit(RLIMIT_STACK, NULL)) || errno != EFAULT ||
469 ((errno = 0), !getrlimit(RLIMIT_CORE, NULL)) || errno != ERR) {
470 perror("getrlimit()");
471 _exit(1);
472 }
473
474 // Check that we can query TCGETS and TIOCGWINSZ, but no other ioctls().
475 if (((errno = 0), !ioctl(2, TCGETS, NULL)) || errno != EFAULT ||
476 ((errno = 0), !ioctl(2, TIOCGWINSZ, NULL)) || errno != EFAULT ||
477 ((errno = 0), !ioctl(2, TCSETS, NULL)) || errno != ERR) {
478 perror("ioctl()");
479 _exit(1);
480 }
481
482 // Check that prctl() can manipulate the dumpable flag, but nothing else.
483 if (((errno = 0), !prctl(PR_GET_DUMPABLE)) || errno ||
484 ((errno = 0), prctl(PR_SET_DUMPABLE, 1)) || errno ||
485 ((errno = 0), !prctl(PR_SET_SECCOMP, 0)) || errno != ERR) {
486 perror("prctl()");
487 _exit(1);
488 }
489
490 // Check that we can send and receive file handles.
491 int fds[3];
492 if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds)) {
493 perror("socketpair()");
494 _exit(1);
495 }
496 size_t len = 4;
497 char buf[4];
498 if (!SendFds(fds[0], "test", 4, fds[1], -1) ||
499 !GetFds(fds[1], buf, &len, fds+2, NULL) ||
500 len != 4 ||
501 memcmp(buf, "test", len) ||
502 write(fds[2], "demo", 4) != 4 ||
503 read(fds[0], buf, 4) != 4 ||
504 memcmp(buf, "demo", 4) ||
505 close(fds[0]) ||
506 close(fds[1]) ||
507 close(fds[2])) {
508 perror("sending/receiving of fds");
509 _exit(1);
510 }
511
512 // Check whether SysV IPC works.
513 int shmid;
514 void *addr;
515 if ((shmid = shmget(IPC_PRIVATE, 4096, IPC_CREAT|0600)) < 0 ||
516 (addr = shmat(shmid, NULL, 0)) == reinterpret_cast<void *>(-1) ||
517 shmdt(addr) ||
518 shmctl(shmid, IPC_RMID, NULL)) {
519 perror("sysv IPC");
520 _exit(1);
521 }
522
523 // Print a message so that the user can see the sandbox is activated.
524 time_t tm = time(NULL);
525 printf("Sandbox has been started at %s", ctime(&tm));
526
527 // Stress-test the sendmsg() code
528 static const int kSendmsgStressNumThreads = 10;
529 pthread_t sendmsgStressThreads[kSendmsgStressNumThreads];
530 for (int i = 0; i < kSendmsgStressNumThreads; ++i) {
531 if (pthread_create(sendmsgStressThreads + i, NULL,
532 SendmsgStressThreadFnc, NULL)) {
533 perror("pthread_create");
534 _exit(1);
535 }
536 }
537 for (int i = 0; i < kSendmsgStressNumThreads; ++i) {
538 pthread_join(sendmsgStressThreads[i], NULL);
539 }
540
541 return 0;
542 }
543