• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <errno.h>
6 #include <fcntl.h>
7 #include <linux/unistd.h>
8 #include <netinet/in.h>
9 #include <netinet/tcp.h>
10 #include <netinet/udp.h>
11 #include <pthread.h>
12 #include <signal.h>
13 #include <stdarg.h>
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <string.h>
17 #include <sys/ioctl.h>
18 #include <sys/ipc.h>
19 #include <sys/mman.h>
20 #include <sys/prctl.h>
21 #include <sys/resource.h>
22 #include <sys/shm.h>
23 #include <sys/socket.h>
24 #include <sys/time.h>
25 #include <sys/types.h>
26 #include <time.h>
27 #include <unistd.h>
28 
29 #include "base/macros.h"
30 #include "base/posix/eintr_wrapper.h"
31 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
32 #include "sandbox/linux/seccomp-bpf/sandbox_bpf_policy.h"
33 #include "sandbox/linux/services/linux_syscalls.h"
34 
35 using sandbox::ErrorCode;
36 using sandbox::SandboxBPF;
37 using sandbox::SandboxBPFPolicy;
38 using sandbox::arch_seccomp_data;
39 
// Error number reported for denied system calls: DefaultHandler() returns
// -ERR, and main() compares errno against ERR.
#define ERR EPERM

// The sandbox is not expected to do anything useful yet, so the checks in
// this program would make it bail out almost immediately. Until the sandbox
// enforces restrictions in a meaningful way, turn every _exit() call in this
// file into a no-op so that execution carries on after a failed check.
// Remove this macro once the policy is actually enforced.
#define _exit(x) do { } while (0)
47 
48 namespace {
49 
// Sends one or more file descriptors over the Unix domain socket "transport"
// as a single SCM_RIGHTS control message.
//
// The variadic arguments are the descriptors to send (as "int"), terminated
// by a negative value. The message payload always starts with a four byte
// zero "int" (read by GetFds() as the error slot); if "buf" is non-NULL and
// "len" is non-zero, "len" bytes from "buf" follow it.
//
// Returns true if sendmsg() accepted the complete payload. Returns false if
// no descriptor was passed or if sendmsg() failed or sent fewer bytes.
bool SendFds(int transport, const void *buf, size_t len, ...) {
  // First pass over the arguments: count the descriptors so that the
  // control buffer can be sized.
  int count = 0;
  va_list ap;
  va_start(ap, len);
  while (va_arg(ap, int) >= 0) {
    ++count;
  }
  va_end(ap);
  if (!count) {
    return false;
  }
  char cmsg_buf[CMSG_SPACE(count*sizeof(int))];
  memset(cmsg_buf, 0, sizeof(cmsg_buf));
  struct iovec  iov[2] = { { 0 } };
  struct msghdr msg    = { 0 };
  int dummy            = 0;
  iov[0].iov_base      = &dummy;
  iov[0].iov_len       = sizeof(dummy);
  if (buf && len > 0) {
    iov[1].iov_base    = const_cast<void *>(buf);
    iov[1].iov_len     = len;
  }
  msg.msg_iov          = iov;
  msg.msg_iovlen       = (buf && len > 0) ? 2 : 1;
  msg.msg_control      = cmsg_buf;
  msg.msg_controllen   = CMSG_LEN(count*sizeof(int));
  struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
  cmsg->cmsg_level     = SOL_SOCKET;
  cmsg->cmsg_type      = SCM_RIGHTS;
  cmsg->cmsg_len       = CMSG_LEN(count*sizeof(int));

  // Second pass: copy the descriptors into the control message.
  va_start(ap, len);
  for (int i = 0, fd; (fd = va_arg(ap, int)) >= 0; ++i) {
    (reinterpret_cast<int *>(CMSG_DATA(cmsg)))[i] = fd;
  }
  // Every va_start() must be paired with a va_end() before returning.
  va_end(ap);

  return sendmsg(transport, &msg, 0) ==
      static_cast<ssize_t>(sizeof(dummy) + ((buf && len > 0) ? len : 0));
}
87 
// Closes every descriptor that arrived in an SCM_RIGHTS control message of
// "msg". Used on GetFds() failure paths so that descriptors which recvmsg()
// already installed in this process do not leak. errno is left untouched.
void CloseReceivedFds(struct msghdr *msg) {
  const int saved_errno = errno;
  for (struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg); cmsg;
       cmsg = CMSG_NXTHDR(msg, cmsg)) {
    if (cmsg->cmsg_level != SOL_SOCKET ||
        cmsg->cmsg_type  != SCM_RIGHTS ||
        cmsg->cmsg_len   <  CMSG_LEN(0)) {
      continue;
    }
    const size_t num_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
    for (size_t i = 0; i < num_fds; ++i) {
      int fd;
      memcpy(&fd, CMSG_DATA(cmsg) + i*sizeof(int), sizeof(fd));
      if (fd >= 0) {
        close(fd);
      }
    }
  }
  errno = saved_errno;
}

// Receives file descriptors sent by SendFds() over the Unix domain socket
// "transport".
//
// The variadic arguments are "int *" output slots, terminated by NULL. Every
// slot is set to -1 up front and only overwritten on success. If "buf" and
// "len" are non-NULL and "*len" is non-zero, up to "*len" payload bytes are
// stored in "buf". Whenever "len" is non-NULL, "*len" is updated to the
// number of payload bytes received (not counting the leading error "int").
//
// Returns true if exactly the expected number of bytes and descriptors
// arrived. On failure returns false and
//   - leaves errno as set by recvmsg() if the call itself failed,
//   - sets errno to 0 on a short read,
//   - sets errno to the value sent by the peer if the leading "int" of the
//     payload is non-zero,
//   - sets errno to EBADF if the control message is truncated, missing, or
//     does not carry exactly the requested number of descriptors.
// Descriptors received along with a message that is then rejected are
// closed rather than leaked.
bool GetFds(int transport, void *buf, size_t *len, ...) {
  // First pass: count the output slots and mark them all as invalid.
  int count = 0;
  va_list ap;
  va_start(ap, len);
  for (int *fd; (fd = va_arg(ap, int *)) != NULL; ++count) {
    *fd = -1;
  }
  va_end(ap);
  if (!count) {
    return false;
  }
  char cmsg_buf[CMSG_SPACE(count*sizeof(int))];
  memset(cmsg_buf, 0, sizeof(cmsg_buf));
  struct iovec iov[2] = { { 0 } };
  struct msghdr msg   = { 0 };
  int err;
  iov[0].iov_base     = &err;
  iov[0].iov_len      = sizeof(int);
  if (buf && len && *len > 0) {
    iov[1].iov_base   = buf;
    iov[1].iov_len    = *len;
  }
  msg.msg_iov         = iov;
  msg.msg_iovlen      = (buf && len && *len > 0) ? 2 : 1;
  msg.msg_control     = cmsg_buf;
  // CMSG_LEN (not CMSG_SPACE) so there is room for exactly "count"
  // descriptors.
  msg.msg_controllen  = CMSG_LEN(count*sizeof(int));
  ssize_t bytes = recvmsg(transport, &msg, 0);
  if (len) {
    *len = bytes > static_cast<int>(sizeof(int)) ? bytes - sizeof(int) : 0;
  }
  if (bytes != static_cast<ssize_t>(sizeof(int) + iov[1].iov_len)) {
    if (bytes >= 0) {
      // recvmsg() succeeded but delivered fewer bytes than expected. Any
      // descriptors that came with the partial message are dropped.
      CloseReceivedFds(&msg);
      errno = 0;
    }
    return false;
  }
  if (err) {
    // "err" is the first four bytes of the payload. If these are non-zero,
    // the sender on the other side of the socketpair sent us an errno value.
    // We don't expect to get any file handles in this case; close them if
    // some arrived anyway.
    CloseReceivedFds(&msg);
    errno = err;
    return false;
  }
  struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
  if ((msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) ||
      !cmsg                                    ||
      cmsg->cmsg_level != SOL_SOCKET           ||
      cmsg->cmsg_type  != SCM_RIGHTS           ||
      cmsg->cmsg_len   != CMSG_LEN(count*sizeof(int))) {
    CloseReceivedFds(&msg);
    errno = EBADF;
    return false;
  }

  // Second pass: hand the received descriptors to the caller.
  va_start(ap, len);
  for (int *fd, i = 0; (fd = va_arg(ap, int *)) != NULL; ++i) {
    *fd = (reinterpret_cast<int *>(CMSG_DATA(cmsg)))[i];
  }
  va_end(ap);
  return true;
}
147 
148 
// POSIX doesn't define any async-signal safe function for converting
// an integer to ASCII. We'll have to define our own version.
// itoa_r() converts a (signed) integer to ASCII. It returns "buf", if the
// conversion was successful or NULL otherwise. It never writes more than "sz"
// bytes. Output will be truncated as needed, and a NUL character is always
// appended (unless "sz" is zero, in which case nothing is written at all).
// When the output is truncated, the digits that are kept are the
// least-significant ones.
char *itoa_r(int i, char *buf, size_t sz) {
  // Make sure we can write at least one NUL byte.
  size_t n = 1;
  if (n > sz) {
    return NULL;
  }

  // Do the conversion on the magnitude as an unsigned value. Negating in
  // unsigned arithmetic is well-defined for every input, including the
  // most negative integer, which has no positive counterpart in "int".
  char *start = buf;
  unsigned int magnitude = static_cast<unsigned int>(i);
  if (i < 0) {
    // Make sure we can write the '-' character.
    if (++n > sz) {
      *start = '\000';
      return NULL;
    }
    *start++ = '-';
    magnitude = 0u - magnitude;
  }

  // Loop until we have converted the entire number. Output at least one
  // character (i.e. '0').
  char *ptr = start;
  do {
    // Make sure there is still enough space left in our output buffer.
    if (++n > sz) {
      buf = NULL;
      break;
    }
    *ptr++ = static_cast<char>('0' + magnitude % 10);
    magnitude /= 10;
  } while (magnitude);

  // Terminate the output with a NUL character.
  *ptr = '\000';

  // Conversion to ASCII actually resulted in the digits being in reverse
  // order. We can't easily generate them in forward order, as we can't tell
  // the number of characters needed until we are done converting.
  // So, now, we reverse the string (except for the possible "-" sign).
  // The guard keeps us from forming a pointer in front of the digits when
  // not a single digit could be written.
  if (ptr != start) {
    for (char *lo = start, *hi = ptr - 1; lo < hi; ++lo, --hi) {
      const char ch = *hi;
      *hi = *lo;
      *lo = ch;
    }
  }
  return buf;
}
216 
217 // This handler gets called, whenever we encounter a system call that we
218 // don't recognize explicitly. For the purposes of this program, we just
219 // log the system call and then deny it. More elaborate sandbox policies
220 // might try to evaluate the system call in user-space, instead.
221 // The only notable complication is that this function must be async-signal
222 // safe. This restricts the libary functions that we can call.
DefaultHandler(const struct arch_seccomp_data & data,void *)223 intptr_t DefaultHandler(const struct arch_seccomp_data& data, void *) {
224   static const char msg0[] = "Disallowed system call #";
225   static const char msg1[] = "\n";
226   char buf[sizeof(msg0) - 1 + 25 + sizeof(msg1)];
227 
228   *buf = '\000';
229   strncat(buf, msg0, sizeof(buf) - 1);
230 
231   char *ptr = strrchr(buf, '\000');
232   itoa_r(data.nr, ptr, sizeof(buf) - (ptr - buf));
233 
234   ptr = strrchr(ptr, '\000');
235   strncat(ptr, msg1, sizeof(buf) - (ptr - buf));
236 
237   ptr = strrchr(ptr, '\000');
238   if (HANDLE_EINTR(write(2, buf, ptr - buf))) { }
239 
240   return -ERR;
241 }
242 
243 class DemoPolicy : public SandboxBPFPolicy {
244  public:
DemoPolicy()245   DemoPolicy() {}
246   virtual ErrorCode EvaluateSyscall(SandboxBPF* sandbox,
247                                     int sysno) const OVERRIDE;
248 
249  private:
250   DISALLOW_COPY_AND_ASSIGN(DemoPolicy);
251 };
252 
EvaluateSyscall(SandboxBPF * sandbox,int sysno) const253 ErrorCode DemoPolicy::EvaluateSyscall(SandboxBPF* sandbox, int sysno) const {
254   switch (sysno) {
255 #if defined(__NR_accept)
256   case __NR_accept: case __NR_accept4:
257 #endif
258   case __NR_alarm:
259   case __NR_brk:
260   case __NR_clock_gettime:
261   case __NR_close:
262   case __NR_dup: case __NR_dup2:
263   case __NR_epoll_create: case __NR_epoll_ctl: case __NR_epoll_wait:
264   case __NR_exit: case __NR_exit_group:
265   case __NR_fcntl:
266 #if defined(__NR_fcntl64)
267   case __NR_fcntl64:
268 #endif
269   case __NR_fdatasync:
270   case __NR_fstat:
271 #if defined(__NR_fstat64)
272   case __NR_fstat64:
273 #endif
274   case __NR_ftruncate:
275   case __NR_futex:
276   case __NR_getdents: case __NR_getdents64:
277   case __NR_getegid:
278 #if defined(__NR_getegid32)
279   case __NR_getegid32:
280 #endif
281   case __NR_geteuid:
282 #if defined(__NR_geteuid32)
283   case __NR_geteuid32:
284 #endif
285   case __NR_getgid:
286 #if defined(__NR_getgid32)
287   case __NR_getgid32:
288 #endif
289   case __NR_getitimer: case __NR_setitimer:
290 #if defined(__NR_getpeername)
291   case __NR_getpeername:
292 #endif
293   case __NR_getpid: case __NR_gettid:
294 #if defined(__NR_getsockname)
295   case __NR_getsockname:
296 #endif
297   case __NR_gettimeofday:
298   case __NR_getuid:
299 #if defined(__NR_getuid32)
300   case __NR_getuid32:
301 #endif
302 #if defined(__NR__llseek)
303   case __NR__llseek:
304 #endif
305   case __NR_lseek:
306   case __NR_nanosleep:
307   case __NR_pipe: case __NR_pipe2:
308   case __NR_poll:
309   case __NR_pread64: case __NR_preadv:
310   case __NR_pwrite64: case __NR_pwritev:
311   case __NR_read: case __NR_readv:
312   case __NR_restart_syscall:
313   case __NR_set_robust_list:
314   case __NR_rt_sigaction:
315 #if defined(__NR_sigaction)
316   case __NR_sigaction:
317 #endif
318 #if defined(__NR_signal)
319   case __NR_signal:
320 #endif
321   case __NR_rt_sigprocmask:
322 #if defined(__NR_sigprocmask)
323   case __NR_sigprocmask:
324 #endif
325 #if defined(__NR_shutdown)
326   case __NR_shutdown:
327 #endif
328   case __NR_rt_sigreturn:
329 #if defined(__NR_sigreturn)
330   case __NR_sigreturn:
331 #endif
332 #if defined(__NR_socketpair)
333   case __NR_socketpair:
334 #endif
335   case __NR_time:
336   case __NR_uname:
337   case __NR_write: case __NR_writev:
338     return ErrorCode(ErrorCode::ERR_ALLOWED);
339 
340   case __NR_prctl:
341     // Allow PR_SET_DUMPABLE and PR_GET_DUMPABLE. Do not allow anything else.
342     return sandbox->Cond(1, ErrorCode::TP_32BIT, ErrorCode::OP_EQUAL,
343                          PR_SET_DUMPABLE,
344                          ErrorCode(ErrorCode::ERR_ALLOWED),
345            sandbox->Cond(1, ErrorCode::TP_32BIT, ErrorCode::OP_EQUAL,
346                          PR_GET_DUMPABLE,
347                          ErrorCode(ErrorCode::ERR_ALLOWED),
348            sandbox->Trap(DefaultHandler, NULL)));
349 
350   // The following system calls are temporarily permitted. This must be
351   // tightened later. But we currently don't implement enough of the sandboxing
352   // API to do so.
353   // As is, this sandbox isn't exactly safe :-/
354 #if defined(__NR_sendmsg)
355   case __NR_sendmsg: case __NR_sendto:
356   case __NR_recvmsg: case __NR_recvfrom:
357   case __NR_getsockopt: case __NR_setsockopt:
358 #elif defined(__NR_socketcall)
359   case __NR_socketcall:
360 #endif
361 #if defined(__NR_shmat)
362   case __NR_shmat: case __NR_shmctl: case __NR_shmdt: case __NR_shmget:
363 #elif defined(__NR_ipc)
364   case __NR_ipc:
365 #endif
366 #if defined(__NR_mmap2)
367   case __NR_mmap2:
368 #else
369   case __NR_mmap:
370 #endif
371 #if defined(__NR_ugetrlimit)
372   case __NR_ugetrlimit:
373 #endif
374   case __NR_getrlimit:
375   case __NR_ioctl:
376   case __NR_clone:
377   case __NR_munmap: case __NR_mprotect: case __NR_madvise:
378   case __NR_remap_file_pages:
379     return ErrorCode(ErrorCode::ERR_ALLOWED);
380 
381   // Everything that isn't explicitly allowed is denied.
382   default:
383     return sandbox->Trap(DefaultHandler, NULL);
384   }
385 }
386 
// Minimal thread entry point: hands its argument straight back as the
// thread's result, so that the creator can verify the round trip through
// pthread_create()/pthread_join().
void *ThreadFnc(void *arg) {
  void *const result = arg;
  return result;
}
390 
SendmsgStressThreadFnc(void * arg)391 void *SendmsgStressThreadFnc(void *arg) {
392   if (arg) { }
393   static const int repetitions = 100;
394   static const int kNumFds = 3;
395   for (int rep = 0; rep < repetitions; ++rep) {
396     int fds[2 + kNumFds];
397     if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds)) {
398       perror("socketpair()");
399       _exit(1);
400     }
401     size_t len = 4;
402     char buf[4];
403     if (!SendFds(fds[0], "test", 4, fds[1], fds[1], fds[1], -1) ||
404         !GetFds(fds[1], buf, &len, fds+2, fds+3, fds+4, NULL) ||
405         len != 4 ||
406         memcmp(buf, "test", len) ||
407         write(fds[2], "demo", 4) != 4 ||
408         read(fds[0], buf, 4) != 4 ||
409         memcmp(buf, "demo", 4)) {
410       perror("sending/receiving of fds");
411       _exit(1);
412     }
413     for (int i = 0; i < 2+kNumFds; ++i) {
414       if (close(fds[i])) {
415         perror("close");
416         _exit(1);
417       }
418     }
419   }
420   return NULL;
421 }
422 
423 }  // namespace
424 
main(int argc,char * argv[])425 int main(int argc, char *argv[]) {
426   if (argc) { }
427   if (argv) { }
428   int proc_fd = open("/proc", O_RDONLY|O_DIRECTORY);
429   if (SandboxBPF::SupportsSeccompSandbox(proc_fd) !=
430       SandboxBPF::STATUS_AVAILABLE) {
431     perror("sandbox");
432     _exit(1);
433   }
434   SandboxBPF sandbox;
435   sandbox.set_proc_fd(proc_fd);
436   sandbox.SetSandboxPolicy(new DemoPolicy());
437   if (!sandbox.StartSandbox(SandboxBPF::PROCESS_SINGLE_THREADED)) {
438     fprintf(stderr, "StartSandbox() failed");
439     _exit(1);
440   }
441 
442   // Check that we can create threads
443   pthread_t thr;
444   if (!pthread_create(&thr, NULL, ThreadFnc,
445                       reinterpret_cast<void *>(0x1234))) {
446     void *ret;
447     pthread_join(thr, &ret);
448     if (ret != reinterpret_cast<void *>(0x1234)) {
449       perror("clone() failed");
450       _exit(1);
451     }
452   } else {
453     perror("clone() failed");
454     _exit(1);
455   }
456 
457   // Check that we handle restart_syscall() without dieing. This is a little
458   // tricky to trigger. And I can't think of a good way to verify whether it
459   // actually executed.
460   signal(SIGALRM, SIG_IGN);
461   const struct itimerval tv = { { 0, 0 }, { 0, 5*1000 } };
462   const struct timespec tmo = { 0, 100*1000*1000 };
463   setitimer(ITIMER_REAL, &tv, NULL);
464   nanosleep(&tmo, NULL);
465 
466   // Check that we can query the size of the stack, but that all other
467   // calls to getrlimit() fail.
468   if (((errno = 0), !getrlimit(RLIMIT_STACK, NULL)) || errno != EFAULT ||
469       ((errno = 0), !getrlimit(RLIMIT_CORE,  NULL)) || errno != ERR) {
470     perror("getrlimit()");
471     _exit(1);
472   }
473 
474   // Check that we can query TCGETS and TIOCGWINSZ, but no other ioctls().
475   if (((errno = 0), !ioctl(2, TCGETS,     NULL)) || errno != EFAULT ||
476       ((errno = 0), !ioctl(2, TIOCGWINSZ, NULL)) || errno != EFAULT ||
477       ((errno = 0), !ioctl(2, TCSETS,     NULL)) || errno != ERR) {
478     perror("ioctl()");
479     _exit(1);
480   }
481 
482   // Check that prctl() can manipulate the dumpable flag, but nothing else.
483   if (((errno = 0), !prctl(PR_GET_DUMPABLE))    || errno ||
484       ((errno = 0),  prctl(PR_SET_DUMPABLE, 1)) || errno ||
485       ((errno = 0), !prctl(PR_SET_SECCOMP,  0)) || errno != ERR) {
486     perror("prctl()");
487     _exit(1);
488   }
489 
490   // Check that we can send and receive file handles.
491   int fds[3];
492   if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds)) {
493     perror("socketpair()");
494     _exit(1);
495   }
496   size_t len = 4;
497   char buf[4];
498   if (!SendFds(fds[0], "test", 4, fds[1], -1) ||
499       !GetFds(fds[1], buf, &len, fds+2, NULL) ||
500       len != 4 ||
501       memcmp(buf, "test", len) ||
502       write(fds[2], "demo", 4) != 4 ||
503       read(fds[0], buf, 4) != 4 ||
504       memcmp(buf, "demo", 4) ||
505       close(fds[0]) ||
506       close(fds[1]) ||
507       close(fds[2])) {
508     perror("sending/receiving of fds");
509     _exit(1);
510   }
511 
512   // Check whether SysV IPC works.
513   int shmid;
514   void *addr;
515   if ((shmid = shmget(IPC_PRIVATE, 4096, IPC_CREAT|0600)) < 0 ||
516       (addr = shmat(shmid, NULL, 0)) == reinterpret_cast<void *>(-1) ||
517       shmdt(addr) ||
518       shmctl(shmid, IPC_RMID, NULL)) {
519     perror("sysv IPC");
520     _exit(1);
521   }
522 
523   // Print a message so that the user can see the sandbox is activated.
524   time_t tm = time(NULL);
525   printf("Sandbox has been started at %s", ctime(&tm));
526 
527   // Stress-test the sendmsg() code
528   static const int kSendmsgStressNumThreads = 10;
529   pthread_t sendmsgStressThreads[kSendmsgStressNumThreads];
530   for (int i = 0; i < kSendmsgStressNumThreads; ++i) {
531     if (pthread_create(sendmsgStressThreads + i, NULL,
532                        SendmsgStressThreadFnc, NULL)) {
533       perror("pthread_create");
534       _exit(1);
535     }
536   }
537   for (int i = 0; i < kSendmsgStressNumThreads; ++i) {
538     pthread_join(sendmsgStressThreads[i], NULL);
539   }
540 
541   return 0;
542 }
543