• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include "sandboxed_api/sandbox2/monitor_unotify.h"
2 
3 #include <linux/audit.h>
4 #include <linux/seccomp.h>
5 #include <poll.h>
6 #include <sys/eventfd.h>
7 #include <sys/ioctl.h>
8 #include <sys/ptrace.h>
9 #include <sys/resource.h>
10 #include <sys/sysinfo.h>
11 #include <sys/uio.h>
12 #include <sys/wait.h>
13 #include <syscall.h>
14 #include <unistd.h>
15 
16 #include <algorithm>
17 #include <atomic>
18 #include <cerrno>
19 #include <cstdint>
20 #include <cstdlib>
21 #include <cstring>
22 #include <memory>
23 #include <string>
24 #include <utility>
25 #include <vector>
26 
27 #include "absl/base/macros.h"
28 #include "absl/cleanup/cleanup.h"
29 #include "absl/log/check.h"
30 #include "absl/log/log.h"
31 #include "absl/status/status.h"
32 #include "absl/status/statusor.h"
33 #include "absl/strings/str_cat.h"
34 #include "absl/synchronization/mutex.h"
35 #include "absl/synchronization/notification.h"
36 #include "absl/time/clock.h"
37 #include "absl/time/time.h"
38 #include "absl/types/span.h"
39 #include "sandboxed_api/config.h"
40 #include "sandboxed_api/sandbox2/bpf_evaluator.h"
41 #include "sandboxed_api/sandbox2/client.h"
42 #include "sandboxed_api/sandbox2/executor.h"
43 #include "sandboxed_api/sandbox2/forkserver.pb.h"
44 #include "sandboxed_api/sandbox2/monitor_base.h"
45 #include "sandboxed_api/sandbox2/notify.h"
46 #include "sandboxed_api/sandbox2/policy.h"
47 #include "sandboxed_api/sandbox2/result.h"
48 #include "sandboxed_api/util/fileops.h"
49 #include "sandboxed_api/util/status_macros.h"
50 #include "sandboxed_api/util/thread.h"
51 
52 #ifndef SECCOMP_RET_USER_NOTIF
53 #define SECCOMP_RET_USER_NOTIF 0x7fc00000U /* notifies userspace */
54 #endif
55 
56 #ifndef SECCOMP_USER_NOTIF_FLAG_CONTINUE
57 #define SECCOMP_USER_NOTIF_FLAG_CONTINUE 1
58 #endif
59 
60 #define DO_USER_NOTIF BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_USER_NOTIF)
61 
62 #ifndef SECCOMP_GET_NOTIF_SIZES
63 #define SECCOMP_GET_NOTIF_SIZES 3
64 
65 struct seccomp_notif_sizes {
66   __u16 seccomp_notif;
67   __u16 seccomp_notif_resp;
68   __u16 seccomp_data;
69 };
70 #endif
71 
72 #ifndef SECCOMP_IOCTL_NOTIF_RECV
73 #ifndef SECCOMP_IOWR
74 #define SECCOMP_IOC_MAGIC '!'
75 #define SECCOMP_IO(nr) _IO(SECCOMP_IOC_MAGIC, nr)
76 #define SECCOMP_IOWR(nr, type) _IOWR(SECCOMP_IOC_MAGIC, nr, type)
77 #endif
78 
79 // Flags for seccomp notification fd ioctl.
80 #define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif)
81 #define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, struct seccomp_notif_resp)
82 #endif
83 
84 namespace sandbox2 {
85 
86 namespace {
87 
88 using ::sapi::file_util::fileops::FDCloser;
89 
// Thin wrapper around the raw seccomp system call: forwards `operation`,
// `flags` and `args` unchanged and returns the syscall's result as an int.
int seccomp(unsigned int operation, unsigned int flags, void* args) {
  const long rc = syscall(SYS_seccomp, operation, flags, args);
  return static_cast<int>(rc);
}
93 
AuditArchToCPUArch(uint32_t arch)94 sapi::cpu::Architecture AuditArchToCPUArch(uint32_t arch) {
95   switch (arch) {
96     case AUDIT_ARCH_AARCH64:
97       return sapi::cpu::Architecture::kArm64;
98     case AUDIT_ARCH_ARM:
99       return sapi::cpu::Architecture::kArm;
100     case AUDIT_ARCH_X86_64:
101       return sapi::cpu::Architecture::kX8664;
102     case AUDIT_ARCH_I386:
103       return sapi::cpu::Architecture::kX86;
104     case AUDIT_ARCH_PPC64LE:
105       return sapi::cpu::Architecture::kPPC64LE;
106     default:
107       return sapi::cpu::Architecture::kUnknown;
108   }
109 }
110 
WaitForFdReadable(int fd,absl::Time deadline)111 absl::Status WaitForFdReadable(int fd, absl::Time deadline) {
112   pollfd pfds[] = {
113       {.fd = fd, .events = POLLIN},
114   };
115   for (absl::Duration remaining = deadline - absl::Now();
116        remaining > absl::ZeroDuration(); remaining = deadline - absl::Now()) {
117     int ret = poll(pfds, ABSL_ARRAYSIZE(pfds),
118                    static_cast<int>(absl::ToInt64Milliseconds(remaining)));
119     if (ret > 0) {
120       if (pfds[0].revents & POLLIN) {
121         return absl::OkStatus();
122       }
123       if (pfds[0].revents & POLLHUP) {
124         return absl::UnavailableError("hangup");
125       }
126       return absl::InternalError("poll");
127     }
128     if (ret == -1 && errno != EINTR) {
129       return absl::ErrnoToStatus(errno, "poll");
130     }
131   }
132   return absl::DeadlineExceededError("waiting for fd");
133 }
134 
ReadWholeWithDeadline(int fd,std::vector<iovec> vecs_vec,absl::Time deadline)135 absl::Status ReadWholeWithDeadline(int fd, std::vector<iovec> vecs_vec,
136                                    absl::Time deadline) {
137   absl::Span<iovec> vecs = absl::MakeSpan(vecs_vec);
138   while (!vecs.empty()) {
139     SAPI_RETURN_IF_ERROR(WaitForFdReadable(fd, deadline));
140     ssize_t r = readv(fd, vecs.data(), vecs.size());
141     if (r < 0 && errno != EINTR) {
142       return absl::ErrnoToStatus(errno, "readv");
143     }
144     while (r > 0) {
145       if (vecs.empty()) {
146         return absl::InternalError("readv return value too big");
147       }
148       iovec& vec = vecs.front();
149       if (r < vec.iov_len) {
150         vec.iov_len -= r;
151         vec.iov_base = reinterpret_cast<char*>(vec.iov_base) + r;
152         break;
153       }
154       r -= vec.iov_len;
155       vecs.remove_prefix(1);
156     }
157   }
158   return absl::OkStatus();
159 }
160 
161 }  // namespace
162 
UnotifyMonitor(Executor * executor,Policy * policy,Notify * notify)163 UnotifyMonitor::UnotifyMonitor(Executor* executor, Policy* policy,
164                                Notify* notify)
165     : MonitorBase(executor, policy, notify) {
166   type_ = FORKSERVER_MONITOR_UNOTIFY;
167   if (executor_->limits()->wall_time_limit() != absl::ZeroDuration()) {
168     auto deadline = absl::Now() + executor_->limits()->wall_time_limit();
169     deadline_millis_.store(absl::ToUnixMillis(deadline),
170                            std::memory_order_relaxed);
171   }
172   external_kill_request_flag_.test_and_set(std::memory_order_relaxed);
173   dump_stack_request_flag_.test_and_set(std::memory_order_relaxed);
174 }
175 
RunInternal()176 void UnotifyMonitor::RunInternal() {
177   thread_ = sapi::Thread(this, &UnotifyMonitor::Run, "sandbox2-Monitor");
178 
179   // Wait for the Monitor to set-up the sandboxee correctly (or fail while
180   // doing that). From here on, it is safe to use the IPC object for
181   // non-sandbox-related data exchange.
182   setup_notification_.WaitForNotification();
183 }
184 
SendPolicy(const std::vector<sock_filter> & policy)185 absl::Status UnotifyMonitor::SendPolicy(
186     const std::vector<sock_filter>& policy) {
187   original_policy_ = policy;
188   std::vector<sock_filter> modified_policy = policy;
189   const sock_filter trace_action = SANDBOX2_TRACE;
190   for (sock_filter& filter : modified_policy) {
191     if ((filter.code == BPF_RET + BPF_K && filter.k == SECCOMP_RET_KILL) ||
192         (filter.code == trace_action.code && filter.k == trace_action.k)) {
193       filter = DO_USER_NOTIF;
194     }
195   }
196   return MonitorBase::SendPolicy(modified_policy);
197 }
198 
HandleViolation(const Syscall & syscall)199 void UnotifyMonitor::HandleViolation(const Syscall& syscall) {
200   ViolationType violation_type = syscall.arch() == Syscall::GetHostArch()
201                                      ? ViolationType::kSyscall
202                                      : ViolationType::kArchitectureSwitch;
203   LogSyscallViolation(syscall);
204   notify_->EventSyscallViolation(syscall, violation_type);
205   MaybeGetStackTrace(req_->pid, Result::VIOLATION);
206   SetExitStatusCode(Result::VIOLATION, syscall.nr());
207   notify_->EventSyscallViolation(syscall, violation_type);
208   result_.SetSyscall(std::make_unique<Syscall>(syscall));
209   KillSandboxee();
210 }
211 
AllowSyscallViaUnotify()212 void UnotifyMonitor::AllowSyscallViaUnotify() {
213   memset(resp_.get(), 0, resp_size_);
214   resp_->id = req_->id;
215   resp_->val = 0;
216   resp_->error = 0;
217   resp_->flags = SECCOMP_USER_NOTIF_FLAG_CONTINUE;
218   if (ioctl(seccomp_notify_fd_.get(), SECCOMP_IOCTL_NOTIF_SEND, resp_.get()) !=
219       0) {
220     if (errno == ENOENT) {
221       VLOG(1) << "Unotify send failed with ENOENT";
222     } else {
223       LOG_IF(ERROR, errno == EINVAL)
224           << "Unotify send failed with EINVAL. Likely "
225              "SECCOMP_USER_NOTIF_FLAG_CONTINUE unsupported by the kernel.";
226       SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_NOTIFY);
227     }
228   }
229 }
230 
HandleUnotify()231 void UnotifyMonitor::HandleUnotify() {
232   memset(req_.get(), 0, req_size_);
233   if (ioctl(seccomp_notify_fd_.get(), SECCOMP_IOCTL_NOTIF_RECV, req_.get()) !=
234       0) {
235     if (errno == ENOENT) {
236       VLOG(1) << "Unotify recv failed with ENOENT";
237     } else {
238       SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_NOTIFY);
239     }
240     return;
241   }
242   Syscall syscall(AuditArchToCPUArch(req_->data.arch), req_->data.nr,
243                   {req_->data.args[0], req_->data.args[1], req_->data.args[2],
244                    req_->data.args[3], req_->data.args[4], req_->data.args[5]},
245                   req_->pid, 0, req_->data.instruction_pointer);
246   absl::StatusOr<uint32_t> policy_ret =
247       bpf::Evaluate(original_policy_, req_->data);
248   if (!policy_ret.ok()) {
249     LOG(ERROR) << "Failed to evaluate policy: " << policy_ret.status();
250     SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_NOTIFY);
251   }
252   const sock_filter trace_action = SANDBOX2_TRACE;
253   bool should_trace = *policy_ret == trace_action.k;
254   Notify::TraceAction trace_response = Notify::TraceAction::kDeny;
255   if (should_trace) {
256     trace_response = notify_->EventSyscallTrace(syscall);
257   }
258   switch (trace_response) {
259     case Notify::TraceAction::kAllow:
260       AllowSyscallViaUnotify();
261       return;
262     case Notify::TraceAction::kDeny:
263       HandleViolation(syscall);
264       return;
265     case Notify::TraceAction::kInspectAfterReturn:
266       LOG(FATAL) << "TraceAction::kInspectAfterReturn not supported by unotify "
267                     "monitor";
268     default:
269       LOG(FATAL) << "Unknown TraceAction: " << static_cast<int>(trace_response);
270   }
271 }
272 
Run()273 void UnotifyMonitor::Run() {
274   absl::Cleanup monitor_done = [this] {
275     getrusage(RUSAGE_THREAD, result_.GetRUsageMonitor());
276     OnDone();
277   };
278 
279   absl::Cleanup setup_notify = [this] { setup_notification_.Notify(); };
280   if (!InitSetupUnotify()) {
281     SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_NOTIFY);
282     return;
283   }
284   if (!InitSetupNotifyEventFd()) {
285     SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_NOTIFY);
286     return;
287   }
288 
289   std::move(setup_notify).Invoke();
290 
291   pollfd pfds[] = {
292       {.fd = process_.status_fd.get(), .events = POLLIN},
293       {.fd = seccomp_notify_fd_.get(), .events = POLLIN},
294       {.fd = monitor_notify_fd_.get(), .events = POLLIN},
295   };
296   while (result_.final_status() == Result::UNSET) {
297     int64_t deadline = deadline_millis_.load(std::memory_order_relaxed);
298     absl::Duration remaining = absl::FromUnixMillis(deadline) - absl::Now();
299     if (deadline != 0 && remaining <= absl::ZeroDuration()) {
300       VLOG(1) << "Sandbox process hit timeout due to the walltime timer";
301       timed_out_ = true;
302       MaybeGetStackTrace(process_.main_pid, Result::TIMEOUT);
303       KillSandboxee();
304       SetExitStatusFromStatusPipe();
305       break;
306     }
307 
308     if (!external_kill_request_flag_.test_and_set(std::memory_order_relaxed)) {
309       external_kill_ = true;
310       MaybeGetStackTrace(process_.main_pid, Result::EXTERNAL_KILL);
311       KillSandboxee();
312       SetExitStatusFromStatusPipe();
313       break;
314     }
315 
316     if (network_proxy_server_ &&
317         network_proxy_server_->violation_occurred_.load(
318             std::memory_order_acquire) &&
319         !network_violation_) {
320       network_violation_ = true;
321       MaybeGetStackTrace(process_.main_pid, Result::VIOLATION);
322       KillSandboxee();
323       SetExitStatusFromStatusPipe();
324       break;
325     }
326     constexpr int64_t kMinWakeupMsec = 30000;
327     int timeout_msec = kMinWakeupMsec;
328     if (remaining > absl::ZeroDuration()) {
329       timeout_msec = static_cast<int>(
330           std::min(kMinWakeupMsec, absl::ToInt64Milliseconds(remaining)));
331     }
332     int ret = poll(pfds, ABSL_ARRAYSIZE(pfds), timeout_msec);
333     if (ret == 0 || (ret == -1 && errno == EINTR)) {
334       continue;
335     }
336     if (ret == -1) {
337       PLOG(ERROR) << "waiting for action failed";
338       SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_MONITOR);
339       break;
340     }
341     if (pfds[2].revents & POLLIN) {
342       uint64_t value = 0;
343       (void)read(monitor_notify_fd_.get(), &value, sizeof(value));
344       continue;
345     }
346     if (pfds[0].revents & POLLIN) {
347       SetExitStatusFromStatusPipe();
348       break;
349     }
350     if (pfds[0].revents & POLLHUP) {
351       SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_MONITOR);
352       break;
353     }
354     if (pfds[1].revents & POLLIN) {
355       HandleUnotify();
356     }
357   }
358   KillInit();
359 }
360 
SetExitStatusFromStatusPipe()361 void UnotifyMonitor::SetExitStatusFromStatusPipe() {
362   int code, status;
363   rusage usage;
364 
365   std::vector<iovec> iov = {
366       {.iov_base = &code, .iov_len = sizeof(code)},
367       {.iov_base = &status, .iov_len = sizeof(status)},
368       {.iov_base = &usage, .iov_len = sizeof(usage)},
369   };
370 
371   if (absl::Status status = ReadWholeWithDeadline(
372           process_.status_fd.get(), iov, absl::Now() + absl::Seconds(1));
373       !status.ok()) {
374     PLOG(ERROR) << "reading status pipe failed " << status;
375     SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_MONITOR);
376     return;
377   }
378 
379   result_.SetRUsageSandboxee(usage);
380   if (code == CLD_EXITED) {
381     SetExitStatusCode(Result::OK, status);
382   } else if (code == CLD_KILLED || code == CLD_DUMPED) {
383     if (network_violation_) {
384       SetExitStatusCode(Result::VIOLATION, Result::VIOLATION_NETWORK);
385       result_.SetNetworkViolation(network_proxy_server_->violation_msg_);
386     } else if (external_kill_) {
387       SetExitStatusCode(Result::EXTERNAL_KILL, 0);
388     } else if (timed_out_) {
389       SetExitStatusCode(Result::TIMEOUT, 0);
390     } else {
391       SetExitStatusCode(Result::SIGNALED, status);
392     }
393   } else {
394     SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_MONITOR);
395   }
396 }
397 
InitSetupUnotify()398 bool UnotifyMonitor::InitSetupUnotify() {
399   if (!comms_->SendUint32(Client::kSandbox2ClientUnotify)) {
400     LOG(ERROR) << "Couldn't send Client::kSandbox2ClientUnotify message";
401     return false;
402   }
403   int fd;
404   if (!comms_->RecvFD(&fd)) {
405     LOG(ERROR) << "Couldn't recv unotify fd";
406     return false;
407   }
408   seccomp_notify_fd_ = FDCloser(fd);
409   struct seccomp_notif_sizes sizes = {};
410   if (seccomp(SECCOMP_GET_NOTIF_SIZES, 0, &sizes) == -1) {
411     LOG(ERROR) << "Couldn't get seccomp_notif_sizes";
412     return false;
413   }
414   req_size_ = sizes.seccomp_notif;
415   req_.reset(static_cast<seccomp_notif*>(malloc(req_size_)));
416   resp_size_ = sizes.seccomp_notif_resp;
417   resp_.reset(static_cast<seccomp_notif_resp*>(malloc(resp_size_)));
418   return true;
419 }
420 
InitSetupNotifyEventFd()421 bool UnotifyMonitor::InitSetupNotifyEventFd() {
422   int fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
423   if (fd == -1) {
424     PLOG(ERROR) << "failed creating monitor pipe";
425     return false;
426   }
427   monitor_notify_fd_ = FDCloser(fd);
428   return true;
429 }
430 
NotifyMonitor()431 void UnotifyMonitor::NotifyMonitor() {
432   absl::ReaderMutexLock lock(&notify_mutex_);
433   if (monitor_notify_fd_.get() < 0) {
434     return;
435   }
436   uint64_t value = 1;
437   PCHECK(write(monitor_notify_fd_.get(), &value, sizeof(value)) ==
438          sizeof(value));
439 }
440 
KillSandboxee()441 bool UnotifyMonitor::KillSandboxee() {
442   VLOG(1) << "Sending SIGKILL to the PID: " << process_.main_pid;
443   if (kill(process_.main_pid, SIGKILL) != 0) {
444     PLOG(ERROR) << "Could not send SIGKILL to PID " << process_.main_pid;
445     return false;
446   }
447   return true;
448 }
449 
KillInit()450 void UnotifyMonitor::KillInit() {
451   VLOG(1) << "Sending SIGKILL to the PID: " << process_.init_pid;
452   if (kill(process_.init_pid, SIGKILL) != 0) {
453     PLOG(ERROR) << "Could not send SIGKILL to PID " << process_.init_pid;
454   }
455 }
456 
Join()457 void UnotifyMonitor::Join() {
458   absl::MutexLock lock(&notify_mutex_);
459   if (thread_.IsJoinable()) {
460     thread_.Join();
461     CHECK(IsDone()) << "Monitor did not terminate";
462     VLOG(1) << "Final execution status: " << result_.ToString();
463     CHECK(result_.final_status() != Result::UNSET);
464     monitor_notify_fd_.Close();
465   }
466 }
467 
MaybeGetStackTrace(pid_t pid,Result::StatusEnum status)468 void UnotifyMonitor::MaybeGetStackTrace(pid_t pid, Result::StatusEnum status) {
469   if (ShouldCollectStackTrace(status)) {
470     auto stack = GetStackTrace(pid);
471     if (stack.ok()) {
472       result_.set_stack_trace(*stack);
473     } else {
474       LOG(ERROR) << "Getting stack trace: " << stack.status();
475     }
476   }
477 }
478 
GetStackTrace(pid_t pid)479 absl::StatusOr<std::vector<std::string>> UnotifyMonitor::GetStackTrace(
480     pid_t pid) {
481   if (ptrace(PTRACE_ATTACH, pid, 0, 0) != 0) {
482     return absl::ErrnoToStatus(errno,
483                                absl::StrCat("could not attach to pid = ", pid));
484   }
485   int wstatus = 0;
486   while (!WIFSTOPPED(wstatus)) {
487     pid_t ret =
488         waitpid(pid, &wstatus, __WNOTHREAD | __WALL | WUNTRACED | WNOHANG);
489     if (ret == -1) {
490       return absl::ErrnoToStatus(errno,
491                                  absl::StrCat("waiting for stop, pid = ", pid));
492     }
493   }
494   absl::Cleanup cleanup = [pid] {
495     if (ptrace(PTRACE_DETACH, pid, 0, 0) != 0) {
496       LOG(ERROR) << "Could not detach after obtaining stack trace from pid = "
497                  << pid;
498     }
499   };
500   Regs regs(pid);
501   absl::Status status = regs.Fetch();
502   if (!status.ok()) {
503     if (absl::IsNotFound(status)) {
504       LOG(WARNING) << "failed to fetch regs: " << status;
505       return status;
506     }
507     SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_FETCH);
508     return status;
509   }
510   return GetAndLogStackTrace(&regs);
511 }
512 
513 }  // namespace sandbox2
514