1 // Copyright 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 // Implementation file for the sandbox2::PtraceMonitor class.
16
17 #include "sandboxed_api/sandbox2/monitor_ptrace.h"
18
19 #include <sys/ptrace.h>
20 #include <sys/resource.h>
21 #include <sys/wait.h>
22 #include <syscall.h>
23 #include <unistd.h>
24
25 #include <algorithm>
26 #include <atomic>
27 #include <cerrno>
28 #include <cstdint>
29 #include <ctime>
30 #include <fstream>
31 #include <ios>
32 #include <memory>
33 #include <sstream>
34 #include <string>
35 #include <utility>
36 #include <vector>
37
38 #include "absl/base/optimization.h"
39 #include "absl/cleanup/cleanup.h"
40 #include "absl/container/flat_hash_map.h"
41 #include "absl/container/flat_hash_set.h"
42 #include "absl/flags/declare.h"
43 #include "absl/flags/flag.h"
44 #include "absl/log/check.h"
45 #include "absl/log/log.h"
46 #include "absl/log/vlog_is_on.h"
47 #include "absl/status/status.h"
48 #include "absl/status/statusor.h"
49 #include "absl/strings/str_cat.h"
50 #include "absl/strings/str_format.h"
51 #include "absl/strings/string_view.h"
52 #include "absl/synchronization/mutex.h"
53 #include "absl/synchronization/notification.h"
54 #include "absl/time/clock.h"
55 #include "absl/time/time.h"
56 #include "sandboxed_api/config.h"
57 #include "sandboxed_api/sandbox2/client.h"
58 #include "sandboxed_api/sandbox2/comms.h"
59 #include "sandboxed_api/sandbox2/executor.h"
60 #include "sandboxed_api/sandbox2/notify.h"
61 #include "sandboxed_api/sandbox2/policy.h"
62 #include "sandboxed_api/sandbox2/regs.h"
63 #include "sandboxed_api/sandbox2/result.h"
64 #include "sandboxed_api/sandbox2/sanitizer.h"
65 #include "sandboxed_api/sandbox2/syscall.h"
66 #include "sandboxed_api/sandbox2/util.h"
67 #include "sandboxed_api/sandbox2/util/pid_waiter.h"
68 #include "sandboxed_api/util/status_macros.h"
69 #include "sandboxed_api/util/thread.h"
70
71 ABSL_FLAG(bool, sandbox2_log_all_stack_traces, false,
72 "If set, sandbox2 monitor will log stack traces of all monitored "
73 "threads/processes that are reported to terminate with a signal.");
74
75 ABSL_FLAG(bool, sandbox2_monitor_ptrace_use_deadline_manager, false,
76 "If set, ptrace monitor will use deadline manager to enforce "
77 "deadlines and as notification mechanism");
78
79 ABSL_FLAG(bool, sandbox2_log_unobtainable_stack_traces_errors, true,
80 "If set, unobtainable stack trace will be logged as errors.");
81
82 ABSL_FLAG(absl::Duration, sandbox2_stack_traces_collection_timeout,
83 absl::Seconds(1),
84 "How much time should be spent on logging threads' stack traces on "
85 "monitor shut down. Only relevent when collection of all stack "
86 "traces is enabled.");
87
88 ABSL_DECLARE_FLAG(bool, sandbox2_danger_danger_permit_all);
89
90 namespace sandbox2 {
91 namespace {
92
93 // We could use the ProcMapsIterator, however we want the full file content.
ReadProcMaps(pid_t pid)94 std::string ReadProcMaps(pid_t pid) {
95 std::ifstream input(absl::StrCat("/proc/", pid, "/maps"),
96 std::ios_base::in | std::ios_base::binary);
97 std::ostringstream contents;
98 contents << input.rdbuf();
99 return contents.str();
100 }
101
ContinueProcess(pid_t pid,int signo)102 void ContinueProcess(pid_t pid, int signo) {
103 if (ptrace(PTRACE_CONT, pid, 0, signo) == -1) {
104 if (errno == ESRCH) {
105 LOG(WARNING) << "Process " << pid
106 << " died while trying to PTRACE_CONT it";
107 } else {
108 PLOG(ERROR) << "ptrace(PTRACE_CONT, pid=" << pid << ", sig=" << signo
109 << ")";
110 }
111 }
112 }
113
StopProcess(pid_t pid,int signo)114 void StopProcess(pid_t pid, int signo) {
115 if (ptrace(PTRACE_LISTEN, pid, 0, signo) == -1) {
116 if (errno == ESRCH) {
117 LOG(WARNING) << "Process " << pid
118 << " died while trying to PTRACE_LISTEN it";
119 } else {
120 PLOG(ERROR) << "ptrace(PTRACE_LISTEN, pid=" << pid << ", sig=" << signo
121 << ")";
122 }
123 }
124 }
125
CompleteSyscall(pid_t pid,int signo)126 void CompleteSyscall(pid_t pid, int signo) {
127 if (ptrace(PTRACE_SYSCALL, pid, 0, signo) == -1) {
128 if (errno == ESRCH) {
129 LOG(WARNING) << "Process " << pid
130 << " died while trying to PTRACE_SYSCALL it";
131 } else {
132 PLOG(ERROR) << "ptrace(PTRACE_SYSCALL, pid=" << pid << ", sig=" << signo
133 << ")";
134 }
135 }
136 }
137
138 } // namespace
139
PtraceMonitor(Executor * executor,Policy * policy,Notify * notify)140 PtraceMonitor::PtraceMonitor(Executor* executor, Policy* policy, Notify* notify)
141 : MonitorBase(executor, policy, notify),
142 wait_for_execve_(executor->enable_sandboxing_pre_execve_) {
143 if (executor_->limits()->wall_time_limit() != absl::ZeroDuration()) {
144 auto deadline = absl::Now() + executor_->limits()->wall_time_limit();
145 deadline_millis_.store(absl::ToUnixMillis(deadline),
146 std::memory_order_relaxed);
147 }
148 external_kill_request_flag_.test_and_set(std::memory_order_relaxed);
149 dump_stack_request_flag_.test_and_set(std::memory_order_relaxed);
150 use_deadline_manager_ =
151 absl::GetFlag(FLAGS_sandbox2_monitor_ptrace_use_deadline_manager);
152 }
153
IsActivelyMonitoring()154 bool PtraceMonitor::IsActivelyMonitoring() {
155 // If we're still waiting for execve(), then we allow all syscalls.
156 return !wait_for_execve_;
157 }
158
SetActivelyMonitoring()159 void PtraceMonitor::SetActivelyMonitoring() { wait_for_execve_ = false; }
160
SetAdditionalResultInfo(std::unique_ptr<Regs> regs)161 void PtraceMonitor::SetAdditionalResultInfo(std::unique_ptr<Regs> regs) {
162 pid_t pid = regs->pid();
163 result_.SetRegs(std::move(regs));
164 result_.SetProgName(util::GetProgName(pid));
165 result_.SetProcMaps(ReadProcMaps(pid));
166 if (!ShouldCollectStackTrace(result_.final_status())) {
167 VLOG(1) << "Stack traces have been disabled";
168 return;
169 }
170
171 absl::StatusOr<std::vector<std::string>> stack_trace =
172 GetAndLogStackTrace(result_.GetRegs());
173 if (!stack_trace.ok()) {
174 LOG_IF(ERROR,
175 absl::GetFlag(FLAGS_sandbox2_log_unobtainable_stack_traces_errors))
176 << "Could not obtain stack trace: " << stack_trace.status();
177 return;
178 }
179 result_.set_stack_trace(*stack_trace);
180 }
181
KillSandboxee()182 bool PtraceMonitor::KillSandboxee() {
183 VLOG(1) << "Sending SIGKILL to the PID: " << process_.main_pid;
184 if (kill(process_.main_pid, SIGKILL) != 0) {
185 PLOG(ERROR) << "Could not send SIGKILL to PID " << process_.main_pid;
186 SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_KILL);
187 return false;
188 }
189 constexpr absl::Duration kGracefullKillTimeout = absl::Milliseconds(1000);
190 if (hard_deadline_ == absl::InfiniteFuture()) {
191 hard_deadline_ = absl::Now() + kGracefullKillTimeout;
192 }
193 return true;
194 }
195
InterruptSandboxee()196 bool PtraceMonitor::InterruptSandboxee() {
197 if (ptrace(PTRACE_INTERRUPT, process_.main_pid, 0, 0) == -1) {
198 PLOG(ERROR) << "Could not send interrupt to pid=" << process_.main_pid;
199 SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_INTERRUPT);
200 return false;
201 }
202 return true;
203 }
204
// Not defined in glibc. Extracts bits 16-23 of a wait status, which this file
// interprets as the ptrace event number. The argument is parenthesized so that
// a compound expression (e.g. `a | b`) is masked as a whole.
#define __WPTRACEEVENT(x) (((x) & 0xff0000) >> 16)
207
NotifyMonitor()208 void PtraceMonitor::NotifyMonitor() {
209 if (use_deadline_manager_) {
210 pid_waiter_.Notify();
211 } else {
212 absl::MutexLock lock(&thread_mutex_);
213 if (thread_.IsJoinable()) {
214 pthread_kill(thread_.handle(), SIGCHLD);
215 }
216 }
217 }
218
Join()219 void PtraceMonitor::Join() {
220 absl::MutexLock lock(&thread_mutex_);
221 if (thread_.IsJoinable()) {
222 thread_.Join();
223 CHECK(IsDone()) << "Monitor did not terminate";
224 VLOG(1) << "Final execution status: " << result_.ToString();
225 CHECK(result_.final_status() != Result::UNSET);
226 }
227 }
228
RunInternal()229 void PtraceMonitor::RunInternal() {
230 {
231 absl::MutexLock lock(&thread_mutex_);
232 thread_ = sapi::Thread(this, &PtraceMonitor::Run, "sandbox2-Monitor");
233 }
234
235 // Wait for the Monitor to set-up the sandboxee correctly (or fail while
236 // doing that). From here on, it is safe to use the IPC object for
237 // non-sandbox-related data exchange.
238 setup_notification_.WaitForNotification();
239 }
240
Run()241 void PtraceMonitor::Run() {
242 absl::Cleanup monitor_done = [this] {
243 getrusage(RUSAGE_THREAD, result_.GetRUsageMonitor());
244 OnDone();
245 };
246
247 absl::Cleanup setup_notify = [this] { setup_notification_.Notify(); };
248 // It'd be costly to initialize the sigset_t for each sigtimedwait()
249 // invocation, so do it once per Monitor.
250 if (!use_deadline_manager_ && !InitSetupSignals()) {
251 SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_SIGNALS);
252 return;
253 }
254 // This call should be the last in the init sequence, because it can cause the
255 // sandboxee to enter ptrace-stopped state, in which it will not be able to
256 // send any messages over the Comms channel.
257 if (!InitPtraceAttach()) {
258 SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_PTRACE);
259 return;
260 }
261
262 // Tell the parent thread (Sandbox2 object) that we're done with the initial
263 // set-up process of the sandboxee.
264 std::move(setup_notify).Invoke();
265
266 bool sandboxee_exited = false;
267 pid_waiter_.SetPriorityPid(process_.main_pid);
268 int status;
269 // All possible still running children of main process, will be killed due to
270 // PTRACE_O_EXITKILL ptrace() flag.
271 while (result().final_status() == Result::UNSET) {
272 if (absl::Now() >= hard_deadline_) {
273 LOG(WARNING) << "Hard deadline exceeded (timed_out=" << timed_out_
274 << ", external_kill=" << external_kill_
275 << ", network_violation=" << network_violation_ << ").";
276 SetExitStatusCode(Result::TIMEOUT, 0);
277 break;
278 }
279 int64_t deadline = deadline_millis_.load(std::memory_order_relaxed);
280 if (deadline != 0 && absl::Now() >= absl::FromUnixMillis(deadline)) {
281 VLOG(1) << "Sandbox process hit timeout due to the walltime timer";
282 timed_out_ = true;
283 if (!KillSandboxee()) {
284 break;
285 }
286 }
287
288 if (!dump_stack_request_flag_.test_and_set(std::memory_order_relaxed)) {
289 should_dump_stack_ = true;
290 if (!InterruptSandboxee()) {
291 break;
292 }
293 }
294
295 if (!external_kill_request_flag_.test_and_set(std::memory_order_relaxed)) {
296 external_kill_ = true;
297 if (!KillSandboxee()) {
298 break;
299 }
300 }
301
302 if (network_proxy_server_ &&
303 network_proxy_server_->violation_occurred_.load(
304 std::memory_order_acquire) &&
305 !network_violation_) {
306 network_violation_ = true;
307 if (!KillSandboxee()) {
308 break;
309 }
310 }
311 if (use_deadline_manager_) {
312 absl::Time effective_deadline = hard_deadline_;
313 if (deadline != 0 && hard_deadline_ == absl::InfiniteFuture()) {
314 effective_deadline = absl::FromUnixMillis(deadline);
315 }
316 pid_waiter_.SetDeadline(effective_deadline);
317 }
318 pid_t ret = pid_waiter_.Wait(&status);
319 if (ret == 0) {
320 if (!use_deadline_manager_) {
321 constexpr timespec ts = {kWakeUpPeriodSec, kWakeUpPeriodNSec};
322 int signo = sigtimedwait(&sset_, nullptr, &ts);
323 LOG_IF(ERROR, signo != -1 && signo != SIGCHLD)
324 << "Unknown signal received: " << signo;
325 }
326 continue;
327 }
328
329 if (ret == -1) {
330 if (errno == ECHILD) {
331 LOG(ERROR) << "PANIC(). The main process has not exited yet, "
332 << "yet we haven't seen its exit event";
333 SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_CHILD);
334 } else if (!use_deadline_manager_ || errno != EINTR) {
335 PLOG(ERROR) << "waitpid() failed";
336 }
337 continue;
338 }
339
340 VLOG(3) << "waitpid() returned with PID: " << ret << ", status: " << status;
341
342 if (WIFEXITED(status)) {
343 VLOG(1) << "PID: " << ret
344 << " finished with code: " << WEXITSTATUS(status);
345 // That's the main process, set the exit code, and exit. It will kill
346 // all remaining processes (if there are any) because of the
347 // PTRACE_O_EXITKILL ptrace() flag.
348 if (ret == process_.main_pid) {
349 if (IsActivelyMonitoring()) {
350 SetExitStatusCode(Result::OK, WEXITSTATUS(status));
351 } else {
352 SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_MONITOR);
353 }
354 sandboxee_exited = true;
355 }
356 } else if (WIFSIGNALED(status)) {
357 // This usually does not happen, but might.
358 // Quote from the manual:
359 // A SIGKILL signal may still cause a PTRACE_EVENT_EXIT stop before
360 // actual signal death. This may be changed in the future;
361 VLOG(1) << "PID: " << ret << " terminated with signal: "
362 << util::GetSignalName(WTERMSIG(status));
363 if (ret == process_.main_pid) {
364 if (network_violation_) {
365 SetExitStatusCode(Result::VIOLATION, Result::VIOLATION_NETWORK);
366 result_.SetNetworkViolation(network_proxy_server_->violation_msg_);
367 } else if (external_kill_) {
368 SetExitStatusCode(Result::EXTERNAL_KILL, 0);
369 } else if (timed_out_) {
370 SetExitStatusCode(Result::TIMEOUT, 0);
371 } else {
372 SetExitStatusCode(Result::SIGNALED, WTERMSIG(status));
373 }
374 sandboxee_exited = true;
375 }
376 } else if (WIFSTOPPED(status)) {
377 VLOG(2) << "PID: " << ret
378 << " received signal: " << util::GetSignalName(WSTOPSIG(status))
379 << " with event: "
380 << util::GetPtraceEventName(__WPTRACEEVENT(status));
381 StateProcessStopped(ret, status);
382 } else if (WIFCONTINUED(status)) {
383 VLOG(2) << "PID: " << ret << " is being continued";
384 }
385 }
386
387 if (!sandboxee_exited) {
388 const bool log_stack_traces =
389 result_.final_status() != Result::OK &&
390 absl::GetFlag(FLAGS_sandbox2_log_all_stack_traces);
391 constexpr auto kGracefullExitTimeout = absl::Milliseconds(200);
392 auto deadline = absl::Now() + kGracefullExitTimeout;
393 if (log_stack_traces) {
394 deadline = absl::Now() +
395 absl::GetFlag(FLAGS_sandbox2_stack_traces_collection_timeout);
396 }
397 for (;;) {
398 auto left = deadline - absl::Now();
399 if (absl::Now() >= deadline) {
400 LOG(WARNING)
401 << "Waiting for sandboxee exit timed out. Sandboxee result: "
402 << result_.ToString();
403 break;
404 }
405 if (use_deadline_manager_) {
406 pid_waiter_.SetDeadline(deadline);
407 }
408 pid_t ret = pid_waiter_.Wait(&status);
409 if (ret == -1) {
410 if (use_deadline_manager_ && errno == EINTR) {
411 continue;
412 }
413 if (!log_stack_traces || ret != ECHILD) {
414 PLOG(ERROR) << "waitpid() failed";
415 }
416 break;
417 }
418 if (!log_stack_traces) {
419 if (ret == process_.main_pid &&
420 (WIFSIGNALED(status) || WIFEXITED(status))) {
421 break;
422 }
423 kill(process_.main_pid, SIGKILL);
424 }
425
426 if (ret == 0) {
427 if (!use_deadline_manager_) {
428 auto ts = absl::ToTimespec(left);
429 sigtimedwait(&sset_, nullptr, &ts);
430 }
431 continue;
432 }
433
434 if (WIFSTOPPED(status)) {
435 if (log_stack_traces) {
436 LogStackTraceOfPid(ret);
437 }
438
439 if (__WPTRACEEVENT(status) == PTRACE_EVENT_EXIT) {
440 VLOG(2) << "PID: " << ret << " PTRACE_EVENT_EXIT ";
441 ContinueProcess(ret, 0);
442 continue;
443 }
444 }
445 }
446 }
447 }
448
LogStackTraceOfPid(pid_t pid)449 void PtraceMonitor::LogStackTraceOfPid(pid_t pid) {
450 if (!StackTraceCollectionPossible()) {
451 return;
452 }
453
454 Regs regs(pid);
455 if (auto status = regs.Fetch(); !status.ok()) {
456 LOG(ERROR) << "Failed to get regs, PID:" << pid << " status:" << status;
457 return;
458 }
459
460 if (auto stack_trace = GetAndLogStackTrace(®s); !stack_trace.ok()) {
461 LOG(ERROR) << "Failed to get stack trace, PID:" << pid
462 << " status:" << stack_trace.status();
463 }
464 }
465
InitSetupSignals()466 bool PtraceMonitor::InitSetupSignals() {
467 if (sigemptyset(&sset_) == -1) {
468 PLOG(ERROR) << "sigemptyset()";
469 return false;
470 }
471
472 // sigtimedwait will react (wake-up) to arrival of this signal.
473 if (sigaddset(&sset_, SIGCHLD) == -1) {
474 PLOG(ERROR) << "sigaddset(SIGCHLD)";
475 return false;
476 }
477
478 if (pthread_sigmask(SIG_BLOCK, &sset_, nullptr) == -1) {
479 PLOG(ERROR) << "pthread_sigmask(SIG_BLOCK, SIGCHLD)";
480 return false;
481 }
482
483 return true;
484 }
485
// Attaches (PTRACE_SEIZE) to every task in `tasks` that is not yet contained in
// `tasks_attached`, and records each successful attach in `tasks_attached`.
//
// Tasks for which ptrace fails with EPERM are retried with a capped
// exponential backoff until `deadline`; tasks failing with ESRCH are skipped.
//
// Returns:
//   OkStatus          - no task is left to retry.
//   DeadlineExceeded  - tasks were still failing with EPERM at `deadline`.
//   ErrnoToStatus(..) - ptrace failed with any other errno.
absl::Status TryAttach(const absl::flat_hash_set<int>& tasks,
                       absl::Time deadline,
                       absl::flat_hash_set<int>& tasks_attached) {
  constexpr intptr_t kPtraceOptions =
      PTRACE_O_TRACESYSGOOD | PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK |
      PTRACE_O_TRACEVFORKDONE | PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC |
      PTRACE_O_TRACEEXIT | PTRACE_O_TRACESECCOMP | PTRACE_O_EXITKILL;
  auto format_ptrace_error = [](int task, absl::string_view message) {
    return absl::StrCat("ptrace(PTRACE_SEIZE, ", task, ", 0, ", "0x",
                        absl::Hex(kPtraceOptions), "): ", message);
  };

  absl::flat_hash_set<int> cur_tasks = tasks;
  int retries = 0;

  // In some situations we allow ptrace to try again when it fails.
  while (!cur_tasks.empty()) {
    absl::flat_hash_set<int> retry_tasks;
    for (int task : cur_tasks) {
      if (tasks_attached.contains(task)) {
        continue;
      }
      int ret = ptrace(PTRACE_SEIZE, task, 0, kPtraceOptions);
      if (ret != 0) {
        // Capture errno right away: building the error message below may
        // call into code that overwrites it.
        const int ptrace_errno = errno;
        if (ptrace_errno == EPERM) {
          // Sometimes when a task is exiting we can get an EPERM from ptrace.
          // Let's try again up until the timeout in this situation.
          PLOG(WARNING) << format_ptrace_error(task, "Retrying after EPERM");
          retry_tasks.insert(task);
          continue;
        }
        if (ptrace_errno == ESRCH) {
          // A task may have exited since we captured the task list, we will
          // allow things to continue after we log a warning.
          PLOG(WARNING) << format_ptrace_error(
              task, "Skipping exited task. Continuing with other tasks.");
          continue;
        }
        // Any other errno will be considered a failure.
        return absl::ErrnoToStatus(ptrace_errno,
                                   format_ptrace_error(task, "Failure"));
      }
      tasks_attached.insert(task);
    }
    if (!retry_tasks.empty()) {
      if (absl::Now() >= deadline) {
        // Report the tasks that are still unattached, not everything that was
        // tried in this round.
        return absl::DeadlineExceededError(absl::StrCat(
            "Attaching to sandboxee timed out: could not attach to ",
            retry_tasks.size(), " tasks"));
      }
      // Exponential Backoff, capped at kMaxRetry and at the time remaining
      // until the deadline.
      constexpr absl::Duration kInitialRetry = absl::Milliseconds(1);
      constexpr absl::Duration kMaxRetry = absl::Milliseconds(20);
      const absl::Duration retry_interval =
          kInitialRetry * (1 << std::min(10, retries++));
      absl::SleepFor(
          std::min({retry_interval, kMaxRetry, deadline - absl::Now()}));
    }
    cur_tasks = std::move(retry_tasks);
  }

  return absl::OkStatus();
}
548
InitPtraceAttach()549 bool PtraceMonitor::InitPtraceAttach() {
550 if (process_.init_pid > 0) {
551 if (ptrace(PTRACE_SEIZE, process_.init_pid, 0, PTRACE_O_EXITKILL) != 0) {
552 if (errno != ESRCH) {
553 PLOG(ERROR) << "attaching to init process failed";
554 }
555 return false;
556 }
557 }
558
559 // Get a list of tasks.
560 absl::StatusOr<absl::flat_hash_set<int>> tasks =
561 sanitizer::GetListOfTasks(process_.main_pid);
562 if (!tasks.ok()) {
563 LOG(ERROR) << "Could not get list of tasks: " << tasks.status().message();
564 return false;
565 }
566
567 if (!tasks->contains(process_.main_pid)) {
568 LOG(ERROR) << "The pid " << process_.main_pid
569 << " was not found in its own tasklist.";
570 return false;
571 }
572
573 // With TSYNC, we can allow threads: seccomp applies to all threads.
574 if (tasks->size() > 1) {
575 LOG(WARNING) << "PID " << process_.main_pid << " has " << tasks->size()
576 << " threads, at the time of call to SandboxMeHere(). If you "
577 "are seeing more sandbox violations than expected, this "
578 "might be the reason why"
579 << ".";
580 }
581
582 absl::flat_hash_set<int> tasks_attached;
583 absl::Time deadline = absl::Now() + absl::Seconds(4);
584
585 constexpr int kMaxRetries = 3;
586 for (int retries = 0; retries < kMaxRetries && *tasks != tasks_attached;
587 ++retries) {
588 if (retries > 0) {
589 LOG(ERROR) << "PID " << process_.main_pid
590 << " spawned new threads while we were trying to attach to it "
591 "(attempt "
592 << retries << "/" << kMaxRetries << ")";
593 }
594 if (absl::Status status = TryAttach(*tasks, deadline, tasks_attached);
595 !status.ok()) {
596 LOG(ERROR) << status.message();
597 return false;
598 }
599
600 // Get a list of tasks after attaching.
601 tasks = sanitizer::GetListOfTasks(process_.main_pid);
602 if (!tasks.ok()) {
603 LOG(ERROR) << "Could not get list of tasks: " << tasks.status().message();
604 return false;
605 }
606 }
607 if (*tasks != tasks_attached) {
608 LOG(ERROR) << "PID " << process_.main_pid
609 << " spawned new threads while we were trying to attach to it "
610 "(retries exhausted)";
611 return false;
612 }
613
614 // No glibc wrapper for gettid - see 'man gettid'.
615 VLOG(1) << "Monitor (PID: " << getpid()
616 << ", TID: " << util::Syscall(__NR_gettid)
617 << ") attached to PID: " << process_.main_pid;
618
619 // Technically, the sandboxee can be in a ptrace-stopped state right now,
620 // because some signal could have arrived in the meantime. Yet, this
621 // Comms::SendUint32 call shouldn't lock our process, because the underlying
622 // socketpair() channel is buffered, hence it will accept the uint32_t message
623 // no matter what is the current state of the sandboxee, and it will allow for
624 // our process to continue and unlock the sandboxee with the proper ptrace
625 // event handling.
626 if (!comms_->SendUint32(Client::kSandbox2ClientDone)) {
627 LOG(ERROR) << "Couldn't send Client::kSandbox2ClientDone message";
628 return false;
629 }
630 return true;
631 }
632
ActionProcessSyscall(Regs * regs,const Syscall & syscall)633 void PtraceMonitor::ActionProcessSyscall(Regs* regs, const Syscall& syscall) {
634 // If the sandboxing is not enabled yet, allow the first __NR_execveat.
635 if (syscall.nr() == __NR_execveat && !IsActivelyMonitoring()) {
636 VLOG(1) << "[PERMITTED/BEFORE_EXECVEAT]: " << "SYSCALL ::: PID: "
637 << regs->pid() << ", PROG: '" << util::GetProgName(regs->pid())
638 << "' : " << syscall.GetDescription();
639 ContinueProcess(regs->pid(), 0);
640 return;
641 }
642
643 // Notify can decide whether we want to allow this syscall. It could be useful
644 // for sandbox setups in which some syscalls might still need some logging,
645 // but nonetheless be allowed ('permissible syscalls' in sandbox v1).
646 auto trace_response = notify_->EventSyscallTrace(syscall);
647 if (trace_response == Notify::TraceAction::kAllow) {
648 ContinueProcess(regs->pid(), 0);
649 return;
650 }
651 if (trace_response == Notify::TraceAction::kInspectAfterReturn) {
652 // Note that a process might die without an exit-stop before the syscall is
653 // completed (eg. a thread calls execve() and the thread group leader dies),
654 // so the entry is removed when the process exits.
655 syscalls_in_progress_[regs->pid()] = syscall;
656 CompleteSyscall(regs->pid(), 0);
657 return;
658 }
659
660 if (absl::GetFlag(FLAGS_sandbox2_danger_danger_permit_all) || log_file_) {
661 std::string syscall_description = syscall.GetDescription();
662 if (log_file_) {
663 PCHECK(absl::FPrintF(log_file_, "PID: %d %s\n", regs->pid(),
664 syscall_description) >= 0);
665 }
666 VLOG(1) << "PID: " << regs->pid() << " " << syscall_description;
667 ContinueProcess(regs->pid(), 0);
668 return;
669 }
670
671 ActionProcessSyscallViolation(regs, syscall, ViolationType::kSyscall);
672 }
673
ActionProcessSyscallViolation(Regs * regs,const Syscall & syscall,ViolationType violation_type)674 void PtraceMonitor::ActionProcessSyscallViolation(
675 Regs* regs, const Syscall& syscall, ViolationType violation_type) {
676 LogSyscallViolation(syscall);
677 notify_->EventSyscallViolation(syscall, violation_type);
678 SetExitStatusCode(Result::VIOLATION, syscall.nr());
679 result_.SetSyscall(std::make_unique<Syscall>(syscall));
680 SetAdditionalResultInfo(std::make_unique<Regs>(*regs));
681 // Rewrite the syscall argument to something invalid (-1).
682 // The process will be killed anyway so this is just a precaution.
683 auto status = regs->SkipSyscallReturnValue(-ENOSYS);
684 if (!status.ok()) {
685 LOG(ERROR) << status;
686 }
687 }
688
EventPtraceSeccomp(pid_t pid,int event_msg)689 void PtraceMonitor::EventPtraceSeccomp(pid_t pid, int event_msg) {
690 if (event_msg < sapi::cpu::Architecture::kUnknown ||
691 event_msg > sapi::cpu::Architecture::kMax) {
692 // We've observed that, if the process has exited, the event_msg may contain
693 // the exit status even though we haven't received the exit event yet.
694 // To work around this, if the event msg is not in the range of the known
695 // architectures, we assume that it's an exit status. We deal with it by
696 // ignoring this event, and we'll get the exit event in the next iteration.
697 LOG(WARNING) << "received event_msg for unknown architecture: " << event_msg
698 << "; the program may have exited";
699 return;
700 }
701
702 // If the seccomp-policy is using RET_TRACE, we request that it returns the
703 // syscall architecture identifier in the SECCOMP_RET_DATA.
704 const auto syscall_arch = static_cast<sapi::cpu::Architecture>(event_msg);
705 Regs regs(pid);
706 auto status = regs.Fetch();
707 if (!status.ok()) {
708 // Ignore if process is killed in the meanwhile
709 if (absl::IsNotFound(status)) {
710 LOG(WARNING) << "failed to fetch regs: " << status;
711 return;
712 }
713 LOG(ERROR) << "failed to fetch regs: " << status;
714 SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_FETCH);
715 return;
716 }
717
718 Syscall syscall = regs.ToSyscall(syscall_arch);
719 // If the architecture of the syscall used is different that the current host
720 // architecture, report a violation.
721 if (syscall_arch != Syscall::GetHostArch()) {
722 ActionProcessSyscallViolation(®s, syscall,
723 ViolationType::kArchitectureSwitch);
724 return;
725 }
726
727 ActionProcessSyscall(®s, syscall);
728 }
729
EventSyscallExit(pid_t pid)730 void PtraceMonitor::EventSyscallExit(pid_t pid) {
731 // Check that the monitor wants to inspect the current syscall's return value.
732 auto index = syscalls_in_progress_.find(pid);
733 if (index == syscalls_in_progress_.end()) {
734 LOG(ERROR) << "Expected a syscall in progress in PID " << pid;
735 SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_INSPECT);
736 return;
737 }
738 Regs regs(pid);
739 auto status = regs.Fetch();
740 if (!status.ok()) {
741 // Ignore if process is killed in the meanwhile
742 if (absl::IsNotFound(status)) {
743 LOG(WARNING) << "failed to fetch regs: " << status;
744 return;
745 }
746 LOG(ERROR) << "failed to fetch regs: " << status;
747 SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_FETCH);
748 return;
749 }
750 int64_t return_value = regs.GetReturnValue(sapi::host_cpu::Architecture());
751 notify_->EventSyscallReturn(index->second, return_value);
752 syscalls_in_progress_.erase(index);
753 ContinueProcess(pid, 0);
754 }
755
EventPtraceNewProcess(pid_t pid,int event_msg)756 void PtraceMonitor::EventPtraceNewProcess(pid_t pid, int event_msg) {
757 // ptrace doesn't issue syscall-exit-stops for successful fork/vfork/clone
758 // system calls. Check if the monitor wanted to inspect the syscall's return
759 // value, and call EventSyscallReturn for the parent process if so.
760 auto index = syscalls_in_progress_.find(pid);
761 if (index != syscalls_in_progress_.end()) {
762 auto syscall_nr = index->second.nr();
763 bool creating_new_process = syscall_nr == __NR_clone;
764 #ifdef __NR_clone3
765 creating_new_process = creating_new_process || syscall_nr == __NR_clone3;
766 #endif
767 #ifdef __NR_fork
768 creating_new_process = creating_new_process || syscall_nr == __NR_fork;
769 #endif
770 #ifdef __NR_vfork
771 creating_new_process = creating_new_process || syscall_nr == __NR_vfork;
772 #endif
773 if (!creating_new_process) {
774 LOG(ERROR) << "Expected a fork/vfork/clone syscall in progress in PID "
775 << pid << "; actual: " << index->second.GetDescription();
776 SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_INSPECT);
777 return;
778 }
779 notify_->EventSyscallReturn(index->second, event_msg);
780 syscalls_in_progress_.erase(index);
781 }
782 ContinueProcess(pid, 0);
783 }
784
EventPtraceExec(pid_t pid,int event_msg)785 void PtraceMonitor::EventPtraceExec(pid_t pid, int event_msg) {
786 if (!IsActivelyMonitoring()) {
787 VLOG(1) << "PTRACE_EVENT_EXEC seen from PID: " << event_msg
788 << ". SANDBOX ENABLED!";
789 SetActivelyMonitoring();
790 } else {
791 // ptrace doesn't issue syscall-exit-stops for successful execve/execveat
792 // system calls. Check if the monitor wanted to inspect the syscall's return
793 // value, and call EventSyscallReturn if so.
794 auto index = syscalls_in_progress_.find(pid);
795 if (index != syscalls_in_progress_.end()) {
796 auto syscall_nr = index->second.nr();
797 if (syscall_nr != __NR_execve && syscall_nr != __NR_execveat) {
798 LOG(ERROR) << "Expected an execve/execveat syscall in progress in PID "
799 << pid << "; actual: " << index->second.GetDescription();
800 SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_INSPECT);
801 return;
802 }
803 notify_->EventSyscallReturn(index->second, 0);
804 syscalls_in_progress_.erase(index);
805 }
806 }
807 ContinueProcess(pid, 0);
808 }
809
EventPtraceExit(pid_t pid,int event_msg)810 void PtraceMonitor::EventPtraceExit(pid_t pid, int event_msg) {
811 // Forget about any syscalls in progress for this PID.
812 syscalls_in_progress_.erase(pid);
813
814 // A regular exit, let it continue (fast-path).
815 if (ABSL_PREDICT_TRUE(WIFEXITED(event_msg) &&
816 (!policy_->collect_stacktrace_on_exit() ||
817 pid != process_.main_pid))) {
818 ContinueProcess(pid, 0);
819 return;
820 }
821
822 const bool is_seccomp =
823 WIFSIGNALED(event_msg) && WTERMSIG(event_msg) == SIGSYS;
824 const bool log_stack_trace =
825 absl::GetFlag(FLAGS_sandbox2_log_all_stack_traces);
826 // Fetch the registers as we'll need them to fill the result in any case
827 auto regs = std::make_unique<Regs>(pid);
828 if (is_seccomp || pid == process_.main_pid || log_stack_trace) {
829 auto status = regs->Fetch();
830 if (!status.ok()) {
831 LOG(ERROR) << "failed to fetch regs: " << status;
832 SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_FETCH);
833 return;
834 }
835 }
836
837 // Process signaled due to seccomp violation.
838 if (is_seccomp) {
839 VLOG(1) << "PID: " << pid << " violation uncovered via the EXIT_EVENT";
840 ActionProcessSyscallViolation(regs.get(),
841 regs->ToSyscall(Syscall::GetHostArch()),
842 ViolationType::kSyscall);
843 return;
844 }
845
846 // This can be reached in four cases:
847 // 1) Process was killed from the sandbox.
848 // 2) Process was killed because it hit a timeout.
849 // 3) Regular signal/other exit cause.
850 // 4) Normal exit for which we want to obtain stack trace.
851 if (pid == process_.main_pid) {
852 VLOG(1) << "PID: " << pid << " main special exit";
853 if (network_violation_) {
854 SetExitStatusCode(Result::VIOLATION, Result::VIOLATION_NETWORK);
855 result_.SetNetworkViolation(network_proxy_server_->violation_msg_);
856 } else if (external_kill_) {
857 SetExitStatusCode(Result::EXTERNAL_KILL, 0);
858 } else if (timed_out_) {
859 SetExitStatusCode(Result::TIMEOUT, 0);
860 } else if (WIFEXITED(event_msg)) {
861 SetExitStatusCode(Result::OK, WEXITSTATUS(event_msg));
862 } else {
863 SetExitStatusCode(Result::SIGNALED, WTERMSIG(event_msg));
864 }
865 SetAdditionalResultInfo(std::move(regs));
866 } else if (log_stack_trace) {
867 // In case pid == pid_ the stack trace will be logged anyway. So we need
868 // to do explicit logging only when this is not a main PID.
869 if (StackTraceCollectionPossible()) {
870 if (auto stack_trace = GetAndLogStackTrace(regs.get());
871 !stack_trace.ok()) {
872 LOG(ERROR) << "Failed to get stack trace, PID:" << pid
873 << " status:" << stack_trace.status();
874 }
875 }
876 }
877 VLOG(1) << "Continuing";
878 ContinueProcess(pid, 0);
879 }
880
EventPtraceStop(pid_t pid,int stopsig)881 void PtraceMonitor::EventPtraceStop(pid_t pid, int stopsig) {
882 // It's not a real stop signal. For example PTRACE_O_TRACECLONE and similar
883 // flags to ptrace(PTRACE_SEIZE) might generate this event with SIGTRAP.
884 if (stopsig != SIGSTOP && stopsig != SIGTSTP && stopsig != SIGTTIN &&
885 stopsig != SIGTTOU) {
886 ContinueProcess(pid, 0);
887 return;
888 }
889 // It's our PID stop signal. Stop it.
890 VLOG(2) << "PID: " << pid << " stopped due to "
891 << util::GetSignalName(stopsig);
892 StopProcess(pid, 0);
893 }
894
StateProcessStopped(pid_t pid,int status)895 void PtraceMonitor::StateProcessStopped(pid_t pid, int status) {
896 int stopsig = WSTOPSIG(status);
897 // We use PTRACE_O_TRACESYSGOOD, so we can tell it's a syscall stop without
898 // calling PTRACE_GETSIGINFO by checking the value of the reported signal.
899 bool is_syscall_exit = stopsig == (SIGTRAP | 0x80);
900 if (__WPTRACEEVENT(status) == 0 && !is_syscall_exit) {
901 // Must be a regular signal delivery.
902 VLOG(2) << "PID: " << pid
903 << " received signal: " << util::GetSignalName(stopsig);
904 notify_->EventSignal(pid, stopsig);
905 ContinueProcess(pid, stopsig);
906 return;
907 }
908
909 unsigned long event_msg; // NOLINT
910 if (ptrace(PTRACE_GETEVENTMSG, pid, 0, &event_msg) == -1) {
911 if (errno == ESRCH) {
912 // This happens from time to time, the kernel does not guarantee us that
913 // we get the event in time.
914 PLOG(INFO) << "ptrace(PTRACE_GETEVENTMSG, " << pid << ")";
915 return;
916 }
917 PLOG(ERROR) << "ptrace(PTRACE_GETEVENTMSG, " << pid << ")";
918 SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_GETEVENT);
919 return;
920 }
921
922 if (ABSL_PREDICT_FALSE(pid == process_.main_pid && should_dump_stack_ &&
923 executor_->libunwind_sbox_for_pid_ == 0 &&
924 policy_->GetNamespace())) {
925 auto stack_trace = [this,
926 pid]() -> absl::StatusOr<std::vector<std::string>> {
927 Regs regs(pid);
928 SAPI_RETURN_IF_ERROR(regs.Fetch());
929 return GetStackTrace(®s);
930 }();
931
932 if (!stack_trace.ok()) {
933 LOG(WARNING) << "FAILED TO GET SANDBOX STACK : " << stack_trace.status();
934 } else if (VLOG_IS_ON(0)) {
935 VLOG(0) << "SANDBOX STACK: PID: " << pid << ", [";
936 for (const auto& frame : *stack_trace) {
937 VLOG(0) << " " << frame;
938 }
939 VLOG(0) << "]";
940 }
941 should_dump_stack_ = false;
942 }
943
944 if (is_syscall_exit) {
945 VLOG(2) << "PID: " << pid << " syscall-exit-stop: " << event_msg;
946 EventSyscallExit(pid);
947 return;
948 }
949
950 switch (__WPTRACEEVENT(status)) {
951 case PTRACE_EVENT_FORK:
952 VLOG(2) << "PID: " << pid << " PTRACE_EVENT_FORK, PID: " << event_msg;
953 EventPtraceNewProcess(pid, event_msg);
954 break;
955 case PTRACE_EVENT_VFORK:
956 VLOG(2) << "PID: " << pid << " PTRACE_EVENT_VFORK, PID: " << event_msg;
957 EventPtraceNewProcess(pid, event_msg);
958 break;
959 case PTRACE_EVENT_CLONE:
960 VLOG(2) << "PID: " << pid << " PTRACE_EVENT_CLONE, PID: " << event_msg;
961 EventPtraceNewProcess(pid, event_msg);
962 break;
963 case PTRACE_EVENT_VFORK_DONE:
964 ContinueProcess(pid, 0);
965 break;
966 case PTRACE_EVENT_EXEC:
967 VLOG(2) << "PID: " << pid << " PTRACE_EVENT_EXEC, PID: " << event_msg;
968 EventPtraceExec(pid, event_msg);
969 break;
970 case PTRACE_EVENT_EXIT:
971 VLOG(2) << "PID: " << pid << " PTRACE_EVENT_EXIT: " << event_msg;
972 EventPtraceExit(pid, event_msg);
973 break;
974 case PTRACE_EVENT_STOP:
975 VLOG(2) << "PID: " << pid << " PTRACE_EVENT_STOP: " << event_msg;
976 EventPtraceStop(pid, stopsig);
977 break;
978 case PTRACE_EVENT_SECCOMP:
979 VLOG(2) << "PID: " << pid << " PTRACE_EVENT_SECCOMP: " << event_msg;
980 EventPtraceSeccomp(pid, event_msg);
981 break;
982 default:
983 LOG(ERROR) << "Unknown ptrace event: " << __WPTRACEEVENT(status)
984 << " with data: " << event_msg;
985 break;
986 }
987 }
988
989 } // namespace sandbox2
990