1 // Copyright 2023 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Implementation file for the sandbox2::PtraceMonitor class.
16 
17 #include "sandboxed_api/sandbox2/monitor_ptrace.h"
18 
19 #include <sys/ptrace.h>
20 #include <sys/resource.h>
21 #include <sys/wait.h>
22 #include <syscall.h>
23 #include <unistd.h>
24 
25 #include <algorithm>
26 #include <atomic>
27 #include <cerrno>
28 #include <cstdint>
29 #include <ctime>
30 #include <fstream>
31 #include <ios>
32 #include <memory>
33 #include <sstream>
34 #include <string>
35 #include <utility>
36 #include <vector>
37 
38 #include "absl/base/optimization.h"
39 #include "absl/cleanup/cleanup.h"
40 #include "absl/container/flat_hash_map.h"
41 #include "absl/container/flat_hash_set.h"
42 #include "absl/flags/declare.h"
43 #include "absl/flags/flag.h"
44 #include "absl/log/check.h"
45 #include "absl/log/log.h"
46 #include "absl/log/vlog_is_on.h"
47 #include "absl/status/status.h"
48 #include "absl/status/statusor.h"
49 #include "absl/strings/str_cat.h"
50 #include "absl/strings/str_format.h"
51 #include "absl/strings/string_view.h"
52 #include "absl/synchronization/mutex.h"
53 #include "absl/synchronization/notification.h"
54 #include "absl/time/clock.h"
55 #include "absl/time/time.h"
56 #include "sandboxed_api/config.h"
57 #include "sandboxed_api/sandbox2/client.h"
58 #include "sandboxed_api/sandbox2/comms.h"
59 #include "sandboxed_api/sandbox2/executor.h"
60 #include "sandboxed_api/sandbox2/notify.h"
61 #include "sandboxed_api/sandbox2/policy.h"
62 #include "sandboxed_api/sandbox2/regs.h"
63 #include "sandboxed_api/sandbox2/result.h"
64 #include "sandboxed_api/sandbox2/sanitizer.h"
65 #include "sandboxed_api/sandbox2/syscall.h"
66 #include "sandboxed_api/sandbox2/util.h"
67 #include "sandboxed_api/sandbox2/util/pid_waiter.h"
68 #include "sandboxed_api/util/status_macros.h"
69 #include "sandboxed_api/util/thread.h"
70 
71 ABSL_FLAG(bool, sandbox2_log_all_stack_traces, false,
72           "If set, sandbox2 monitor will log stack traces of all monitored "
73           "threads/processes that are reported to terminate with a signal.");
74 
75 ABSL_FLAG(bool, sandbox2_monitor_ptrace_use_deadline_manager, false,
76           "If set, ptrace monitor will use deadline manager to enforce "
77           "deadlines and as notification mechanism");
78 
79 ABSL_FLAG(bool, sandbox2_log_unobtainable_stack_traces_errors, true,
80           "If set, unobtainable stack trace will be logged as errors.");
81 
82 ABSL_FLAG(absl::Duration, sandbox2_stack_traces_collection_timeout,
83           absl::Seconds(1),
84           "How much time should be spent on logging threads' stack traces on "
85           "monitor shut down. Only relevent when collection of all stack "
86           "traces is enabled.");
87 
88 ABSL_DECLARE_FLAG(bool, sandbox2_danger_danger_permit_all);
89 
90 namespace sandbox2 {
91 namespace {
92 
93 // We could use the ProcMapsIterator, however we want the full file content.
ReadProcMaps(pid_t pid)94 std::string ReadProcMaps(pid_t pid) {
95   std::ifstream input(absl::StrCat("/proc/", pid, "/maps"),
96                       std::ios_base::in | std::ios_base::binary);
97   std::ostringstream contents;
98   contents << input.rdbuf();
99   return contents.str();
100 }
101 
ContinueProcess(pid_t pid,int signo)102 void ContinueProcess(pid_t pid, int signo) {
103   if (ptrace(PTRACE_CONT, pid, 0, signo) == -1) {
104     if (errno == ESRCH) {
105       LOG(WARNING) << "Process " << pid
106                    << " died while trying to PTRACE_CONT it";
107     } else {
108       PLOG(ERROR) << "ptrace(PTRACE_CONT, pid=" << pid << ", sig=" << signo
109                   << ")";
110     }
111   }
112 }
113 
StopProcess(pid_t pid,int signo)114 void StopProcess(pid_t pid, int signo) {
115   if (ptrace(PTRACE_LISTEN, pid, 0, signo) == -1) {
116     if (errno == ESRCH) {
117       LOG(WARNING) << "Process " << pid
118                    << " died while trying to PTRACE_LISTEN it";
119     } else {
120       PLOG(ERROR) << "ptrace(PTRACE_LISTEN, pid=" << pid << ", sig=" << signo
121                   << ")";
122     }
123   }
124 }
125 
CompleteSyscall(pid_t pid,int signo)126 void CompleteSyscall(pid_t pid, int signo) {
127   if (ptrace(PTRACE_SYSCALL, pid, 0, signo) == -1) {
128     if (errno == ESRCH) {
129       LOG(WARNING) << "Process " << pid
130                    << " died while trying to PTRACE_SYSCALL it";
131     } else {
132       PLOG(ERROR) << "ptrace(PTRACE_SYSCALL, pid=" << pid << ", sig=" << signo
133                   << ")";
134     }
135   }
136 }
137 
138 }  // namespace
139 
PtraceMonitor(Executor * executor,Policy * policy,Notify * notify)140 PtraceMonitor::PtraceMonitor(Executor* executor, Policy* policy, Notify* notify)
141     : MonitorBase(executor, policy, notify),
142       wait_for_execve_(executor->enable_sandboxing_pre_execve_) {
143   if (executor_->limits()->wall_time_limit() != absl::ZeroDuration()) {
144     auto deadline = absl::Now() + executor_->limits()->wall_time_limit();
145     deadline_millis_.store(absl::ToUnixMillis(deadline),
146                            std::memory_order_relaxed);
147   }
148   external_kill_request_flag_.test_and_set(std::memory_order_relaxed);
149   dump_stack_request_flag_.test_and_set(std::memory_order_relaxed);
150   use_deadline_manager_ =
151       absl::GetFlag(FLAGS_sandbox2_monitor_ptrace_use_deadline_manager);
152 }
153 
IsActivelyMonitoring()154 bool PtraceMonitor::IsActivelyMonitoring() {
155   // If we're still waiting for execve(), then we allow all syscalls.
156   return !wait_for_execve_;
157 }
158 
SetActivelyMonitoring()159 void PtraceMonitor::SetActivelyMonitoring() { wait_for_execve_ = false; }
160 
SetAdditionalResultInfo(std::unique_ptr<Regs> regs)161 void PtraceMonitor::SetAdditionalResultInfo(std::unique_ptr<Regs> regs) {
162   pid_t pid = regs->pid();
163   result_.SetRegs(std::move(regs));
164   result_.SetProgName(util::GetProgName(pid));
165   result_.SetProcMaps(ReadProcMaps(pid));
166   if (!ShouldCollectStackTrace(result_.final_status())) {
167     VLOG(1) << "Stack traces have been disabled";
168     return;
169   }
170 
171   absl::StatusOr<std::vector<std::string>> stack_trace =
172       GetAndLogStackTrace(result_.GetRegs());
173   if (!stack_trace.ok()) {
174     LOG_IF(ERROR,
175            absl::GetFlag(FLAGS_sandbox2_log_unobtainable_stack_traces_errors))
176         << "Could not obtain stack trace: " << stack_trace.status();
177     return;
178   }
179   result_.set_stack_trace(*stack_trace);
180 }
181 
KillSandboxee()182 bool PtraceMonitor::KillSandboxee() {
183   VLOG(1) << "Sending SIGKILL to the PID: " << process_.main_pid;
184   if (kill(process_.main_pid, SIGKILL) != 0) {
185     PLOG(ERROR) << "Could not send SIGKILL to PID " << process_.main_pid;
186     SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_KILL);
187     return false;
188   }
189   constexpr absl::Duration kGracefullKillTimeout = absl::Milliseconds(1000);
190   if (hard_deadline_ == absl::InfiniteFuture()) {
191     hard_deadline_ = absl::Now() + kGracefullKillTimeout;
192   }
193   return true;
194 }
195 
InterruptSandboxee()196 bool PtraceMonitor::InterruptSandboxee() {
197   if (ptrace(PTRACE_INTERRUPT, process_.main_pid, 0, 0) == -1) {
198     PLOG(ERROR) << "Could not send interrupt to pid=" << process_.main_pid;
199     SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_INTERRUPT);
200     return false;
201   }
202   return true;
203 }
204 
// Extracts bits 16..23 of a wait status, which is where the ptrace event code
// is looked up by the callers in this file. Not defined in glibc.
// The argument is parenthesized so that expression arguments (for example
// `a | b`) are masked as a whole rather than only their last operand.
#define __WPTRACEEVENT(x) (((x) & 0xff0000) >> 16)
207 
NotifyMonitor()208 void PtraceMonitor::NotifyMonitor() {
209   if (use_deadline_manager_) {
210     pid_waiter_.Notify();
211   } else {
212     absl::MutexLock lock(&thread_mutex_);
213     if (thread_.IsJoinable()) {
214       pthread_kill(thread_.handle(), SIGCHLD);
215     }
216   }
217 }
218 
Join()219 void PtraceMonitor::Join() {
220   absl::MutexLock lock(&thread_mutex_);
221   if (thread_.IsJoinable()) {
222     thread_.Join();
223     CHECK(IsDone()) << "Monitor did not terminate";
224     VLOG(1) << "Final execution status: " << result_.ToString();
225     CHECK(result_.final_status() != Result::UNSET);
226   }
227 }
228 
RunInternal()229 void PtraceMonitor::RunInternal() {
230   {
231     absl::MutexLock lock(&thread_mutex_);
232     thread_ = sapi::Thread(this, &PtraceMonitor::Run, "sandbox2-Monitor");
233   }
234 
235   // Wait for the Monitor to set-up the sandboxee correctly (or fail while
236   // doing that). From here on, it is safe to use the IPC object for
237   // non-sandbox-related data exchange.
238   setup_notification_.WaitForNotification();
239 }
240 
Run()241 void PtraceMonitor::Run() {
242   absl::Cleanup monitor_done = [this] {
243     getrusage(RUSAGE_THREAD, result_.GetRUsageMonitor());
244     OnDone();
245   };
246 
247   absl::Cleanup setup_notify = [this] { setup_notification_.Notify(); };
248   // It'd be costly to initialize the sigset_t for each sigtimedwait()
249   // invocation, so do it once per Monitor.
250   if (!use_deadline_manager_ && !InitSetupSignals()) {
251     SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_SIGNALS);
252     return;
253   }
254   // This call should be the last in the init sequence, because it can cause the
255   // sandboxee to enter ptrace-stopped state, in which it will not be able to
256   // send any messages over the Comms channel.
257   if (!InitPtraceAttach()) {
258     SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_PTRACE);
259     return;
260   }
261 
262   // Tell the parent thread (Sandbox2 object) that we're done with the initial
263   // set-up process of the sandboxee.
264   std::move(setup_notify).Invoke();
265 
266   bool sandboxee_exited = false;
267   pid_waiter_.SetPriorityPid(process_.main_pid);
268   int status;
269   // All possible still running children of main process, will be killed due to
270   // PTRACE_O_EXITKILL ptrace() flag.
271   while (result().final_status() == Result::UNSET) {
272     if (absl::Now() >= hard_deadline_) {
273       LOG(WARNING) << "Hard deadline exceeded (timed_out=" << timed_out_
274                    << ", external_kill=" << external_kill_
275                    << ", network_violation=" << network_violation_ << ").";
276       SetExitStatusCode(Result::TIMEOUT, 0);
277       break;
278     }
279     int64_t deadline = deadline_millis_.load(std::memory_order_relaxed);
280     if (deadline != 0 && absl::Now() >= absl::FromUnixMillis(deadline)) {
281       VLOG(1) << "Sandbox process hit timeout due to the walltime timer";
282       timed_out_ = true;
283       if (!KillSandboxee()) {
284         break;
285       }
286     }
287 
288     if (!dump_stack_request_flag_.test_and_set(std::memory_order_relaxed)) {
289       should_dump_stack_ = true;
290       if (!InterruptSandboxee()) {
291         break;
292       }
293     }
294 
295     if (!external_kill_request_flag_.test_and_set(std::memory_order_relaxed)) {
296       external_kill_ = true;
297       if (!KillSandboxee()) {
298         break;
299       }
300     }
301 
302     if (network_proxy_server_ &&
303         network_proxy_server_->violation_occurred_.load(
304             std::memory_order_acquire) &&
305         !network_violation_) {
306       network_violation_ = true;
307       if (!KillSandboxee()) {
308         break;
309       }
310     }
311     if (use_deadline_manager_) {
312       absl::Time effective_deadline = hard_deadline_;
313       if (deadline != 0 && hard_deadline_ == absl::InfiniteFuture()) {
314         effective_deadline = absl::FromUnixMillis(deadline);
315       }
316       pid_waiter_.SetDeadline(effective_deadline);
317     }
318     pid_t ret = pid_waiter_.Wait(&status);
319     if (ret == 0) {
320       if (!use_deadline_manager_) {
321         constexpr timespec ts = {kWakeUpPeriodSec, kWakeUpPeriodNSec};
322         int signo = sigtimedwait(&sset_, nullptr, &ts);
323         LOG_IF(ERROR, signo != -1 && signo != SIGCHLD)
324             << "Unknown signal received: " << signo;
325       }
326       continue;
327     }
328 
329     if (ret == -1) {
330       if (errno == ECHILD) {
331         LOG(ERROR) << "PANIC(). The main process has not exited yet, "
332                    << "yet we haven't seen its exit event";
333         SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_CHILD);
334       } else if (!use_deadline_manager_ || errno != EINTR) {
335         PLOG(ERROR) << "waitpid() failed";
336       }
337       continue;
338     }
339 
340     VLOG(3) << "waitpid() returned with PID: " << ret << ", status: " << status;
341 
342     if (WIFEXITED(status)) {
343       VLOG(1) << "PID: " << ret
344               << " finished with code: " << WEXITSTATUS(status);
345       // That's the main process, set the exit code, and exit. It will kill
346       // all remaining processes (if there are any) because of the
347       // PTRACE_O_EXITKILL ptrace() flag.
348       if (ret == process_.main_pid) {
349         if (IsActivelyMonitoring()) {
350           SetExitStatusCode(Result::OK, WEXITSTATUS(status));
351         } else {
352           SetExitStatusCode(Result::SETUP_ERROR, Result::FAILED_MONITOR);
353         }
354         sandboxee_exited = true;
355       }
356     } else if (WIFSIGNALED(status)) {
357       //  This usually does not happen, but might.
358       //  Quote from the manual:
359       //   A SIGKILL signal may still cause a PTRACE_EVENT_EXIT stop before
360       //   actual signal death.  This may be changed in the future;
361       VLOG(1) << "PID: " << ret << " terminated with signal: "
362               << util::GetSignalName(WTERMSIG(status));
363       if (ret == process_.main_pid) {
364         if (network_violation_) {
365           SetExitStatusCode(Result::VIOLATION, Result::VIOLATION_NETWORK);
366           result_.SetNetworkViolation(network_proxy_server_->violation_msg_);
367         } else if (external_kill_) {
368           SetExitStatusCode(Result::EXTERNAL_KILL, 0);
369         } else if (timed_out_) {
370           SetExitStatusCode(Result::TIMEOUT, 0);
371         } else {
372           SetExitStatusCode(Result::SIGNALED, WTERMSIG(status));
373         }
374         sandboxee_exited = true;
375       }
376     } else if (WIFSTOPPED(status)) {
377       VLOG(2) << "PID: " << ret
378               << " received signal: " << util::GetSignalName(WSTOPSIG(status))
379               << " with event: "
380               << util::GetPtraceEventName(__WPTRACEEVENT(status));
381       StateProcessStopped(ret, status);
382     } else if (WIFCONTINUED(status)) {
383       VLOG(2) << "PID: " << ret << " is being continued";
384     }
385   }
386 
387   if (!sandboxee_exited) {
388     const bool log_stack_traces =
389         result_.final_status() != Result::OK &&
390         absl::GetFlag(FLAGS_sandbox2_log_all_stack_traces);
391     constexpr auto kGracefullExitTimeout = absl::Milliseconds(200);
392     auto deadline = absl::Now() + kGracefullExitTimeout;
393     if (log_stack_traces) {
394       deadline = absl::Now() +
395                  absl::GetFlag(FLAGS_sandbox2_stack_traces_collection_timeout);
396     }
397     for (;;) {
398       auto left = deadline - absl::Now();
399       if (absl::Now() >= deadline) {
400         LOG(WARNING)
401             << "Waiting for sandboxee exit timed out. Sandboxee result: "
402             << result_.ToString();
403         break;
404       }
405       if (use_deadline_manager_) {
406         pid_waiter_.SetDeadline(deadline);
407       }
408       pid_t ret = pid_waiter_.Wait(&status);
409       if (ret == -1) {
410         if (use_deadline_manager_ && errno == EINTR) {
411           continue;
412         }
413         if (!log_stack_traces || ret != ECHILD) {
414           PLOG(ERROR) << "waitpid() failed";
415         }
416         break;
417       }
418       if (!log_stack_traces) {
419         if (ret == process_.main_pid &&
420             (WIFSIGNALED(status) || WIFEXITED(status))) {
421           break;
422         }
423         kill(process_.main_pid, SIGKILL);
424       }
425 
426       if (ret == 0) {
427         if (!use_deadline_manager_) {
428           auto ts = absl::ToTimespec(left);
429           sigtimedwait(&sset_, nullptr, &ts);
430         }
431         continue;
432       }
433 
434       if (WIFSTOPPED(status)) {
435         if (log_stack_traces) {
436           LogStackTraceOfPid(ret);
437         }
438 
439         if (__WPTRACEEVENT(status) == PTRACE_EVENT_EXIT) {
440           VLOG(2) << "PID: " << ret << " PTRACE_EVENT_EXIT ";
441           ContinueProcess(ret, 0);
442           continue;
443         }
444       }
445     }
446   }
447 }
448 
LogStackTraceOfPid(pid_t pid)449 void PtraceMonitor::LogStackTraceOfPid(pid_t pid) {
450   if (!StackTraceCollectionPossible()) {
451     return;
452   }
453 
454   Regs regs(pid);
455   if (auto status = regs.Fetch(); !status.ok()) {
456     LOG(ERROR) << "Failed to get regs, PID:" << pid << " status:" << status;
457     return;
458   }
459 
460   if (auto stack_trace = GetAndLogStackTrace(®s); !stack_trace.ok()) {
461     LOG(ERROR) << "Failed to get stack trace, PID:" << pid
462                << " status:" << stack_trace.status();
463   }
464 }
465 
InitSetupSignals()466 bool PtraceMonitor::InitSetupSignals() {
467   if (sigemptyset(&sset_) == -1) {
468     PLOG(ERROR) << "sigemptyset()";
469     return false;
470   }
471 
472   // sigtimedwait will react (wake-up) to arrival of this signal.
473   if (sigaddset(&sset_, SIGCHLD) == -1) {
474     PLOG(ERROR) << "sigaddset(SIGCHLD)";
475     return false;
476   }
477 
478   if (pthread_sigmask(SIG_BLOCK, &sset_, nullptr) == -1) {
479     PLOG(ERROR) << "pthread_sigmask(SIG_BLOCK, SIGCHLD)";
480     return false;
481   }
482 
483   return true;
484 }
485 
TryAttach(const absl::flat_hash_set<int> & tasks,absl::Time deadline,absl::flat_hash_set<int> & tasks_attached)486 absl::Status TryAttach(const absl::flat_hash_set<int>& tasks,
487                        absl::Time deadline,
488                        absl::flat_hash_set<int>& tasks_attached) {
489   constexpr intptr_t kPtraceOptions =
490       PTRACE_O_TRACESYSGOOD | PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK |
491       PTRACE_O_TRACEVFORKDONE | PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC |
492       PTRACE_O_TRACEEXIT | PTRACE_O_TRACESECCOMP | PTRACE_O_EXITKILL;
493   auto format_ptrace_error = [](int task, absl::string_view message) {
494     return absl::StrCat("ptrace(PTRACE_SEIZE, ", task, ", 0, ", "0x",
495                         absl::Hex(kPtraceOptions), "): ", message);
496   };
497 
498   absl::flat_hash_set<int> cur_tasks = tasks;
499   int retries = 0;
500 
501   // In some situations we allow ptrace to try again when it fails.
502   while (!cur_tasks.empty()) {
503     absl::flat_hash_set<int> retry_tasks;
504     for (int task : cur_tasks) {
505       if (tasks_attached.contains(task)) {
506         continue;
507       }
508       int ret = ptrace(PTRACE_SEIZE, task, 0, kPtraceOptions);
509       if (ret != 0) {
510         if (errno == EPERM) {
511           // Sometimes when a task is exiting we can get an EPERM from ptrace.
512           // Let's try again up until the timeout in this situation.
513           PLOG(WARNING) << format_ptrace_error(task, "Retrying after EPERM");
514           retry_tasks.insert(task);
515           continue;
516         }
517         if (errno == ESRCH) {
518           // A task may have exited since we captured the task list, we will
519           // allow things to continue after we log a warning.
520           PLOG(WARNING) << format_ptrace_error(
521               task, "Skipping exited task. Continuing with other tasks.");
522           continue;
523         }
524         // Any other errno will be considered a failure.
525         return absl::ErrnoToStatus(errno, format_ptrace_error(task, "Failure"));
526       }
527       tasks_attached.insert(task);
528     }
529     if (!retry_tasks.empty()) {
530       if (absl::Now() >= deadline) {
531         return absl::DeadlineExceededError(absl::StrCat(
532             "Attaching to sandboxee timed out: could not attach to ",
533             cur_tasks.size(), " tasks"));
534       }
535       // Exponential Backoff.
536       constexpr absl::Duration kInitialRetry = absl::Milliseconds(1);
537       constexpr absl::Duration kMaxRetry = absl::Milliseconds(20);
538       const absl::Duration retry_interval =
539           kInitialRetry * (1 << std::min(10, retries++));
540       absl::SleepFor(
541           std::min({retry_interval, kMaxRetry, deadline - absl::Now()}));
542     }
543     cur_tasks = std::move(retry_tasks);
544   }
545 
546   return absl::OkStatus();
547 }
548 
InitPtraceAttach()549 bool PtraceMonitor::InitPtraceAttach() {
550   if (process_.init_pid > 0) {
551     if (ptrace(PTRACE_SEIZE, process_.init_pid, 0, PTRACE_O_EXITKILL) != 0) {
552       if (errno != ESRCH) {
553         PLOG(ERROR) << "attaching to init process failed";
554       }
555       return false;
556     }
557   }
558 
559   // Get a list of tasks.
560   absl::StatusOr<absl::flat_hash_set<int>> tasks =
561       sanitizer::GetListOfTasks(process_.main_pid);
562   if (!tasks.ok()) {
563     LOG(ERROR) << "Could not get list of tasks: " << tasks.status().message();
564     return false;
565   }
566 
567   if (!tasks->contains(process_.main_pid)) {
568     LOG(ERROR) << "The pid " << process_.main_pid
569                << " was not found in its own tasklist.";
570     return false;
571   }
572 
573   // With TSYNC, we can allow threads: seccomp applies to all threads.
574   if (tasks->size() > 1) {
575     LOG(WARNING) << "PID " << process_.main_pid << " has " << tasks->size()
576                  << " threads, at the time of call to SandboxMeHere(). If you "
577                     "are seeing more sandbox violations than expected, this "
578                     "might be the reason why"
579                  << ".";
580   }
581 
582   absl::flat_hash_set<int> tasks_attached;
583   absl::Time deadline = absl::Now() + absl::Seconds(4);
584 
585   constexpr int kMaxRetries = 3;
586   for (int retries = 0; retries < kMaxRetries && *tasks != tasks_attached;
587        ++retries) {
588     if (retries > 0) {
589       LOG(ERROR) << "PID " << process_.main_pid
590                  << " spawned new threads while we were trying to attach to it "
591                     "(attempt "
592                  << retries << "/" << kMaxRetries << ")";
593     }
594     if (absl::Status status = TryAttach(*tasks, deadline, tasks_attached);
595         !status.ok()) {
596       LOG(ERROR) << status.message();
597       return false;
598     }
599 
600     // Get a list of tasks after attaching.
601     tasks = sanitizer::GetListOfTasks(process_.main_pid);
602     if (!tasks.ok()) {
603       LOG(ERROR) << "Could not get list of tasks: " << tasks.status().message();
604       return false;
605     }
606   }
607   if (*tasks != tasks_attached) {
608     LOG(ERROR) << "PID " << process_.main_pid
609                << " spawned new threads while we were trying to attach to it "
610                   "(retries exhausted)";
611     return false;
612   }
613 
614   // No glibc wrapper for gettid - see 'man gettid'.
615   VLOG(1) << "Monitor (PID: " << getpid()
616           << ", TID: " << util::Syscall(__NR_gettid)
617           << ") attached to PID: " << process_.main_pid;
618 
619   // Technically, the sandboxee can be in a ptrace-stopped state right now,
620   // because some signal could have arrived in the meantime. Yet, this
621   // Comms::SendUint32 call shouldn't lock our process, because the underlying
622   // socketpair() channel is buffered, hence it will accept the uint32_t message
623   // no matter what is the current state of the sandboxee, and it will allow for
624   // our process to continue and unlock the sandboxee with the proper ptrace
625   // event handling.
626   if (!comms_->SendUint32(Client::kSandbox2ClientDone)) {
627     LOG(ERROR) << "Couldn't send Client::kSandbox2ClientDone message";
628     return false;
629   }
630   return true;
631 }
632 
ActionProcessSyscall(Regs * regs,const Syscall & syscall)633 void PtraceMonitor::ActionProcessSyscall(Regs* regs, const Syscall& syscall) {
634   // If the sandboxing is not enabled yet, allow the first __NR_execveat.
635   if (syscall.nr() == __NR_execveat && !IsActivelyMonitoring()) {
636     VLOG(1) << "[PERMITTED/BEFORE_EXECVEAT]: " << "SYSCALL ::: PID: "
637             << regs->pid() << ", PROG: '" << util::GetProgName(regs->pid())
638             << "' : " << syscall.GetDescription();
639     ContinueProcess(regs->pid(), 0);
640     return;
641   }
642 
643   // Notify can decide whether we want to allow this syscall. It could be useful
644   // for sandbox setups in which some syscalls might still need some logging,
645   // but nonetheless be allowed ('permissible syscalls' in sandbox v1).
646   auto trace_response = notify_->EventSyscallTrace(syscall);
647   if (trace_response == Notify::TraceAction::kAllow) {
648     ContinueProcess(regs->pid(), 0);
649     return;
650   }
651   if (trace_response == Notify::TraceAction::kInspectAfterReturn) {
652     // Note that a process might die without an exit-stop before the syscall is
653     // completed (eg. a thread calls execve() and the thread group leader dies),
654     // so the entry is removed when the process exits.
655     syscalls_in_progress_[regs->pid()] = syscall;
656     CompleteSyscall(regs->pid(), 0);
657     return;
658   }
659 
660   if (absl::GetFlag(FLAGS_sandbox2_danger_danger_permit_all) || log_file_) {
661     std::string syscall_description = syscall.GetDescription();
662     if (log_file_) {
663       PCHECK(absl::FPrintF(log_file_, "PID: %d %s\n", regs->pid(),
664                            syscall_description) >= 0);
665     }
666     VLOG(1) << "PID: " << regs->pid() << " " << syscall_description;
667     ContinueProcess(regs->pid(), 0);
668     return;
669   }
670 
671   ActionProcessSyscallViolation(regs, syscall, ViolationType::kSyscall);
672 }
673 
ActionProcessSyscallViolation(Regs * regs,const Syscall & syscall,ViolationType violation_type)674 void PtraceMonitor::ActionProcessSyscallViolation(
675     Regs* regs, const Syscall& syscall, ViolationType violation_type) {
676   LogSyscallViolation(syscall);
677   notify_->EventSyscallViolation(syscall, violation_type);
678   SetExitStatusCode(Result::VIOLATION, syscall.nr());
679   result_.SetSyscall(std::make_unique<Syscall>(syscall));
680   SetAdditionalResultInfo(std::make_unique<Regs>(*regs));
681   // Rewrite the syscall argument to something invalid (-1).
682   // The process will be killed anyway so this is just a precaution.
683   auto status = regs->SkipSyscallReturnValue(-ENOSYS);
684   if (!status.ok()) {
685     LOG(ERROR) << status;
686   }
687 }
688 
EventPtraceSeccomp(pid_t pid,int event_msg)689 void PtraceMonitor::EventPtraceSeccomp(pid_t pid, int event_msg) {
690   if (event_msg < sapi::cpu::Architecture::kUnknown ||
691       event_msg > sapi::cpu::Architecture::kMax) {
692     // We've observed that, if the process has exited, the event_msg may contain
693     // the exit status even though we haven't received the exit event yet.
694     // To work around this, if the event msg is not in the range of the known
695     // architectures, we assume that it's an exit status. We deal with it by
696     // ignoring this event, and we'll get the exit event in the next iteration.
697     LOG(WARNING) << "received event_msg for unknown architecture: " << event_msg
698                  << "; the program may have exited";
699     return;
700   }
701 
702   // If the seccomp-policy is using RET_TRACE, we request that it returns the
703   // syscall architecture identifier in the SECCOMP_RET_DATA.
704   const auto syscall_arch = static_cast<sapi::cpu::Architecture>(event_msg);
705   Regs regs(pid);
706   auto status = regs.Fetch();
707   if (!status.ok()) {
708     // Ignore if process is killed in the meanwhile
709     if (absl::IsNotFound(status)) {
710       LOG(WARNING) << "failed to fetch regs: " << status;
711       return;
712     }
713     LOG(ERROR) << "failed to fetch regs: " << status;
714     SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_FETCH);
715     return;
716   }
717 
718   Syscall syscall = regs.ToSyscall(syscall_arch);
719   // If the architecture of the syscall used is different that the current host
720   // architecture, report a violation.
721   if (syscall_arch != Syscall::GetHostArch()) {
722     ActionProcessSyscallViolation(®s, syscall,
723                                   ViolationType::kArchitectureSwitch);
724     return;
725   }
726 
727   ActionProcessSyscall(®s, syscall);
728 }
729 
EventSyscallExit(pid_t pid)730 void PtraceMonitor::EventSyscallExit(pid_t pid) {
731   // Check that the monitor wants to inspect the current syscall's return value.
732   auto index = syscalls_in_progress_.find(pid);
733   if (index == syscalls_in_progress_.end()) {
734     LOG(ERROR) << "Expected a syscall in progress in PID " << pid;
735     SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_INSPECT);
736     return;
737   }
738   Regs regs(pid);
739   auto status = regs.Fetch();
740   if (!status.ok()) {
741     // Ignore if process is killed in the meanwhile
742     if (absl::IsNotFound(status)) {
743       LOG(WARNING) << "failed to fetch regs: " << status;
744       return;
745     }
746     LOG(ERROR) << "failed to fetch regs: " << status;
747     SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_FETCH);
748     return;
749   }
750   int64_t return_value = regs.GetReturnValue(sapi::host_cpu::Architecture());
751   notify_->EventSyscallReturn(index->second, return_value);
752   syscalls_in_progress_.erase(index);
753   ContinueProcess(pid, 0);
754 }
755 
EventPtraceNewProcess(pid_t pid,int event_msg)756 void PtraceMonitor::EventPtraceNewProcess(pid_t pid, int event_msg) {
757   // ptrace doesn't issue syscall-exit-stops for successful fork/vfork/clone
758   // system calls. Check if the monitor wanted to inspect the syscall's return
759   // value, and call EventSyscallReturn for the parent process if so.
760   auto index = syscalls_in_progress_.find(pid);
761   if (index != syscalls_in_progress_.end()) {
762     auto syscall_nr = index->second.nr();
763     bool creating_new_process = syscall_nr == __NR_clone;
764 #ifdef __NR_clone3
765     creating_new_process = creating_new_process || syscall_nr == __NR_clone3;
766 #endif
767 #ifdef __NR_fork
768     creating_new_process = creating_new_process || syscall_nr == __NR_fork;
769 #endif
770 #ifdef __NR_vfork
771     creating_new_process = creating_new_process || syscall_nr == __NR_vfork;
772 #endif
773     if (!creating_new_process) {
774       LOG(ERROR) << "Expected a fork/vfork/clone syscall in progress in PID "
775                  << pid << "; actual: " << index->second.GetDescription();
776       SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_INSPECT);
777       return;
778     }
779     notify_->EventSyscallReturn(index->second, event_msg);
780     syscalls_in_progress_.erase(index);
781   }
782   ContinueProcess(pid, 0);
783 }
784 
EventPtraceExec(pid_t pid,int event_msg)785 void PtraceMonitor::EventPtraceExec(pid_t pid, int event_msg) {
786   if (!IsActivelyMonitoring()) {
787     VLOG(1) << "PTRACE_EVENT_EXEC seen from PID: " << event_msg
788             << ". SANDBOX ENABLED!";
789     SetActivelyMonitoring();
790   } else {
791     // ptrace doesn't issue syscall-exit-stops for successful execve/execveat
792     // system calls. Check if the monitor wanted to inspect the syscall's return
793     // value, and call EventSyscallReturn if so.
794     auto index = syscalls_in_progress_.find(pid);
795     if (index != syscalls_in_progress_.end()) {
796       auto syscall_nr = index->second.nr();
797       if (syscall_nr != __NR_execve && syscall_nr != __NR_execveat) {
798         LOG(ERROR) << "Expected an execve/execveat syscall in progress in PID "
799                    << pid << "; actual: " << index->second.GetDescription();
800         SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_INSPECT);
801         return;
802       }
803       notify_->EventSyscallReturn(index->second, 0);
804       syscalls_in_progress_.erase(index);
805     }
806   }
807   ContinueProcess(pid, 0);
808 }
809 
EventPtraceExit(pid_t pid,int event_msg)810 void PtraceMonitor::EventPtraceExit(pid_t pid, int event_msg) {
811   // Forget about any syscalls in progress for this PID.
812   syscalls_in_progress_.erase(pid);
813 
814   // A regular exit, let it continue (fast-path).
815   if (ABSL_PREDICT_TRUE(WIFEXITED(event_msg) &&
816                         (!policy_->collect_stacktrace_on_exit() ||
817                          pid != process_.main_pid))) {
818     ContinueProcess(pid, 0);
819     return;
820   }
821 
822   const bool is_seccomp =
823       WIFSIGNALED(event_msg) && WTERMSIG(event_msg) == SIGSYS;
824   const bool log_stack_trace =
825       absl::GetFlag(FLAGS_sandbox2_log_all_stack_traces);
826   // Fetch the registers as we'll need them to fill the result in any case
827   auto regs = std::make_unique<Regs>(pid);
828   if (is_seccomp || pid == process_.main_pid || log_stack_trace) {
829     auto status = regs->Fetch();
830     if (!status.ok()) {
831       LOG(ERROR) << "failed to fetch regs: " << status;
832       SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_FETCH);
833       return;
834     }
835   }
836 
837   // Process signaled due to seccomp violation.
838   if (is_seccomp) {
839     VLOG(1) << "PID: " << pid << " violation uncovered via the EXIT_EVENT";
840     ActionProcessSyscallViolation(regs.get(),
841                                   regs->ToSyscall(Syscall::GetHostArch()),
842                                   ViolationType::kSyscall);
843     return;
844   }
845 
846   // This can be reached in four cases:
847   // 1) Process was killed from the sandbox.
848   // 2) Process was killed because it hit a timeout.
849   // 3) Regular signal/other exit cause.
850   // 4) Normal exit for which we want to obtain stack trace.
851   if (pid == process_.main_pid) {
852     VLOG(1) << "PID: " << pid << " main special exit";
853     if (network_violation_) {
854       SetExitStatusCode(Result::VIOLATION, Result::VIOLATION_NETWORK);
855       result_.SetNetworkViolation(network_proxy_server_->violation_msg_);
856     } else if (external_kill_) {
857       SetExitStatusCode(Result::EXTERNAL_KILL, 0);
858     } else if (timed_out_) {
859       SetExitStatusCode(Result::TIMEOUT, 0);
860     } else if (WIFEXITED(event_msg)) {
861       SetExitStatusCode(Result::OK, WEXITSTATUS(event_msg));
862     } else {
863       SetExitStatusCode(Result::SIGNALED, WTERMSIG(event_msg));
864     }
865     SetAdditionalResultInfo(std::move(regs));
866   } else if (log_stack_trace) {
867     // In case pid == pid_ the stack trace will be logged anyway. So we need
868     // to do explicit logging only when this is not a main PID.
869     if (StackTraceCollectionPossible()) {
870       if (auto stack_trace = GetAndLogStackTrace(regs.get());
871           !stack_trace.ok()) {
872         LOG(ERROR) << "Failed to get stack trace, PID:" << pid
873                    << " status:" << stack_trace.status();
874       }
875     }
876   }
877   VLOG(1) << "Continuing";
878   ContinueProcess(pid, 0);
879 }
880 
EventPtraceStop(pid_t pid,int stopsig)881 void PtraceMonitor::EventPtraceStop(pid_t pid, int stopsig) {
882   // It's not a real stop signal. For example PTRACE_O_TRACECLONE and similar
883   // flags to ptrace(PTRACE_SEIZE) might generate this event with SIGTRAP.
884   if (stopsig != SIGSTOP && stopsig != SIGTSTP && stopsig != SIGTTIN &&
885       stopsig != SIGTTOU) {
886     ContinueProcess(pid, 0);
887     return;
888   }
889   // It's our PID stop signal. Stop it.
890   VLOG(2) << "PID: " << pid << " stopped due to "
891           << util::GetSignalName(stopsig);
892   StopProcess(pid, 0);
893 }
894 
StateProcessStopped(pid_t pid,int status)895 void PtraceMonitor::StateProcessStopped(pid_t pid, int status) {
896   int stopsig = WSTOPSIG(status);
897   // We use PTRACE_O_TRACESYSGOOD, so we can tell it's a syscall stop without
898   // calling PTRACE_GETSIGINFO by checking the value of the reported signal.
899   bool is_syscall_exit = stopsig == (SIGTRAP | 0x80);
900   if (__WPTRACEEVENT(status) == 0 && !is_syscall_exit) {
901     // Must be a regular signal delivery.
902     VLOG(2) << "PID: " << pid
903             << " received signal: " << util::GetSignalName(stopsig);
904     notify_->EventSignal(pid, stopsig);
905     ContinueProcess(pid, stopsig);
906     return;
907   }
908 
909   unsigned long event_msg;  // NOLINT
910   if (ptrace(PTRACE_GETEVENTMSG, pid, 0, &event_msg) == -1) {
911     if (errno == ESRCH) {
912       // This happens from time to time, the kernel does not guarantee us that
913       // we get the event in time.
914       PLOG(INFO) << "ptrace(PTRACE_GETEVENTMSG, " << pid << ")";
915       return;
916     }
917     PLOG(ERROR) << "ptrace(PTRACE_GETEVENTMSG, " << pid << ")";
918     SetExitStatusCode(Result::INTERNAL_ERROR, Result::FAILED_GETEVENT);
919     return;
920   }
921 
922   if (ABSL_PREDICT_FALSE(pid == process_.main_pid && should_dump_stack_ &&
923                          executor_->libunwind_sbox_for_pid_ == 0 &&
924                          policy_->GetNamespace())) {
925     auto stack_trace = [this,
926                         pid]() -> absl::StatusOr<std::vector<std::string>> {
927       Regs regs(pid);
928       SAPI_RETURN_IF_ERROR(regs.Fetch());
929       return GetStackTrace(®s);
930     }();
931 
932     if (!stack_trace.ok()) {
933       LOG(WARNING) << "FAILED TO GET SANDBOX STACK : " << stack_trace.status();
934     } else if (VLOG_IS_ON(0)) {
935       VLOG(0) << "SANDBOX STACK: PID: " << pid << ", [";
936       for (const auto& frame : *stack_trace) {
937         VLOG(0) << "  " << frame;
938       }
939       VLOG(0) << "]";
940     }
941     should_dump_stack_ = false;
942   }
943 
944   if (is_syscall_exit) {
945     VLOG(2) << "PID: " << pid << " syscall-exit-stop: " << event_msg;
946     EventSyscallExit(pid);
947     return;
948   }
949 
950   switch (__WPTRACEEVENT(status)) {
951     case PTRACE_EVENT_FORK:
952       VLOG(2) << "PID: " << pid << " PTRACE_EVENT_FORK, PID: " << event_msg;
953       EventPtraceNewProcess(pid, event_msg);
954       break;
955     case PTRACE_EVENT_VFORK:
956       VLOG(2) << "PID: " << pid << " PTRACE_EVENT_VFORK, PID: " << event_msg;
957       EventPtraceNewProcess(pid, event_msg);
958       break;
959     case PTRACE_EVENT_CLONE:
960       VLOG(2) << "PID: " << pid << " PTRACE_EVENT_CLONE, PID: " << event_msg;
961       EventPtraceNewProcess(pid, event_msg);
962       break;
963     case PTRACE_EVENT_VFORK_DONE:
964       ContinueProcess(pid, 0);
965       break;
966     case PTRACE_EVENT_EXEC:
967       VLOG(2) << "PID: " << pid << " PTRACE_EVENT_EXEC, PID: " << event_msg;
968       EventPtraceExec(pid, event_msg);
969       break;
970     case PTRACE_EVENT_EXIT:
971       VLOG(2) << "PID: " << pid << " PTRACE_EVENT_EXIT: " << event_msg;
972       EventPtraceExit(pid, event_msg);
973       break;
974     case PTRACE_EVENT_STOP:
975       VLOG(2) << "PID: " << pid << " PTRACE_EVENT_STOP: " << event_msg;
976       EventPtraceStop(pid, stopsig);
977       break;
978     case PTRACE_EVENT_SECCOMP:
979       VLOG(2) << "PID: " << pid << " PTRACE_EVENT_SECCOMP: " << event_msg;
980       EventPtraceSeccomp(pid, event_msg);
981       break;
982     default:
983       LOG(ERROR) << "Unknown ptrace event: " << __WPTRACEEVENT(status)
984                  << " with data: " << event_msg;
985       break;
986   }
987 }
988 
989 }  // namespace sandbox2
990