1 // Copyright 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 // Implementation file for the sandbox2::Client class.
16
17 #include "sandboxed_api/sandbox2/client.h"
18
19 #include <fcntl.h>
20 #include <linux/bpf_common.h>
21 #include <linux/filter.h>
22 #include <linux/seccomp.h>
23 #include <sys/prctl.h>
24 #include <syscall.h>
25 #include <unistd.h>
26
27 #include <atomic>
28 #include <cerrno>
29 #include <cinttypes>
30 #include <cstdint>
31 #include <cstdlib>
32 #include <limits>
33 #include <memory>
34 #include <string>
35 #include <thread> // NOLINT(build/c++11)
36 #include <utility>
37 #include <vector>
38
39 #include "absl/base/attributes.h"
40 #include "absl/base/macros.h"
41 #include "absl/container/flat_hash_map.h"
42 #include "absl/status/status.h"
43 #include "absl/strings/numbers.h"
44 #include "absl/strings/str_cat.h"
45 #include "absl/strings/str_join.h"
46 #include "absl/strings/str_split.h"
47 #include "absl/strings/string_view.h"
48 #include "sandboxed_api/sandbox2/comms.h"
49 #include "sandboxed_api/sandbox2/logsink.h"
50 #include "sandboxed_api/sandbox2/network_proxy/client.h"
51 #include "sandboxed_api/sandbox2/policy.h"
52 #include "sandboxed_api/sandbox2/sanitizer.h"
53 #include "sandboxed_api/sandbox2/syscall.h"
54 #include "sandboxed_api/sandbox2/util/bpf_helper.h"
55 #include "sandboxed_api/util/raw_logging.h"
56
57 #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
58 #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
59 #endif
60
61 namespace sandbox2 {
62 namespace {
63
InitSeccompUnotify(sock_fprog prog,Comms * comms,uint32_t seccomp_extra_flags)64 void InitSeccompUnotify(sock_fprog prog, Comms* comms,
65 uint32_t seccomp_extra_flags) {
66 // The policy might not allow sending the notify FD.
67 // Create a separate thread that won't get the seccomp policy to send the FD.
68 // Synchronize with it using plain atomics + seccomp TSYNC, so we don't need
69 // any additional syscalls.
70 std::atomic<int> fd(-1);
71 std::atomic<int> tid(-1);
72
73 std::thread th([comms, &fd, &tid]() {
74 int notify_fd = -1;
75 while (notify_fd == -1) {
76 notify_fd = fd.load(std::memory_order_seq_cst);
77 }
78 SAPI_RAW_CHECK(comms->SendFD(notify_fd), "sending unotify fd");
79 SAPI_RAW_CHECK(close(notify_fd) == 0, "closing unotify fd");
80 sock_filter filter = ALLOW;
81 struct sock_fprog allow_prog = {
82 .len = 1,
83 .filter = &filter,
84 };
85 int result = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, 0,
86 reinterpret_cast<uintptr_t>(&allow_prog));
87 SAPI_RAW_PCHECK(result != -1, "setting seccomp filter");
88 tid.store(syscall(__NR_gettid), std::memory_order_seq_cst);
89 });
90 th.detach();
91 int result = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
92 SECCOMP_FILTER_FLAG_NEW_LISTENER,
93 reinterpret_cast<uintptr_t>(&prog));
94 SAPI_RAW_PCHECK(result != -1, "setting seccomp filter");
95 fd.store(result, std::memory_order_seq_cst);
96 pid_t child = -1;
97 while (child == -1) {
98 child = tid.load(std::memory_order_seq_cst);
99 }
100 // Apply seccomp.
101 struct sock_filter code[] = {
102 LOAD_ARCH,
103 JNE32(sandbox2::Syscall::GetHostAuditArch(), ALLOW),
104 LOAD_SYSCALL_NR,
105 BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, __NR_seccomp, 0, 3),
106 ARG_32(3),
107 BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, internal::kExecveMagic, 0, 1),
108 DENY,
109 ALLOW,
110 };
111 prog.len = ABSL_ARRAYSIZE(code);
112 prog.filter = code;
113 do {
114 result =
115 syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
116 SECCOMP_FILTER_FLAG_TSYNC | seccomp_extra_flags,
117 reinterpret_cast<uintptr_t>(&prog), internal::kExecveMagic);
118 } while (result == child);
119 SAPI_RAW_CHECK(result == 0, "Enabling seccomp filter");
120 }
121
InitSeccompRegular(sock_fprog prog,uint32_t seccomp_extra_flags)122 void InitSeccompRegular(sock_fprog prog, uint32_t seccomp_extra_flags) {
123 int result = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
124 SECCOMP_FILTER_FLAG_TSYNC | seccomp_extra_flags,
125 reinterpret_cast<uintptr_t>(&prog));
126 SAPI_RAW_PCHECK(result != -1, "setting seccomp filter");
127 SAPI_RAW_PCHECK(result == 0,
128 "synchronizing threads using SECCOMP_FILTER_FLAG_TSYNC flag "
129 "for thread=%d",
130 result);
131 }
132
133 } // namespace
134
Client(Comms * comms)135 Client::Client(Comms* comms) : comms_(comms) {
136 char* fdmap_envvar = getenv(kFDMapEnvVar);
137 if (!fdmap_envvar) {
138 return;
139 }
140 absl::flat_hash_map<absl::string_view, absl::string_view> vars =
141 absl::StrSplit(fdmap_envvar, ',', absl::SkipEmpty());
142 for (const auto& [name, mapped_fd] : vars) {
143 int fd;
144 SAPI_RAW_CHECK(absl::SimpleAtoi(mapped_fd, &fd), "failed to parse fd map");
145 SAPI_RAW_CHECK(fd_map_.emplace(std::string(name), fd).second,
146 "could not insert mapping into fd map (duplicate)");
147 }
148 unsetenv(kFDMapEnvVar);
149 }
150
GetFdMapEnvVar() const151 std::string Client::GetFdMapEnvVar() const {
152 return absl::StrCat(kFDMapEnvVar, "=",
153 absl::StrJoin(fd_map_, ",", absl::PairFormatter(",")));
154 }
155
PrepareEnvironment(int * preserved_fd)156 void Client::PrepareEnvironment(int* preserved_fd) {
157 SetUpIPC(preserved_fd);
158 SetUpCwd();
159 }
160
EnableSandbox()161 void Client::EnableSandbox() {
162 ReceivePolicy();
163 ApplyPolicyAndBecomeTracee();
164 }
165
SandboxMeHere()166 void Client::SandboxMeHere() {
167 PrepareEnvironment();
168 EnableSandbox();
169 }
170
SetUpCwd()171 void Client::SetUpCwd() {
172 {
173 // Get the current working directory to check if we are in a mount
174 // namespace.
175 // Note: glibc 2.27 no longer returns a relative path in that case, but
176 // fails with ENOENT and returns a nullptr instead. The code still
177 // needs to run on lower version for the time being.
178 char cwd_buf[PATH_MAX + 1] = {0};
179 char* cwd = getcwd(cwd_buf, ABSL_ARRAYSIZE(cwd_buf));
180 SAPI_RAW_PCHECK(cwd != nullptr || errno == ENOENT,
181 "no current working directory");
182
183 // Outside of the mount namespace, the path is of the form
184 // '(unreachable)/...'. Only check for the slash, since Linux might make up
185 // other prefixes in the future.
186 if (errno == ENOENT || cwd_buf[0] != '/') {
187 SAPI_RAW_VLOG(1, "chdir into mount namespace, cwd was '%s'", cwd_buf);
188 // If we are in a mount namespace but fail to chdir, then it can lead to a
189 // sandbox escape -- we need to fail with FATAL if the chdir fails.
190 SAPI_RAW_PCHECK(chdir("/") != -1, "corrective chdir");
191 }
192 }
193
194 // Receive the user-supplied current working directory and change into it.
195 std::string cwd;
196 SAPI_RAW_CHECK(comms_->RecvString(&cwd), "receiving working directory");
197 if (!cwd.empty()) {
198 // On the other hand this chdir can fail without a sandbox escape. It will
199 // probably not have the intended behavior though.
200 if (chdir(cwd.c_str()) == -1 && SAPI_RAW_VLOG_IS_ON(1)) {
201 SAPI_RAW_PLOG(
202 INFO,
203 "chdir(%s) failed, falling back to previous cwd or / (with "
204 "namespaces). Use Executor::SetCwd() to set a working directory",
205 cwd.c_str());
206 }
207 }
208 }
209
SetUpIPC(int * preserved_fd)210 void Client::SetUpIPC(int* preserved_fd) {
211 uint32_t num_of_fd_pairs;
212 SAPI_RAW_CHECK(comms_->RecvUint32(&num_of_fd_pairs),
213 "receiving number of fd pairs");
214 SAPI_RAW_CHECK(fd_map_.empty(), "fd map not empty");
215
216 SAPI_RAW_VLOG(1, "Will receive %d file descriptor pairs", num_of_fd_pairs);
217
218 absl::flat_hash_map<int, int*> preserve_fds_map;
219 if (preserved_fd) {
220 preserve_fds_map.emplace(*preserved_fd, preserved_fd);
221 }
222
223 for (uint32_t i = 0; i < num_of_fd_pairs; ++i) {
224 int32_t requested_fd;
225 int32_t fd;
226 std::string name;
227
228 SAPI_RAW_CHECK(comms_->RecvInt32(&requested_fd), "receiving requested fd");
229 SAPI_RAW_CHECK(comms_->RecvFD(&fd), "receiving current fd");
230 SAPI_RAW_CHECK(comms_->RecvString(&name), "receiving name string");
231
232 if (auto it = preserve_fds_map.find(requested_fd);
233 it != preserve_fds_map.end()) {
234 int old_fd = it->first;
235 int new_fd = dup(old_fd);
236 SAPI_RAW_PCHECK(new_fd != -1, "Failed to duplicate preserved fd=%d",
237 old_fd);
238 SAPI_RAW_LOG(INFO, "Moved preserved fd=%d to %d", old_fd, new_fd);
239 close(old_fd);
240 int* pfd = it->second;
241 *pfd = new_fd;
242 preserve_fds_map.erase(it);
243 preserve_fds_map.emplace(new_fd, pfd);
244 }
245
246 if (requested_fd == comms_->GetConnectionFD()) {
247 comms_->MoveToAnotherFd();
248 SAPI_RAW_LOG(INFO,
249 "Trying to map over comms fd (%d). Remapped comms to %d",
250 requested_fd, comms_->GetConnectionFD());
251 }
252
253 if (requested_fd != -1 && fd != requested_fd) {
254 if (requested_fd > STDERR_FILENO && fcntl(requested_fd, F_GETFD) != -1) {
255 // Dup2 will silently close the FD if one is already at requested_fd.
256 // If someone is using the deferred sandbox entry, ie. SandboxMeHere,
257 // the application might have something actually using that fd.
258 // Therefore let's log a big warning if that FD is already in use.
259 // Note: this check doesn't happen for STDIN,STDOUT,STDERR.
260 SAPI_RAW_LOG(
261 WARNING,
262 "Cloning received fd %d over %d which is already open and will "
263 "be silently closed. This may lead to unexpected behavior!",
264 fd, requested_fd);
265 }
266
267 SAPI_RAW_VLOG(1, "Cloning received fd=%d onto fd=%d", fd, requested_fd);
268 SAPI_RAW_PCHECK(dup2(fd, requested_fd) != -1, "");
269
270 // Close the newly received FD if it differs from the new one.
271 close(fd);
272 fd = requested_fd;
273 }
274
275 if (!name.empty()) {
276 SAPI_RAW_CHECK(fd_map_.emplace(name, fd).second, "duplicate fd mapping");
277 }
278 }
279 }
280
ReceivePolicy()281 void Client::ReceivePolicy() {
282 std::vector<uint8_t> bytes;
283 SAPI_RAW_CHECK(comms_->RecvBytes(&bytes), "receive bytes");
284 policy_ = std::move(bytes);
285 }
286
ApplyPolicyAndBecomeTracee()287 void Client::ApplyPolicyAndBecomeTracee() {
288 // When running under *SAN, we need to notify *SANs background thread that we
289 // want it to exit and wait for it to be done. When not running under *SAN,
290 // this function does nothing.
291 sanitizer::WaitForSanitizer();
292
293 // Creds can be received w/o synchronization, once the connection is
294 // established.
295 pid_t cred_pid;
296 uid_t cred_uid ABSL_ATTRIBUTE_UNUSED;
297 gid_t cred_gid ABSL_ATTRIBUTE_UNUSED;
298 SAPI_RAW_CHECK(comms_->RecvCreds(&cred_pid, &cred_uid, &cred_gid),
299 "receiving credentials");
300
301 SAPI_RAW_CHECK(prctl(PR_SET_DUMPABLE, 1) == 0,
302 "setting PR_SET_DUMPABLE flag");
303 if (prctl(PR_SET_PTRACER, cred_pid) == -1) {
304 SAPI_RAW_VLOG(1, "No YAMA on this system. Continuing");
305 }
306
307 SAPI_RAW_CHECK(prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == 0,
308 "setting PR_SET_NO_NEW_PRIVS flag");
309 SAPI_RAW_CHECK(prctl(PR_SET_KEEPCAPS, 0) == 0,
310 "setting PR_SET_KEEPCAPS flag");
311
312 sock_fprog prog;
313 SAPI_RAW_CHECK(policy_.size() / sizeof(sock_filter) <=
314 std::numeric_limits<uint16_t>::max(),
315 "seccomp policy too long");
316 prog.len = static_cast<uint16_t>(policy_.size() / sizeof(sock_filter));
317 prog.filter = reinterpret_cast<sock_filter*>(&policy_.front());
318
319 SAPI_RAW_VLOG(1,
320 "Applying policy in PID %zd, sock_fprog.len: %" PRId16
321 " entries (%" PRIuPTR " bytes)",
322 syscall(__NR_gettid), prog.len, policy_.size());
323
324 // Signal executor we are ready to have limits applied on us and be ptraced.
325 // We want limits at the last moment to avoid triggering them too early and we
326 // want ptrace at the last moment to avoid synchronization deadlocks.
327 SAPI_RAW_CHECK(comms_->SendUint32(kClient2SandboxReady),
328 "receiving ready signal from executor");
329 uint32_t message; // wait for confirmation
330 SAPI_RAW_CHECK(comms_->RecvUint32(&message),
331 "receving confirmation from executor");
332 uint32_t seccomp_extra_flags =
333 allow_speculation_ ? SECCOMP_FILTER_FLAG_SPEC_ALLOW : 0;
334 if (message == kSandbox2ClientUnotify) {
335 InitSeccompUnotify(prog, comms_, seccomp_extra_flags);
336 } else {
337 SAPI_RAW_CHECK(message == kSandbox2ClientDone,
338 "invalid confirmation from executor");
339 InitSeccompRegular(prog, seccomp_extra_flags);
340 }
341 }
342
GetMappedFD(const std::string & name)343 int Client::GetMappedFD(const std::string& name) {
344 auto it = fd_map_.find(name);
345 SAPI_RAW_CHECK(it != fd_map_.end(),
346 "mapped fd not found (function called twice?)");
347 int fd = it->second;
348 fd_map_.erase(it);
349 return fd;
350 }
351
HasMappedFD(const std::string & name)352 bool Client::HasMappedFD(const std::string& name) {
353 return fd_map_.find(name) != fd_map_.end();
354 }
355
SendLogsToSupervisor()356 void Client::SendLogsToSupervisor() {
357 // This LogSink will register itself and send all logs to the executor until
358 // the object is destroyed.
359 logsink_ = std::make_unique<LogSink>(GetMappedFD(LogSink::kLogFDName));
360 }
361
GetNetworkProxyClient()362 NetworkProxyClient* Client::GetNetworkProxyClient() {
363 if (proxy_client_ == nullptr) {
364 proxy_client_ = std::make_unique<NetworkProxyClient>(
365 GetMappedFD(NetworkProxyClient::kFDName));
366 }
367 return proxy_client_.get();
368 }
369
InstallNetworkProxyHandler()370 absl::Status Client::InstallNetworkProxyHandler() {
371 if (fd_map_.find(NetworkProxyClient::kFDName) == fd_map_.end()) {
372 return absl::FailedPreconditionError(
373 "InstallNetworkProxyHandler() must be called at most once after the "
374 "sandbox is installed. Also, the NetworkProxyServer needs to be "
375 "enabled.");
376 }
377 return NetworkProxyHandler::InstallNetworkProxyHandler(
378 GetNetworkProxyClient());
379 }
380
381 } // namespace sandbox2
382