1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "sandbox/linux/seccomp-bpf/sandbox_bpf.h"
6
7 #include <errno.h>
8 #include <stdint.h>
9 #include <sys/prctl.h>
10 #include <sys/types.h>
11 #include <unistd.h>
12
13 #include "base/compiler_specific.h"
14 #include "base/files/scoped_file.h"
15 #include "base/logging.h"
16 #include "base/macros.h"
17 #include "base/posix/eintr_wrapper.h"
18 #include "base/third_party/valgrind/valgrind.h"
19 #include "sandbox/linux/bpf_dsl/bpf_dsl.h"
20 #include "sandbox/linux/bpf_dsl/codegen.h"
21 #include "sandbox/linux/bpf_dsl/policy.h"
22 #include "sandbox/linux/bpf_dsl/policy_compiler.h"
23 #include "sandbox/linux/bpf_dsl/seccomp_macros.h"
24 #include "sandbox/linux/bpf_dsl/syscall_set.h"
25 #include "sandbox/linux/seccomp-bpf/die.h"
26 #include "sandbox/linux/seccomp-bpf/syscall.h"
27 #include "sandbox/linux/seccomp-bpf/trap.h"
28 #include "sandbox/linux/services/proc_util.h"
29 #include "sandbox/linux/services/syscall_wrappers.h"
30 #include "sandbox/linux/services/thread_helpers.h"
31 #include "sandbox/linux/system_headers/linux_filter.h"
32 #include "sandbox/linux/system_headers/linux_seccomp.h"
33 #include "sandbox/linux/system_headers/linux_syscalls.h"
34
35 namespace sandbox {
36
37 namespace {
38
IsRunningOnValgrind()39 bool IsRunningOnValgrind() { return RUNNING_ON_VALGRIND; }
40
IsSingleThreaded(int proc_fd)41 bool IsSingleThreaded(int proc_fd) {
42 return ThreadHelpers::IsSingleThreaded(proc_fd);
43 }
44
45 // Check if the kernel supports seccomp-filter (a.k.a. seccomp mode 2) via
46 // prctl().
KernelSupportsSeccompBPF()47 bool KernelSupportsSeccompBPF() {
48 errno = 0;
49 const int rv = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, nullptr);
50
51 if (rv == -1 && EFAULT == errno) {
52 return true;
53 }
54 return false;
55 }
56
// LG introduced a buggy syscall, sys_set_media_ext, with the same number as
// seccomp. Returns true if the current kernel has this buggy syscall.
//
// To keep working with upcoming versions of seccomp, the probe passes bogus
// flags that are unlikely to ever be used by the kernel. A normal kernel would
// return -EINVAL, but a buggy LG kernel would return 1.
//
// The probe is only compiled in for OS_ANDROID; elsewhere this always returns
// false.
bool KernelHasLGBug() {
#if defined(OS_ANDROID)
  // sys_set_media will see this as NULL, which should be a safe (non-crashing)
  // way to invoke it. A genuine seccomp syscall will see it as
  // SECCOMP_SET_MODE_STRICT.
  const unsigned int probe_operation = 0;
  // Arbitrary bit pattern; it only needs to be something the real seccomp
  // syscall will not accept as valid flags.
  const unsigned int bogus_flags = 0xf7a46a5c;
  const int probe_result = sys_seccomp(probe_operation, bogus_flags, nullptr);
  // A genuine kernel would return -EINVAL (which would set the result to -1
  // and errno to EINVAL), or at the very least return some kind of error
  // (result -1). Any other behavior indicates that whatever code received our
  // syscall was not the real seccomp.
  return probe_result != -1;
#else
  return false;
#endif  // defined(OS_ANDROID)
}
83
84 // Check if the kernel supports seccomp-filter via the seccomp system call
85 // and the TSYNC feature to enable seccomp on all threads.
KernelSupportsSeccompTsync()86 bool KernelSupportsSeccompTsync() {
87 if (KernelHasLGBug()) {
88 return false;
89 }
90
91 errno = 0;
92 const int rv =
93 sys_seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, nullptr);
94
95 if (rv == -1 && errno == EFAULT) {
96 return true;
97 } else {
98 // TODO(jln): turn these into DCHECK after 417888 is considered fixed.
99 CHECK_EQ(-1, rv);
100 CHECK(ENOSYS == errno || EINVAL == errno);
101 return false;
102 }
103 }
104
EscapePC()105 uint64_t EscapePC() {
106 intptr_t rv = Syscall::Call(-1);
107 if (rv == -1 && errno == ENOSYS) {
108 return 0;
109 }
110 return static_cast<uint64_t>(static_cast<uintptr_t>(rv));
111 }
112
SandboxPanicTrap(const struct arch_seccomp_data &,void * aux)113 intptr_t SandboxPanicTrap(const struct arch_seccomp_data&, void* aux) {
114 SANDBOX_DIE(static_cast<const char*>(aux));
115 }
116
SandboxPanic(const char * error)117 bpf_dsl::ResultExpr SandboxPanic(const char* error) {
118 return bpf_dsl::Trap(SandboxPanicTrap, error);
119 }
120
121 } // namespace
122
SandboxBPF(bpf_dsl::Policy * policy)123 SandboxBPF::SandboxBPF(bpf_dsl::Policy* policy)
124 : proc_fd_(), sandbox_has_started_(false), policy_(policy) {
125 }
126
~SandboxBPF()127 SandboxBPF::~SandboxBPF() {
128 }
129
130 // static
SupportsSeccompSandbox(SeccompLevel level)131 bool SandboxBPF::SupportsSeccompSandbox(SeccompLevel level) {
132 // Never pretend to support seccomp with Valgrind, as it
133 // throws the tool off.
134 if (IsRunningOnValgrind()) {
135 return false;
136 }
137
138 switch (level) {
139 case SeccompLevel::SINGLE_THREADED:
140 return KernelSupportsSeccompBPF();
141 case SeccompLevel::MULTI_THREADED:
142 return KernelSupportsSeccompTsync();
143 }
144 NOTREACHED();
145 return false;
146 }
147
StartSandbox(SeccompLevel seccomp_level)148 bool SandboxBPF::StartSandbox(SeccompLevel seccomp_level) {
149 DCHECK(policy_);
150 CHECK(seccomp_level == SeccompLevel::SINGLE_THREADED ||
151 seccomp_level == SeccompLevel::MULTI_THREADED);
152
153 if (sandbox_has_started_) {
154 SANDBOX_DIE(
155 "Cannot repeatedly start sandbox. Create a separate Sandbox "
156 "object instead.");
157 return false;
158 }
159
160 if (!proc_fd_.is_valid()) {
161 SetProcFd(ProcUtil::OpenProc());
162 }
163
164 const bool supports_tsync = KernelSupportsSeccompTsync();
165
166 if (seccomp_level == SeccompLevel::SINGLE_THREADED) {
167 // Wait for /proc/self/task/ to update if needed and assert the
168 // process is single threaded.
169 ThreadHelpers::AssertSingleThreaded(proc_fd_.get());
170 } else if (seccomp_level == SeccompLevel::MULTI_THREADED) {
171 if (IsSingleThreaded(proc_fd_.get())) {
172 SANDBOX_DIE("Cannot start sandbox; "
173 "process may be single-threaded when reported as not");
174 return false;
175 }
176 if (!supports_tsync) {
177 SANDBOX_DIE("Cannot start sandbox; kernel does not support synchronizing "
178 "filters for a threadgroup");
179 return false;
180 }
181 }
182
183 // We no longer need access to any files in /proc. We want to do this
184 // before installing the filters, just in case that our policy denies
185 // close().
186 if (proc_fd_.is_valid()) {
187 proc_fd_.reset();
188 }
189
190 // Install the filters.
191 InstallFilter(supports_tsync ||
192 seccomp_level == SeccompLevel::MULTI_THREADED);
193
194 return true;
195 }
196
SetProcFd(base::ScopedFD proc_fd)197 void SandboxBPF::SetProcFd(base::ScopedFD proc_fd) {
198 proc_fd_.swap(proc_fd);
199 }
200
201 // static
IsValidSyscallNumber(int sysnum)202 bool SandboxBPF::IsValidSyscallNumber(int sysnum) {
203 return SyscallSet::IsValid(sysnum);
204 }
205
206 // static
IsRequiredForUnsafeTrap(int sysno)207 bool SandboxBPF::IsRequiredForUnsafeTrap(int sysno) {
208 return bpf_dsl::PolicyCompiler::IsRequiredForUnsafeTrap(sysno);
209 }
210
211 // static
ForwardSyscall(const struct arch_seccomp_data & args)212 intptr_t SandboxBPF::ForwardSyscall(const struct arch_seccomp_data& args) {
213 return Syscall::Call(
214 args.nr, static_cast<intptr_t>(args.args[0]),
215 static_cast<intptr_t>(args.args[1]), static_cast<intptr_t>(args.args[2]),
216 static_cast<intptr_t>(args.args[3]), static_cast<intptr_t>(args.args[4]),
217 static_cast<intptr_t>(args.args[5]));
218 }
219
AssembleFilter()220 CodeGen::Program SandboxBPF::AssembleFilter() {
221 DCHECK(policy_);
222
223 bpf_dsl::PolicyCompiler compiler(policy_.get(), Trap::Registry());
224 if (Trap::SandboxDebuggingAllowedByUser()) {
225 compiler.DangerousSetEscapePC(EscapePC());
226 }
227 compiler.SetPanicFunc(SandboxPanic);
228 return compiler.Compile();
229 }
230
InstallFilter(bool must_sync_threads)231 void SandboxBPF::InstallFilter(bool must_sync_threads) {
232 // We want to be very careful in not imposing any requirements on the
233 // policies that are set with SetSandboxPolicy(). This means, as soon as
234 // the sandbox is active, we shouldn't be relying on libraries that could
235 // be making system calls. This, for example, means we should avoid
236 // using the heap and we should avoid using STL functions.
237 // Temporarily copy the contents of the "program" vector into a
238 // stack-allocated array; and then explicitly destroy that object.
239 // This makes sure we don't ex- or implicitly call new/delete after we
240 // installed the BPF filter program in the kernel. Depending on the
241 // system memory allocator that is in effect, these operators can result
242 // in system calls to things like munmap() or brk().
243 CodeGen::Program program = AssembleFilter();
244
245 struct sock_filter bpf[program.size()];
246 const struct sock_fprog prog = {static_cast<unsigned short>(program.size()),
247 bpf};
248 memcpy(bpf, &program[0], sizeof(bpf));
249 CodeGen::Program().swap(program); // vector swap trick
250
251 // Make an attempt to release memory that is no longer needed here, rather
252 // than in the destructor. Try to avoid as much as possible to presume of
253 // what will be possible to do in the new (sandboxed) execution environment.
254 policy_.reset();
255
256 if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
257 SANDBOX_DIE("Kernel refuses to enable no-new-privs");
258 }
259
260 // Install BPF filter program. If the thread state indicates multi-threading
261 // support, then the kernel hass the seccomp system call. Otherwise, fall
262 // back on prctl, which requires the process to be single-threaded.
263 if (must_sync_threads) {
264 int rv =
265 sys_seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &prog);
266 if (rv) {
267 SANDBOX_DIE(
268 "Kernel refuses to turn on and synchronize threads for BPF filters");
269 }
270 } else {
271 if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog)) {
272 SANDBOX_DIE("Kernel refuses to turn on BPF filters");
273 }
274 }
275
276 sandbox_has_started_ = true;
277 }
278
279 } // namespace sandbox
280