1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "sandbox/linux/seccomp-bpf/trap.h"
6
7 #include <errno.h>
8 #include <signal.h>
9 #include <stddef.h>
10 #include <stdint.h>
11 #include <string.h>
12 #include <sys/syscall.h>
13
14 #include <algorithm>
15 #include <limits>
16 #include <tuple>
17
18 #include "base/compiler_specific.h"
19 #include "base/logging.h"
20 #include "build/build_config.h"
21 #include "sandbox/linux/bpf_dsl/seccomp_macros.h"
22 #include "sandbox/linux/seccomp-bpf/die.h"
23 #include "sandbox/linux/seccomp-bpf/syscall.h"
24 #include "sandbox/linux/services/syscall_wrappers.h"
25 #include "sandbox/linux/system_headers/linux_seccomp.h"
26 #include "sandbox/linux/system_headers/linux_signal.h"
27
28 namespace {
29
// SIGSYS-specific payload of a siginfo structure. Trap::SigSys() fills this
// by memcpy()ing from the start of |info->_sifields|, so the member order and
// types here determine how those bytes are interpreted.
struct arch_sigsys {
  // Compared against SECCOMP_IP(ctx) in Trap::SigSys().
  void* ip;
  // Compared against SECCOMP_SYSCALL(ctx) in Trap::SigSys().
  int nr;
  // Compared against SECCOMP_ARCH in Trap::SigSys().
  unsigned int arch;
};
35
// Number of TrapKey slots added to the trap array's capacity each time
// Trap::Add() finds the array full.
const int kCapacityIncrement = 20;

// Unsafe traps can only be turned on if the user explicitly allowed them
// by setting the CHROME_SANDBOX_DEBUGGING environment variable to a
// non-empty value (see Trap::SandboxDebuggingAllowedByUser()).
const char kSandboxDebuggingEnv[] = "CHROME_SANDBOX_DEBUGGING";
41
42 // We need to tell whether we are performing a "normal" callback, or
43 // whether we were called recursively from within a UnsafeTrap() callback.
44 // This is a little tricky to do, because we need to somehow get access to
45 // per-thread data from within a signal context. Normal TLS storage is not
46 // safely accessible at this time. We could roll our own, but that involves
47 // a lot of complexity. Instead, we co-opt one bit in the signal mask.
// If SIGBUS is blocked, we assume that we have been called recursively.
49 // There is a possibility for collision with other code that needs to do
50 // this, but in practice the risks are low.
51 // If SIGBUS turns out to be a problem, we could instead co-opt one of the
52 // realtime signals. There are plenty of them. Unfortunately, there is no
53 // way to mark a signal as allocated. So, the potential for collision is
54 // possibly even worse.
GetIsInSigHandler(const ucontext_t * ctx)55 bool GetIsInSigHandler(const ucontext_t* ctx) {
56 // Note: on Android, sigismember does not take a pointer to const.
57 return sigismember(const_cast<sigset_t*>(&ctx->uc_sigmask), LINUX_SIGBUS);
58 }
59
SetIsInSigHandler()60 void SetIsInSigHandler() {
61 sigset_t mask;
62 if (sigemptyset(&mask) || sigaddset(&mask, LINUX_SIGBUS) ||
63 sandbox::sys_sigprocmask(LINUX_SIG_BLOCK, &mask, NULL)) {
64 SANDBOX_DIE("Failed to block SIGBUS");
65 }
66 }
67
// Returns true only when |sa| describes the default disposition: the
// SA_SIGINFO flag is clear and the handler is SIG_DFL.
bool IsDefaultSignalAction(const struct sigaction& sa) {
  const bool uses_siginfo = (sa.sa_flags & SA_SIGINFO) != 0;
  return !uses_siginfo && sa.sa_handler == SIG_DFL;
}
74
75 } // namespace
76
77 namespace sandbox {
78
Trap()79 Trap::Trap()
80 : trap_array_(NULL),
81 trap_array_size_(0),
82 trap_array_capacity_(0),
83 has_unsafe_traps_(false) {
84 // Set new SIGSYS handler
85 struct sigaction sa = {};
86 // In some toolchain, sa_sigaction is not declared in struct sigaction.
87 // So, here cast the pointer to the sa_handler's type. This works because
88 // |sa_handler| and |sa_sigaction| shares the same memory.
89 sa.sa_handler = reinterpret_cast<void (*)(int)>(SigSysAction);
90 sa.sa_flags = LINUX_SA_SIGINFO | LINUX_SA_NODEFER;
91 struct sigaction old_sa = {};
92 if (sys_sigaction(LINUX_SIGSYS, &sa, &old_sa) < 0) {
93 SANDBOX_DIE("Failed to configure SIGSYS handler");
94 }
95
96 if (!IsDefaultSignalAction(old_sa)) {
97 static const char kExistingSIGSYSMsg[] =
98 "Existing signal handler when trying to install SIGSYS. SIGSYS needs "
99 "to be reserved for seccomp-bpf.";
100 DLOG(FATAL) << kExistingSIGSYSMsg;
101 LOG(ERROR) << kExistingSIGSYSMsg;
102 }
103
104 // Unmask SIGSYS
105 sigset_t mask;
106 if (sigemptyset(&mask) || sigaddset(&mask, LINUX_SIGSYS) ||
107 sys_sigprocmask(LINUX_SIG_UNBLOCK, &mask, NULL)) {
108 SANDBOX_DIE("Failed to configure SIGSYS handler");
109 }
110 }
111
Registry()112 bpf_dsl::TrapRegistry* Trap::Registry() {
113 // Note: This class is not thread safe. It is the caller's responsibility
114 // to avoid race conditions. Normally, this is a non-issue as the sandbox
115 // can only be initialized if there are no other threads present.
116 // Also, this is not a normal singleton. Once created, the global trap
117 // object must never be destroyed again.
118 if (!global_trap_) {
119 global_trap_ = new Trap();
120 if (!global_trap_) {
121 SANDBOX_DIE("Failed to allocate global trap handler");
122 }
123 }
124 return global_trap_;
125 }
126
SigSysAction(int nr,LinuxSigInfo * info,void * void_context)127 void Trap::SigSysAction(int nr, LinuxSigInfo* info, void* void_context) {
128 if (info) {
129 MSAN_UNPOISON(info, sizeof(*info));
130 }
131
132 // Obtain the signal context. This, most notably, gives us access to
133 // all CPU registers at the time of the signal.
134 ucontext_t* ctx = reinterpret_cast<ucontext_t*>(void_context);
135 if (ctx) {
136 MSAN_UNPOISON(ctx, sizeof(*ctx));
137 }
138
139 if (!global_trap_) {
140 RAW_SANDBOX_DIE(
141 "This can't happen. Found no global singleton instance "
142 "for Trap() handling.");
143 }
144 global_trap_->SigSys(nr, info, ctx);
145 }
146
SigSys(int nr,LinuxSigInfo * info,ucontext_t * ctx)147 void Trap::SigSys(int nr, LinuxSigInfo* info, ucontext_t* ctx) {
148 // Signal handlers should always preserve "errno". Otherwise, we could
149 // trigger really subtle bugs.
150 const int old_errno = errno;
151
152 // Various sanity checks to make sure we actually received a signal
153 // triggered by a BPF filter. If something else triggered SIGSYS
154 // (e.g. kill()), there is really nothing we can do with this signal.
155 if (nr != LINUX_SIGSYS || info->si_code != SYS_SECCOMP || !ctx ||
156 info->si_errno <= 0 ||
157 static_cast<size_t>(info->si_errno) > trap_array_size_) {
158 // ATI drivers seem to send SIGSYS, so this cannot be FATAL.
159 // See crbug.com/178166.
160 // TODO(jln): add a DCHECK or move back to FATAL.
161 RAW_LOG(ERROR, "Unexpected SIGSYS received.");
162 errno = old_errno;
163 return;
164 }
165
166
167 // Obtain the siginfo information that is specific to SIGSYS. Unfortunately,
168 // most versions of glibc don't include this information in siginfo_t. So,
169 // we need to explicitly copy it into a arch_sigsys structure.
170 struct arch_sigsys sigsys;
171 memcpy(&sigsys, &info->_sifields, sizeof(sigsys));
172
173 #if defined(__mips__)
174 // When indirect syscall (syscall(__NR_foo, ...)) is made on Mips, the
175 // number in register SECCOMP_SYSCALL(ctx) is always __NR_syscall and the
176 // real number of a syscall (__NR_foo) is in SECCOMP_PARM1(ctx)
177 bool sigsys_nr_is_bad = sigsys.nr != static_cast<int>(SECCOMP_SYSCALL(ctx)) &&
178 sigsys.nr != static_cast<int>(SECCOMP_PARM1(ctx));
179 #else
180 bool sigsys_nr_is_bad = sigsys.nr != static_cast<int>(SECCOMP_SYSCALL(ctx));
181 #endif
182
183 // Some more sanity checks.
184 if (sigsys.ip != reinterpret_cast<void*>(SECCOMP_IP(ctx)) ||
185 sigsys_nr_is_bad || sigsys.arch != SECCOMP_ARCH) {
186 // TODO(markus):
187 // SANDBOX_DIE() can call LOG(FATAL). This is not normally async-signal
188 // safe and can lead to bugs. We should eventually implement a different
189 // logging and reporting mechanism that is safe to be called from
190 // the sigSys() handler.
191 RAW_SANDBOX_DIE("Sanity checks are failing after receiving SIGSYS.");
192 }
193
194 intptr_t rc;
195 if (has_unsafe_traps_ && GetIsInSigHandler(ctx)) {
196 errno = old_errno;
197 if (sigsys.nr == __NR_clone) {
198 RAW_SANDBOX_DIE("Cannot call clone() from an UnsafeTrap() handler.");
199 }
200 #if defined(__mips__)
201 // Mips supports up to eight arguments for syscall.
202 // However, seccomp bpf can filter only up to six arguments, so using eight
203 // arguments has sense only when using UnsafeTrap() handler.
204 rc = Syscall::Call(SECCOMP_SYSCALL(ctx),
205 SECCOMP_PARM1(ctx),
206 SECCOMP_PARM2(ctx),
207 SECCOMP_PARM3(ctx),
208 SECCOMP_PARM4(ctx),
209 SECCOMP_PARM5(ctx),
210 SECCOMP_PARM6(ctx),
211 SECCOMP_PARM7(ctx),
212 SECCOMP_PARM8(ctx));
213 #else
214 rc = Syscall::Call(SECCOMP_SYSCALL(ctx),
215 SECCOMP_PARM1(ctx),
216 SECCOMP_PARM2(ctx),
217 SECCOMP_PARM3(ctx),
218 SECCOMP_PARM4(ctx),
219 SECCOMP_PARM5(ctx),
220 SECCOMP_PARM6(ctx));
221 #endif // defined(__mips__)
222 } else {
223 const TrapKey& trap = trap_array_[info->si_errno - 1];
224 if (!trap.safe) {
225 SetIsInSigHandler();
226 }
227
228 // Copy the seccomp-specific data into a arch_seccomp_data structure. This
229 // is what we are showing to TrapFnc callbacks that the system call
230 // evaluator registered with the sandbox.
231 struct arch_seccomp_data data = {
232 static_cast<int>(SECCOMP_SYSCALL(ctx)),
233 SECCOMP_ARCH,
234 reinterpret_cast<uint64_t>(sigsys.ip),
235 {static_cast<uint64_t>(SECCOMP_PARM1(ctx)),
236 static_cast<uint64_t>(SECCOMP_PARM2(ctx)),
237 static_cast<uint64_t>(SECCOMP_PARM3(ctx)),
238 static_cast<uint64_t>(SECCOMP_PARM4(ctx)),
239 static_cast<uint64_t>(SECCOMP_PARM5(ctx)),
240 static_cast<uint64_t>(SECCOMP_PARM6(ctx))}};
241
242 // Now call the TrapFnc callback associated with this particular instance
243 // of SECCOMP_RET_TRAP.
244 rc = trap.fnc(data, const_cast<void*>(trap.aux));
245 }
246
247 // Update the CPU register that stores the return code of the system call
248 // that we just handled, and restore "errno" to the value that it had
249 // before entering the signal handler.
250 Syscall::PutValueInUcontext(rc, ctx);
251 errno = old_errno;
252
253 return;
254 }
255
operator <(const TrapKey & o) const256 bool Trap::TrapKey::operator<(const TrapKey& o) const {
257 return std::tie(fnc, aux, safe) < std::tie(o.fnc, o.aux, o.safe);
258 }
259
// Registers the trap described by (|fnc|, |aux|, |safe|) and returns the id
// that identifies it together with SECCOMP_RET_TRAP.
//
// Ids are assigned sequentially starting at 1. Registering a combination that
// was registered before returns the id assigned earlier. Requesting an unsafe
// trap (|safe| == false) without CHROME_SANDBOX_DEBUGGING being set, or
// exhausting the id space, terminates the process via SANDBOX_DIE().
uint16_t Trap::Add(TrapFnc fnc, const void* aux, bool safe) {
  if (!safe && !SandboxDebuggingAllowedByUser()) {
    // Unless the user set the CHROME_SANDBOX_DEBUGGING environment variable,
    // we never return an ErrorCode that is marked as "unsafe". This also
    // means the BPF compiler will never emit code that allows unsafe system
    // calls to bypass the filter (because they use the magic return address
    // from Syscall::Call(-1)).

    // This SANDBOX_DIE() can optionally be removed. It won't break security,
    // but it might make error messages from the BPF compiler a little harder
    // to understand. Removing the SANDBOX_DIE() allows callers to easily check
    // whether unsafe traps are supported (by checking whether the returned
    // ErrorCode is ET_INVALID).
    SANDBOX_DIE(
        "Cannot use unsafe traps unless CHROME_SANDBOX_DEBUGGING "
        "is enabled");

    // Only reached if the SANDBOX_DIE() above is removed; 0 is never handed
    // out as a valid id.
    return 0;
  }

  // Each unique combination of TrapFnc, auxiliary data and safety flag makes
  // up a distinct instance of a SECCOMP_RET_TRAP.
  TrapKey key(fnc, aux, safe);

  // We return unique identifiers together with SECCOMP_RET_TRAP. This allows
  // us to associate a trap with the appropriate handler. The kernel allows us
  // identifiers in the range from 0 to SECCOMP_RET_DATA (0xFFFF). We want to
  // avoid 0, as it could be confused for a trap without any specific id.
  // The nice thing about sequentially numbered identifiers is that we can also
  // trivially look them up from our signal handler without making any system
  // calls that might be async-signal-unsafe.
  // In order to do so, we store all of our traps in a C-style trap_array_.

  TrapIds::const_iterator iter = trap_ids_.find(key);
  if (iter != trap_ids_.end()) {
    // We have seen this combination before. Return the same id that we
    // assigned earlier.
    return iter->second;
  }

  // This is a new combination. Remember it and assign a new id.
  if (trap_array_size_ >= SECCOMP_RET_DATA /* 0xFFFF */ ||
      trap_array_size_ >= std::numeric_limits<uint16_t>::max()) {
    // In practice, this is pretty much impossible to trigger, as there
    // are other kernel limitations that restrict overall BPF program sizes.
    SANDBOX_DIE("Too many SECCOMP_RET_TRAP callback instances");
  }

  // Our callers ensure that there are no other threads accessing trap_array_
  // concurrently (typically this is done by ensuring that we are single-
  // threaded while the sandbox is being set up). But we nonetheless are
  // modifying a live data structure that could be accessed any time a
  // system call is made, as system calls could be triggering SIGSYS.
  // So, we have to be extra careful that we update trap_array_ atomically.
  // In particular, this means we shouldn't be using realloc() to resize it.
  // Instead, we allocate a new array, copy the values, and then switch the
  // pointer. We only really care about the pointer being updated atomically
  // and the data that is pointed to being valid, as these are the only
  // values accessed from the signal handler. It is OK if trap_array_size_
  // is inconsistent with the pointer, as it is monotonically increasing.
  // Also, we only care about compiler barriers, as the signal handler is
  // triggered synchronously from a system call. We don't have to protect
  // against issues with the memory model or with completely asynchronous
  // events.
  if (trap_array_size_ >= trap_array_capacity_) {
    trap_array_capacity_ += kCapacityIncrement;
    TrapKey* old_trap_array = trap_array_;
    TrapKey* new_trap_array = new TrapKey[trap_array_capacity_];
    std::copy_n(old_trap_array, trap_array_size_, new_trap_array);

    // Language specs are unclear on whether the compiler is allowed to move
    // the "delete[]" above our preceding assignments and/or memory moves,
    // iff the compiler believes that "delete[]" doesn't have any other
    // global side-effects.
    // We insert optimization barriers to prevent this from happening.
    // The first barrier is probably not needed, but better be explicit in
    // what we want to tell the compiler.
    // The clang developer mailing list couldn't answer whether this is a
    // legitimate worry; but they at least thought that the barrier is
    // sufficient to prevent the (so far hypothetical) problem of re-ordering
    // of instructions by the compiler.
    //
    // TODO(mdempsky): Try to clean this up using base/atomicops or C++11
    // atomics; see crbug.com/414363.
    asm volatile("" : "=r"(new_trap_array) : "0"(new_trap_array) : "memory");
    trap_array_ = new_trap_array;
    asm volatile("" : "=r"(trap_array_) : "0"(trap_array_) : "memory");

    // The old storage is released only after the new pointer was published.
    delete[] old_trap_array;
  }

  // Ids are 1-based: the entry stored at index N is identified by N + 1.
  uint16_t id = trap_array_size_ + 1;
  trap_ids_[key] = id;
  trap_array_[trap_array_size_] = key;
  trap_array_size_++;
  return id;
}
357
SandboxDebuggingAllowedByUser()358 bool Trap::SandboxDebuggingAllowedByUser() {
359 const char* debug_flag = getenv(kSandboxDebuggingEnv);
360 return debug_flag && *debug_flag;
361 }
362
EnableUnsafeTraps()363 bool Trap::EnableUnsafeTraps() {
364 if (!has_unsafe_traps_) {
365 // Unsafe traps are a one-way fuse. Once enabled, they can never be turned
366 // off again.
367 // We only allow enabling unsafe traps, if the user explicitly set an
368 // appropriate environment variable. This prevents bugs that accidentally
369 // disable all sandboxing for all users.
370 if (SandboxDebuggingAllowedByUser()) {
371 // We only ever print this message once, when we enable unsafe traps the
372 // first time.
373 SANDBOX_INFO("WARNING! Disabling sandbox for debugging purposes");
374 has_unsafe_traps_ = true;
375 } else {
376 SANDBOX_INFO(
377 "Cannot disable sandbox and use unsafe traps unless "
378 "CHROME_SANDBOX_DEBUGGING is turned on first");
379 }
380 }
381 // Returns the, possibly updated, value of has_unsafe_traps_.
382 return has_unsafe_traps_;
383 }
384
// The process-wide Trap instance. Created on demand by Trap::Registry() and
// consulted by Trap::SigSysAction() when a SIGSYS arrives. No initializer is
// given, so as an object with static storage duration it starts out null.
Trap* Trap::global_trap_;
386
387 } // namespace sandbox
388