• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "sandbox/linux/seccomp-bpf/syscall.h"
6 
7 #include <asm/unistd.h>
8 #include <errno.h>
9 
10 #include "base/basictypes.h"
11 #include "base/logging.h"
12 #include "sandbox/linux/seccomp-bpf/linux_seccomp.h"
13 
14 namespace sandbox {
15 
namespace {

#if defined(ARCH_CPU_X86_FAMILY) || defined(ARCH_CPU_ARM_FAMILY) || \
    defined(ARCH_CPU_MIPS_FAMILY)
// Number that's not currently used by any Linux kernel ABIs.
// Syscall::InvalidCall() issues this so that a BPF policy's handling of
// unknown syscall numbers can be exercised deliberately.
const int kInvalidSyscallNumber = 0x351d3;
#else
#error Unrecognized architecture
#endif

asm(// We need to be able to tell the kernel exactly where we made a
    // system call. The C++ compiler likes to sometimes clone or
    // inline code, which would inadvertently end up duplicating
    // the entry point.
    // "gcc" can suppress code duplication with suitable function
    // attributes, but "clang" doesn't have this ability.
    // The "clang" developer mailing list suggested that the correct
    // and portable solution is a file-scope assembly block.
    // N.B. We do mark our code as a proper function so that backtraces
    // work correctly. But we make absolutely no attempt to use the
    // ABI's calling conventions for passing arguments. We will only
    // ever be called from assembly code and thus can pick more
    // suitable calling conventions.
    //
    // In every variant below, a negative syscall number short-circuits the
    // kernel entry: instead of trapping, SyscallAsm returns the address of
    // the instruction immediately following the trap instruction (label
    // "2:"), which BPF policies use as a marker.
#if defined(__i386__)
    // Ad-hoc convention (see Syscall::Call): syscall number in %eax,
    // pointer to the arguments array in %edi.
    ".text\n"
    ".align 16, 0x90\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.cfi_startproc\n"
    // Check if "%eax" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "int $0x80". This address can be
    // used as a marker that BPF code inspects.
    "test %eax, %eax\n"
    "jge  1f\n"
    // Always, make sure that our code is position-independent, or
    // address space randomization might not work on i386. This means,
    // we can't use "lea", but instead have to rely on "call/pop".
    "call 0f;   .cfi_adjust_cfa_offset  4\n"
    "0:pop  %eax; .cfi_adjust_cfa_offset -4\n"
    "addl $2f-0b, %eax\n"
    "ret\n"
    // Save register that we don't want to clobber. On i386, we need to
    // save relatively aggressively, as there are a couple of registers
    // that are used internally (e.g. %ebx for position-independent
    // code, and %ebp for the frame pointer), and as we need to keep at
    // least a few registers available for the register allocator.
    "1:push %esi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset esi, 0\n"
    "push %edi; .cfi_adjust_cfa_offset 4; .cfi_rel_offset edi, 0\n"
    "push %ebx; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebx, 0\n"
    "push %ebp; .cfi_adjust_cfa_offset 4; .cfi_rel_offset ebp, 0\n"
    // Copy entries from the array holding the arguments into the
    // correct CPU registers.
    "movl  0(%edi), %ebx\n"
    "movl  4(%edi), %ecx\n"
    "movl  8(%edi), %edx\n"
    "movl 12(%edi), %esi\n"
    "movl 20(%edi), %ebp\n"
    "movl 16(%edi), %edi\n"
    // Enter the kernel.
    "int  $0x80\n"
    // This is our "magic" return address that the BPF filter sees.
    "2:"
    // Restore any clobbered registers that we didn't declare to the
    // compiler.
    "pop  %ebp; .cfi_restore ebp; .cfi_adjust_cfa_offset -4\n"
    "pop  %ebx; .cfi_restore ebx; .cfi_adjust_cfa_offset -4\n"
    "pop  %edi; .cfi_restore edi; .cfi_adjust_cfa_offset -4\n"
    "pop  %esi; .cfi_restore esi; .cfi_adjust_cfa_offset -4\n"
    "ret\n"
    ".cfi_endproc\n"
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif defined(__x86_64__)
    // Ad-hoc convention (see Syscall::Call): syscall number in %rax,
    // pointer to the arguments array in %r12.
    ".text\n"
    ".align 16, 0x90\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.cfi_startproc\n"
    // Check if "%rax" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "syscall". This address can be
    // used as a marker that BPF code inspects.
    "test %rax, %rax\n"
    "jge  1f\n"
    // Always make sure that our code is position-independent, or the
    // linker will throw a hissy fit on x86-64.
    "call 0f;   .cfi_adjust_cfa_offset  8\n"
    "0:pop  %rax; .cfi_adjust_cfa_offset -8\n"
    "addq $2f-0b, %rax\n"
    "ret\n"
    // We declared all clobbered registers to the compiler. On x86-64,
    // there really isn't much of a problem with register pressure. So,
    // we can go ahead and directly copy the entries from the arguments
    // array into the appropriate CPU registers.
    "1:movq  0(%r12), %rdi\n"
    "movq  8(%r12), %rsi\n"
    "movq 16(%r12), %rdx\n"
    "movq 24(%r12), %r10\n"
    "movq 32(%r12), %r8\n"
    "movq 40(%r12), %r9\n"
    // Enter the kernel.
    "syscall\n"
    // This is our "magic" return address that the BPF filter sees.
    "2:ret\n"
    ".cfi_endproc\n"
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif defined(__arm__)
    // Ad-hoc convention (see Syscall::Call): syscall number in r0,
    // pointer to the arguments array in r6.
    //
    // Throughout this file, we use the same mode (ARM vs. thumb)
    // that the C++ compiler uses. This means, when transferring control
    // from C++ to assembly code, we do not need to switch modes (e.g.
    // by using the "bx" instruction). It also means that our assembly
    // code should not be invoked directly from code that lives in
    // other compilation units, as we don't bother implementing thumb
    // interworking. That's OK, as we don't make any of the assembly
    // symbols public. They are all local to this file.
    ".text\n"
    ".align 2\n"
    ".type SyscallAsm, %function\n"
#if defined(__thumb__)
    ".thumb_func\n"
#else
    ".arm\n"
#endif
    "SyscallAsm:.fnstart\n"
    "@ args = 0, pretend = 0, frame = 8\n"
    "@ frame_needed = 1, uses_anonymous_args = 0\n"
#if defined(__thumb__)
    ".cfi_startproc\n"
    "push {r7, lr}\n"
    ".cfi_offset 14, -4\n"
    ".cfi_offset  7, -8\n"
    "mov r7, sp\n"
    ".cfi_def_cfa_register 7\n"
    ".cfi_def_cfa_offset 8\n"
#else
    "stmfd sp!, {fp, lr}\n"
    "add fp, sp, #4\n"
#endif
    // Check if "r0" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "swi 0". This address can be
    // used as a marker that BPF code inspects.
    "cmp r0, #0\n"
    "bge 1f\n"
    "adr r0, 2f\n"
    "b   2f\n"
    // We declared (almost) all clobbered registers to the compiler. On
    // ARM there is no particular register pressure. So, we can go
    // ahead and directly copy the entries from the arguments array
    // into the appropriate CPU registers.
    "1:ldr r5, [r6, #20]\n"
    "ldr r4, [r6, #16]\n"
    "ldr r3, [r6, #12]\n"
    "ldr r2, [r6, #8]\n"
    "ldr r1, [r6, #4]\n"
    // The kernel takes the syscall number in r7; move it there last, as
    // r7 doubles as the frame pointer in thumb mode.
    "mov r7, r0\n"
    "ldr r0, [r6, #0]\n"
    // Enter the kernel
    "swi 0\n"
// Restore the frame pointer. Also restore the program counter from
// the link register; this makes us return to the caller.
#if defined(__thumb__)
    "2:pop {r7, pc}\n"
    ".cfi_endproc\n"
#else
    "2:ldmfd sp!, {fp, pc}\n"
#endif
    ".fnend\n"
    "9:.size SyscallAsm, 9b-SyscallAsm\n"
#elif defined(__mips__)
    // Ad-hoc convention (see Syscall::SandboxSyscallRaw): syscall number
    // in $v0, pointer to the arguments array in $a0.
    ".text\n"
    ".align 4\n"
    ".type SyscallAsm, @function\n"
    "SyscallAsm:.ent SyscallAsm\n"
    ".frame  $sp, 40, $ra\n"
    ".set   push\n"
    ".set   noreorder\n"
    "addiu  $sp, $sp, -40\n"
    "sw     $ra, 36($sp)\n"
    // Check if "v0" is negative. If so, do not attempt to make a
    // system call. Instead, compute the return address that is visible
    // to the kernel after we execute "syscall". This address can be
    // used as a marker that BPF code inspects.
    "bgez   $v0, 1f\n"
    " nop\n"
    "la     $v0, 2f\n"
    "b      2f\n"
    " nop\n"
    // On MIPS first four arguments go to registers a0 - a3 and any
    // argument after that goes to stack. We can go ahead and directly
    // copy the entries from the arguments array into the appropriate
    // CPU registers and on the stack.
    "1:lw     $a3, 28($a0)\n"
    "lw     $a2, 24($a0)\n"
    "lw     $a1, 20($a0)\n"
    "lw     $t0, 16($a0)\n"
    "sw     $a3, 28($sp)\n"
    "sw     $a2, 24($sp)\n"
    "sw     $a1, 20($sp)\n"
    "sw     $t0, 16($sp)\n"
    "lw     $a3, 12($a0)\n"
    "lw     $a2, 8($a0)\n"
    "lw     $a1, 4($a0)\n"
    "lw     $a0, 0($a0)\n"
    // Enter the kernel
    "syscall\n"
    // This is our "magic" return address that the BPF filter sees.
    // Restore the return address from the stack.
    "2:lw     $ra, 36($sp)\n"
    "jr     $ra\n"
    " addiu  $sp, $sp, 40\n"
    ".set    pop\n"
    ".end    SyscallAsm\n"
    ".size   SyscallAsm,.-SyscallAsm\n"
#elif defined(__aarch64__)
    // Ad-hoc convention (see Syscall::Call): syscall number in x0,
    // pointer to the arguments array in x6. The kernel takes the
    // syscall number in x8.
    ".text\n"
    ".align 2\n"
    ".type SyscallAsm, %function\n"
    "SyscallAsm:\n"
    ".cfi_startproc\n"
    "cmp x0, #0\n"
    "b.ge 1f\n"
    "adr x0,2f\n"
    "b 2f\n"
    "1:ldr x5, [x6, #40]\n"
    "ldr x4, [x6, #32]\n"
    "ldr x3, [x6, #24]\n"
    "ldr x2, [x6, #16]\n"
    "ldr x1, [x6, #8]\n"
    "mov x8, x0\n"
    "ldr x0, [x6, #0]\n"
    // Enter the kernel
    "svc 0\n"
    "2:ret\n"
    ".cfi_endproc\n"
    ".size SyscallAsm, .-SyscallAsm\n"
#endif
    );  // asm

}  // namespace
254 
InvalidCall()255 intptr_t Syscall::InvalidCall() {
256   // Explicitly pass eight zero arguments just in case.
257   return Call(kInvalidSyscallNumber, 0, 0, 0, 0, 0, 0, 0, 0);
258 }
259 
// Performs a raw system call, bypassing any BPF-visible library wrappers.
// Returns the kernel's raw return value (-errno on failure on most
// architectures; the MIPS path below converts to that convention). If |nr|
// is negative, no system call is made: SyscallAsm instead returns the
// address of the instruction following its trap instruction, which BPF
// policies use as a marker (see the file-scope asm block above).
intptr_t Syscall::Call(int nr,
                       intptr_t p0,
                       intptr_t p1,
                       intptr_t p2,
                       intptr_t p3,
                       intptr_t p4,
                       intptr_t p5,
                       intptr_t p6,
                       intptr_t p7) {
  // We rely on "intptr_t" to be the exact size as a "void *". This is
  // typically true, but just in case, we add a check. The language
  // specification allows platforms some leeway in cases, where
  // "sizeof(void *)" is not the same as "sizeof(void (*)())". We expect
  // that this would only be an issue for IA64, which we are currently not
  // planning on supporting. And it is even possible that this would work
  // on IA64, but for lack of actual hardware, I cannot test.
  COMPILE_ASSERT(sizeof(void*) == sizeof(intptr_t),
                 pointer_types_and_intptr_must_be_exactly_the_same_size);

  // TODO(nedeljko): Enable use of more than six parameters on architectures
  //                 where that makes sense.
#if defined(__mips__)
  const intptr_t args[8] = {p0, p1, p2, p3, p4, p5, p6, p7};
#else
  DCHECK_EQ(p6, 0) << " Support for syscalls with more than six arguments not "
                      "added for this architecture";
  DCHECK_EQ(p7, 0) << " Support for syscalls with more than six arguments not "
                      "added for this architecture";
  const intptr_t args[6] = {p0, p1, p2, p3, p4, p5};
#endif  // defined(__mips__)

// Invoke our file-scope assembly code. The constraints have been picked
// carefully to match what the rest of the assembly code expects in input,
// output, and clobbered registers.
#if defined(__i386__)
  intptr_t ret = nr;
  asm volatile(
      "call SyscallAsm\n"
      // N.B. These are not the calling conventions normally used by the ABI.
      : "=a"(ret)
      : "0"(ret), "D"(args)
      : "cc", "esp", "memory", "ecx", "edx");
#elif defined(__x86_64__)
  intptr_t ret = nr;
  {
    // SyscallAsm expects the arguments-array pointer in %r12.
    register const intptr_t* data __asm__("r12") = args;
    asm volatile(
        "lea  -128(%%rsp), %%rsp\n"  // Avoid red zone.
        "call SyscallAsm\n"
        "lea  128(%%rsp), %%rsp\n"
        // N.B. These are not the calling conventions normally used by the ABI.
        : "=a"(ret)
        : "0"(ret), "r"(data)
        : "cc",
          "rsp",
          "memory",
          "rcx",
          "rdi",
          "rsi",
          "rdx",
          "r8",
          "r9",
          "r10",
          "r11");
  }
#elif defined(__arm__)
  intptr_t ret;
  {
    // SyscallAsm takes the syscall number in r0 and returns the result
    // there; the arguments-array pointer goes in r6.
    register intptr_t inout __asm__("r0") = nr;
    register const intptr_t* data __asm__("r6") = args;
    asm volatile(
        "bl SyscallAsm\n"
        // N.B. These are not the calling conventions normally used by the ABI.
        : "=r"(inout)
        : "0"(inout), "r"(data)
        : "cc",
          "lr",
          "memory",
          "r1",
          "r2",
          "r3",
          "r4",
          "r5"
#if !defined(__thumb__)
          // In thumb mode, we cannot use "r7" as a general purpose register, as
          // it is our frame pointer. We have to manually manage and preserve
          // it.
          // In ARM mode, we have a dedicated frame pointer register and "r7" is
          // thus available as a general purpose register. We don't preserve it,
          // but instead mark it as clobbered.
          ,
          "r7"
#endif  // !defined(__thumb__)
        );
    ret = inout;
  }
#elif defined(__mips__)
  int err_status;
  intptr_t ret = Syscall::SandboxSyscallRaw(nr, args, &err_status);

  if (err_status) {
    // On error, MIPS returns errno from syscall instead of -errno.
    // The purpose of this negation is for SandboxSyscall() to behave
    // more like it would on other architectures.
    ret = -ret;
  }
#elif defined(__aarch64__)
  intptr_t ret;
  {
    // SyscallAsm takes the syscall number in x0 and returns the result
    // there; the arguments-array pointer goes in x6.
    register intptr_t inout __asm__("x0") = nr;
    register const intptr_t* data __asm__("x6") = args;
    asm volatile("bl SyscallAsm\n"
                 : "=r"(inout)
                 : "0"(inout), "r"(data)
                 : "memory", "x1", "x2", "x3", "x4", "x5", "x8", "x30");
    ret = inout;
  }

#else
#error "Unimplemented architecture"
#endif
  return ret;
}
383 
PutValueInUcontext(intptr_t ret_val,ucontext_t * ctx)384 void Syscall::PutValueInUcontext(intptr_t ret_val, ucontext_t* ctx) {
385 #if defined(__mips__)
386   // Mips ABI states that on error a3 CPU register has non zero value and if
387   // there is no error, it should be zero.
388   if (ret_val <= -1 && ret_val >= -4095) {
389     // |ret_val| followes the Syscall::Call() convention of being -errno on
390     // errors. In order to write correct value to return register this sign
391     // needs to be changed back.
392     ret_val = -ret_val;
393     SECCOMP_PARM4(ctx) = 1;
394   } else
395     SECCOMP_PARM4(ctx) = 0;
396 #endif
397   SECCOMP_RESULT(ctx) = static_cast<greg_t>(ret_val);
398 }
399 
#if defined(__mips__)
// Issues a raw system call on MIPS via SyscallAsm and reports the kernel's
// out-of-band error flag separately.
//
// |nr|      - syscall number (negative skips the kernel; see SyscallAsm).
// |args|    - array of (up to eight) syscall arguments.
// |err_ret| - out-parameter; receives the a3 register value, which the
//             MIPS ABI sets to non-zero on error.
// Returns the raw value of the v0 result register (a positive errno when
// *err_ret is non-zero; the caller negates it — see Syscall::Call).
intptr_t Syscall::SandboxSyscallRaw(int nr,
                                    const intptr_t* args,
                                    intptr_t* err_ret) {
  // SyscallAsm's ad-hoc convention: syscall number in v0, arguments-array
  // pointer in a0; result comes back in v0, error flag in a3.
  register intptr_t ret __asm__("v0") = nr;
  // a3 register becomes non zero on error.
  register intptr_t err_stat __asm__("a3") = 0;
  {
    register const intptr_t* data __asm__("a0") = args;
    asm volatile(
        "la $t9, SyscallAsm\n"
        "jalr $t9\n"
        " nop\n"
        : "=r"(ret), "=r"(err_stat)
        : "0"(ret),
          "r"(data)
          // a2 is in the clobber list, so the compiler will not assume it
          // keeps its value across this asm statement.
          // NOTE(review): SyscallAsm also writes $a1 and $t0, which are not
          // listed as clobbered here — verify whether that is intentional.
        : "memory", "ra", "t9", "a2");
  }

  // Set an error status so it can be used outside of this function
  *err_ret = err_stat;

  return ret;
}
#endif  // defined(__mips__)
427 
428 }  // namespace sandbox
429