/*
 * 32-bit syscall ABI conformance test.
 *
 * Copyright (c) 2015 Denys Vlasenko
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
/*
 * Can be built statically:
 * gcc -Os -Wall -static -m32 test_syscall_vdso.c thunks_32.S
 */
#undef _GNU_SOURCE
#define _GNU_SOURCE 1
#undef __USE_GNU
#define __USE_GNU 1
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/select.h>
#include <sys/time.h>
#include <elf.h>
#include <sys/ptrace.h>
#include <sys/utsname.h>
#include <sys/wait.h>
35
36 #if !defined(__i386__)
/*
 * Entry point used when the file is not compiled for 32-bit x86.
 * The test cannot run in that configuration, so it only reports a skip
 * and exits successfully.
 */
int main(int argc, char **argv, char **envp)
{
	printf("[SKIP]\tNot a 32-bit x86 userspace\n");
	return 0;
}
42 #else
43
/*
 * Address of the syscall entry routine that run_syscall() invokes with
 * "call *syscall_addr".  main() loads it from get_syscall();
 * run_syscall_twice() temporarily points it at int80.
 */
long syscall_addr;
/*
 * Look up the AT_SYSINFO entry of the ELF auxiliary vector.
 *
 * The auxiliary vector is taken to begin immediately after the NULL pointer
 * that terminates envp, so envp must be the environment array exactly as it
 * was handed to main().
 *
 * Returns the AT_SYSINFO value, or 0 (after printing a warning) when the
 * vector holds no such entry.
 */
long get_syscall(char **envp)
{
	Elf32_auxv_t *auxv;

	/* Step over every environment string and the terminating NULL. */
	while (*envp++ != NULL)
		continue;

	for (auxv = (void *)envp; auxv->a_type != AT_NULL; auxv++) {
		if (auxv->a_type == AT_SYSINFO)
			return auxv->a_un.a_val;
	}

	printf("[WARN]\tAT_SYSINFO not supplied\n");
	return 0;
}
56
/*
 * int80: a routine consisting of "int $0x80" followed by "ret".
 *
 * run_syscall_twice() stores its address in syscall_addr so that the same
 * "call *syscall_addr" sequence in run_syscall() goes through the INT 80
 * entry.  Only the address of the symbol is used from C, hence the plain
 * char declaration.
 *
 * __asm__ is used instead of asm so the block is also accepted when the
 * compiler runs in a strict ISO mode.
 */
__asm__ (
	" .pushsection .text\n"
	" .global int80\n"
	"int80:\n"
	" int $0x80\n"
	" ret\n"
	" .popsection\n"
);
extern char int80;
66
/*
 * Snapshot of the sixteen 64-bit general-purpose registers.
 *
 * get_regs64 fills this in from assembly using hard-coded N*8 offsets
 * (rax at 0*8 ... r15 at 15*8), and check_regs64() walks r8..r15 through a
 * pointer, so the member order must stay exactly as listed.
 */
struct regs64 {
	uint64_t rax, rbx, rcx, rdx;
	uint64_t rsi, rdi, rbp, rsp;
	uint64_t r8, r9, r10, r11;
	uint64_t r12, r13, r14, r15;
};
struct regs64 regs64;

/* Set by main(): non-zero when the CS selector read there equals 0x23. */
int kernel_is_64bit;

/*
 * Set by main(): non-zero when the running kernel is older than 4.17, in
 * which case check_regs64() tolerates zeroed R8..R11 after INT 80.
 */
int clobber_ok;
76
/*
 * 64-bit helper routines, assembled with .code64 inside this 32-bit object.
 * They are meant to be run through call64_from_32().
 *
 * get_regs64:
 *	Stores rax..r15 into the global regs64 (slot N at offset N*8).
 *	The address of regs64 is loaded into %eax, so the incoming rax is
 *	first parked on the stack and then popped straight into slot 0.
 *	On return rax holds the address of regs64.
 *
 * poison_regs64:
 *	Loads r8 with 0x7f7f7f7f7f7f7f7f and every following register up to
 *	r15 with the previous value plus one.  These are the values
 *	check_regs64() expects to find afterwards.
 *
 * The trailing .code32 switches the assembler back for the rest of the file.
 */
__asm__ (
	" .pushsection .text\n"
	" .code64\n"
	"get_regs64:\n"
	" push %rax\n"
	" mov $regs64, %eax\n"
	" pop 0*8(%rax)\n"
	" movq %rbx, 1*8(%rax)\n"
	" movq %rcx, 2*8(%rax)\n"
	" movq %rdx, 3*8(%rax)\n"
	" movq %rsi, 4*8(%rax)\n"
	" movq %rdi, 5*8(%rax)\n"
	" movq %rbp, 6*8(%rax)\n"
	" movq %rsp, 7*8(%rax)\n"
	" movq %r8, 8*8(%rax)\n"
	" movq %r9, 9*8(%rax)\n"
	" movq %r10, 10*8(%rax)\n"
	" movq %r11, 11*8(%rax)\n"
	" movq %r12, 12*8(%rax)\n"
	" movq %r13, 13*8(%rax)\n"
	" movq %r14, 14*8(%rax)\n"
	" movq %r15, 15*8(%rax)\n"
	" ret\n"
	"poison_regs64:\n"
	/* Build the 64-bit pattern from two 32-bit halves. */
	" movq $0x7f7f7f7f, %r8\n"
	" shl $32, %r8\n"
	" orq $0x7f7f7f7f, %r8\n"
	" movq %r8, %r9\n"
	" incq %r9\n"
	" movq %r9, %r10\n"
	" incq %r10\n"
	" movq %r10, %r11\n"
	" incq %r11\n"
	" movq %r11, %r12\n"
	" incq %r12\n"
	" movq %r12, %r13\n"
	" incq %r13\n"
	" movq %r13, %r14\n"
	" incq %r14\n"
	" movq %r14, %r15\n"
	" incq %r15\n"
	" ret\n"
	" .code32\n"
	" .popsection\n"
);
/* 64-bit routines defined in this file's .code64 assembly block. */
extern void get_regs64(void);
extern void poison_regs64(void);

/*
 * Runs the given 64-bit routine from 32-bit code.  Not defined in this file;
 * presumably provided by thunks_32.S from the build line - confirm there.
 */
extern unsigned long call64_from_32(void (*function)(void));
print_regs64(void)125 void print_regs64(void)
126 {
127 if (!kernel_is_64bit)
128 return;
129 printf("ax:%016llx bx:%016llx cx:%016llx dx:%016llx\n", regs64.rax, regs64.rbx, regs64.rcx, regs64.rdx);
130 printf("si:%016llx di:%016llx bp:%016llx sp:%016llx\n", regs64.rsi, regs64.rdi, regs64.rbp, regs64.rsp);
131 printf(" 8:%016llx 9:%016llx 10:%016llx 11:%016llx\n", regs64.r8 , regs64.r9 , regs64.r10, regs64.r11);
132 printf("12:%016llx 13:%016llx 14:%016llx 15:%016llx\n", regs64.r12, regs64.r13, regs64.r14, regs64.r15);
133 }
134
/*
 * Read the running kernel's release string with uname() and store its
 * leading "major.minor" numbers in *version and *patchlevel.
 *
 * Exits with status 1 when uname() fails or when the release string does not
 * start with two dot-separated integers.  Anything following the minor
 * number is ignored, since callers only consume these two values.
 */
static void get_kernel_version(int *version, int *patchlevel)
{
	struct utsname utsname;

	if (uname(&utsname) != 0) {
		perror("uname");
		exit(1);
	}

	/*
	 * sscanf() returns the number of converted fields (or EOF on an
	 * empty string).  A short parse is a format problem rather than an
	 * errno-style failure, so it is reported as a malformed version.
	 */
	if (sscanf(utsname.release, "%d.%d", version, patchlevel) != 2) {
		printf("Malformed kernel version %s\n", utsname.release);
		exit(1);
	}
}
156
check_regs64(void)157 int check_regs64(void)
158 {
159 int err = 0;
160 int num = 8;
161 uint64_t *r64 = ®s64.r8;
162 uint64_t expected = 0x7f7f7f7f7f7f7f7fULL;
163
164 if (!kernel_is_64bit)
165 return 0;
166
167 do {
168 if (*r64 == expected++)
169 continue; /* register did not change */
170 if (syscall_addr != (long)&int80) {
171 /*
172 * Non-INT80 syscall entrypoints are allowed to clobber R8+ regs:
173 * either clear them to 0, or for R11, load EFLAGS.
174 */
175 if (*r64 == 0)
176 continue;
177 if (num == 11) {
178 printf("[NOTE]\tR11 has changed:%016llx - assuming clobbered by SYSRET insn\n", *r64);
179 continue;
180 }
181 } else {
182 /*
183 * INT80 syscall entrypoint can be used by
184 * 64-bit programs too, unlike SYSCALL/SYSENTER.
185 * Therefore it must preserve R12+
186 * (they are callee-saved registers in 64-bit C ABI).
187 *
188 * Starting in Linux 4.17 (and any kernel that
189 * backports the change), R8..11 are preserved.
190 * Historically (and probably unintentionally), they
191 * were clobbered or zeroed.
192 */
193 if (clobber_ok && *r64 == 0 && num <= 11) {
194 printf("Warning: kernel zeroed r%d, "
195 "allowing on < v4.17\n", num);
196 continue;
197 }
198 }
199 printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64);
200 err++;
201 } while (r64++, ++num < 16);
202
203 if (!err)
204 printf("[OK]\tR8..R15 did not leak kernel data\n");
205 return err;
206 }
207
/*
 * Arguments of the pselect call issued by run_syscall().  They are globals
 * because the inline assembly there refers to them by symbol name;
 * prep_args() fills them in before every call.
 */
int nfds;
fd_set rfds;
fd_set wfds;
fd_set efds;
struct timespec timeout;
sigset_t sigmask;

/* Sixth argument: pointer to the signal mask plus its size in bytes. */
struct {
	sigset_t *sp;
	int sz;
} sigmask_desc;
218
prep_args()219 void prep_args()
220 {
221 nfds = 42;
222 FD_ZERO(&rfds);
223 FD_ZERO(&wfds);
224 FD_ZERO(&efds);
225 FD_SET(0, &rfds);
226 FD_SET(1, &wfds);
227 FD_SET(2, &efds);
228 timeout.tv_sec = 0;
229 timeout.tv_nsec = 123;
230 sigemptyset(&sigmask);
231 sigaddset(&sigmask, SIGINT);
232 sigaddset(&sigmask, SIGUSR2);
233 sigaddset(&sigmask, SIGRTMAX);
234 sigmask_desc.sp = &sigmask;
235 sigmask_desc.sz = 8; /* bytes */
236 }
237
/*
 * Print "name=<value>" followed by a decoded view of the EFLAGS bits in r,
 * highest described bit first.  The bit 0 strings end in a newline, so each
 * call produces exactly one line.  Bits above the described range only
 * trigger an "(extra bits are set)" note.
 */
static void print_flags(const char *name, unsigned long r)
{
	/*
	 * Two strings per flag bit, lowest bit first: entry 2*bit is printed
	 * when the bit is clear, entry 2*bit+1 when it is set.  Empty strings
	 * print nothing.
	 */
	static const char *const bitarray[] = {
		"\n" ,"c\n" ,/* Carry Flag */
		"0 " ,"1 " ,/* Bit 1 - always on */
		"" ,"p " ,/* Parity Flag */
		"0 " ,"3? " ,
		"" ,"a " ,/* Auxiliary carry Flag */
		"0 " ,"5? " ,
		"" ,"z " ,/* Zero Flag */
		"" ,"s " ,/* Sign Flag */
		"" ,"t " ,/* Trap Flag */
		"" ,"i " ,/* Interrupt Flag */
		"" ,"d " ,/* Direction Flag */
		"" ,"o " ,/* Overflow Flag */
		"0 " ,"1 " ,/* I/O Privilege Level (2 bits) */
		"0" ,"1" ,/* I/O Privilege Level (2 bits) */
		"" ,"n " ,/* Nested Task */
		"0 " ,"15? ",
		"" ,"r " ,/* Resume Flag */
		"" ,"v " ,/* Virtual Mode */
		"" ,"ac " ,/* Alignment Check/Access Control */
		"" ,"vif ",/* Virtual Interrupt Flag */
		"" ,"vip ",/* Virtual Interrupt Pending */
		"" ,"id " ,/* CPUID detection */
	};
	/* Number of described bits, derived from the table (22). */
	enum { NUM_FLAG_BITS = sizeof(bitarray) / sizeof(bitarray[0]) / 2 };
	int bit;

	printf("%s=%016lx ", name, r);
	if ((r >> NUM_FLAG_BITS) != 0)
		printf("(extra bits are set) ");

	for (bit = NUM_FLAG_BITS - 1; bit >= 0; bit--) {
		const char *text = bitarray[2 * bit + ((r >> bit) & 1)];

		if (text[0])
			fputs(text, stdout);
	}
}
280
run_syscall(void)281 int run_syscall(void)
282 {
283 long flags, bad_arg;
284
285 prep_args();
286
287 if (kernel_is_64bit)
288 call64_from_32(poison_regs64);
289 /*print_regs64();*/
290
291 asm("\n"
292 /* Try 6-arg syscall: pselect. It should return quickly */
293 " push %%ebp\n"
294 " mov $308, %%eax\n" /* PSELECT */
295 " mov nfds, %%ebx\n" /* ebx arg1 */
296 " mov $rfds, %%ecx\n" /* ecx arg2 */
297 " mov $wfds, %%edx\n" /* edx arg3 */
298 " mov $efds, %%esi\n" /* esi arg4 */
299 " mov $timeout, %%edi\n" /* edi arg5 */
300 " mov $sigmask_desc, %%ebp\n" /* %ebp arg6 */
301 " push $0x200ed7\n" /* set almost all flags */
302 " popf\n" /* except TF, IOPL, NT, RF, VM, AC, VIF, VIP */
303 " call *syscall_addr\n"
304 /* Check that registers are not clobbered */
305 " pushf\n"
306 " pop %%eax\n"
307 " cld\n"
308 " cmp nfds, %%ebx\n" /* ebx arg1 */
309 " mov $1, %%ebx\n"
310 " jne 1f\n"
311 " cmp $rfds, %%ecx\n" /* ecx arg2 */
312 " mov $2, %%ebx\n"
313 " jne 1f\n"
314 " cmp $wfds, %%edx\n" /* edx arg3 */
315 " mov $3, %%ebx\n"
316 " jne 1f\n"
317 " cmp $efds, %%esi\n" /* esi arg4 */
318 " mov $4, %%ebx\n"
319 " jne 1f\n"
320 " cmp $timeout, %%edi\n" /* edi arg5 */
321 " mov $5, %%ebx\n"
322 " jne 1f\n"
323 " cmpl $sigmask_desc, %%ebp\n" /* %ebp arg6 */
324 " mov $6, %%ebx\n"
325 " jne 1f\n"
326 " mov $0, %%ebx\n"
327 "1:\n"
328 " pop %%ebp\n"
329 : "=a" (flags), "=b" (bad_arg)
330 :
331 : "cx", "dx", "si", "di"
332 );
333
334 if (kernel_is_64bit) {
335 memset(®s64, 0x77, sizeof(regs64));
336 call64_from_32(get_regs64);
337 /*print_regs64();*/
338 }
339
340 /*
341 * On paravirt kernels, flags are not preserved across syscalls.
342 * Thus, we do not consider it a bug if some are changed.
343 * We just show ones which do.
344 */
345 if ((0x200ed7 ^ flags) != 0) {
346 print_flags("[WARN]\tFlags before", 0x200ed7);
347 print_flags("[WARN]\tFlags after", flags);
348 print_flags("[WARN]\tFlags change", (0x200ed7 ^ flags));
349 }
350
351 if (bad_arg) {
352 printf("[FAIL]\targ#%ld clobbered\n", bad_arg);
353 return 1;
354 }
355 printf("[OK]\tArguments are preserved across syscall\n");
356
357 return check_regs64();
358 }
359
run_syscall_twice()360 int run_syscall_twice()
361 {
362 int exitcode = 0;
363 long sv;
364
365 if (syscall_addr) {
366 printf("[RUN]\tExecuting 6-argument 32-bit syscall via VDSO\n");
367 exitcode = run_syscall();
368 }
369 sv = syscall_addr;
370 syscall_addr = (long)&int80;
371 printf("[RUN]\tExecuting 6-argument 32-bit syscall via INT 80\n");
372 exitcode += run_syscall();
373 syscall_addr = sv;
374 return exitcode;
375 }
376
/*
 * Fork, and continue the test in a child that is traced by its parent.
 *
 * Child: requests tracing with PTRACE_TRACEME, stops itself with SIGSTOP and
 * then returns to the caller.  If PTRACE_TRACEME fails the child exits
 * with 0.
 *
 * Parent: never returns.  It resumes the child with PTRACE_SYSCALL after
 * every stop and finally exits with the child's exit status, with the number
 * of the signal that killed it, or with 255 when waitpid() fails or reports
 * something unexpected.  If fork() fails the process exits with 1.
 */
void ptrace_me(void)
{
	pid_t pid;

	/* Flush buffered output so the child does not emit it a second time. */
	fflush(NULL);
	pid = fork();
	if (pid < 0)
		exit(1);
	if (pid == 0) {
		/* child */
		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) != 0)
			exit(0);
		raise(SIGSTOP);
		return;
	}

	/* parent */
	printf("[RUN]\tRunning tests under ptrace\n");
	while (1) {
		int status;

		pid = waitpid(-1, &status, __WALL);
		/* status is only meaningful when waitpid() returned a pid. */
		if (pid <= 0)
			exit(255);
		if (WIFEXITED(status))
			exit(WEXITSTATUS(status));
		if (WIFSIGNALED(status))
			exit(WTERMSIG(status));
		if (!WIFSTOPPED(status)) /* paranoia */
			exit(255);
		/*
		 * Note: we do not inject sig = WSTOPSIG(status).
		 * We probably should, but careful: do not inject SIGTRAP
		 * generated by syscall entry/exit stops.
		 * That kills the child.
		 */
		ptrace(PTRACE_SYSCALL, pid, 0L, 0L /*sig*/);
	}
}
412
main(int argc,char ** argv,char ** envp)413 int main(int argc, char **argv, char **envp)
414 {
415 int exitcode = 0;
416 int cs;
417 int version, patchlevel;
418
419 asm("\n"
420 " movl %%cs, %%eax\n"
421 : "=a" (cs)
422 );
423 kernel_is_64bit = (cs == 0x23);
424 if (!kernel_is_64bit)
425 printf("[NOTE]\tNot a 64-bit kernel, won't test R8..R15 leaks\n");
426
427 get_kernel_version(&version, &patchlevel);
428 clobber_ok = version < 4 || (version == 4 && patchlevel < 17);
429
430 /* This only works for non-static builds:
431 * syscall_addr = dlsym(dlopen("linux-gate.so.1", RTLD_NOW), "__kernel_vsyscall");
432 */
433 syscall_addr = get_syscall(envp);
434
435 exitcode += run_syscall_twice();
436 ptrace_me();
437 exitcode += run_syscall_twice();
438
439 return exitcode;
440 }
441 #endif
442