1
2 /*--------------------------------------------------------------------*/
3 /*--- Handle system calls. syswrap-main.c ---*/
4 /*--------------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2000-2011 Julian Seward
11 jseward@acm.org
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29 */
30
31 #include "libvex_guest_offsets.h"
32 #include "libvex_trc_values.h"
33 #include "pub_core_basics.h"
34 #include "pub_core_aspacemgr.h"
35 #include "pub_core_vki.h"
36 #include "pub_core_vkiscnums.h"
37 #include "pub_core_libcsetjmp.h" // to keep _threadstate.h happy
38 #include "pub_core_threadstate.h"
39 #include "pub_core_libcbase.h"
40 #include "pub_core_libcassert.h"
41 #include "pub_core_libcprint.h"
42 #include "pub_core_libcproc.h" // For VG_(getpid)()
43 #include "pub_core_libcsignal.h"
44 #include "pub_core_scheduler.h" // For VG_({acquire,release}_BigLock),
45 // and VG_(vg_yield)
46 #include "pub_core_stacktrace.h" // For VG_(get_and_pp_StackTrace)()
47 #include "pub_core_tooliface.h"
48 #include "pub_core_options.h"
49 #include "pub_core_signals.h" // For VG_SIGVGKILL, VG_(poll_signals)
50 #include "pub_core_syscall.h"
51 #include "pub_core_machine.h"
52 #include "pub_core_syswrap.h"
53
54 #include "priv_types_n_macros.h"
55 #include "priv_syswrap-main.h"
56
57 #if defined(VGO_darwin)
58 #include "priv_syswrap-darwin.h"
59 #endif
60
61 /* Useful info which needs to be recorded somewhere:
62 Use of registers in syscalls is:
63
64 NUM ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT
65 LINUX:
66 x86 eax ebx ecx edx esi edi ebp n/a n/a eax (== NUM)
67 amd64 rax rdi rsi rdx r10 r8 r9 n/a n/a rax (== NUM)
68 ppc32 r0 r3 r4 r5 r6 r7 r8 n/a n/a r3+CR0.SO (== ARG1)
69 ppc64 r0 r3 r4 r5 r6 r7 r8 n/a n/a r3+CR0.SO (== ARG1)
70 arm r7 r0 r1 r2 r3 r4 r5 n/a n/a r0 (== ARG1)
71
72 On s390x the svc instruction is used for system calls. The system call
73 number is encoded in the instruction (8 bit immediate field). Since Linux
74 2.6 it is also allowed to use svc 0 with the system call number in r1.
75 This was introduced for system calls >255, but works for all. It is
76 also possible to see the svc 0 together with an EXecute instruction, that
77 fills in the immediate field.
78 s390x r1/SVC r2 r3 r4 r5 r6 r7 n/a n/a r2 (== ARG1)
79
80 DARWIN:
81 x86 eax +4 +8 +12 +16 +20 +24 +28 +32 edx:eax, eflags.c
82 amd64 rax rdi rsi rdx rcx r8 r9 +8 +16 rdx:rax, rflags.c
83
84 For x86-darwin, "+N" denotes "in memory at N(%esp)"; ditto
85 amd64-darwin. Apparently 0(%esp) is some kind of return address
86 (perhaps for syscalls done with "sysenter"?) I don't think it is
87 relevant for syscalls done with "int $0x80/1/2".
88 */
89
90 /* This is the top level of the system-call handler module. All
91 system calls are channelled through here, doing two things:
92
93 * notify the tool of the events (mem/reg reads, writes) happening
94
95 * perform the syscall, usually by passing it along to the kernel
96 unmodified.
97
98 A magical piece of assembly code, do_syscall_for_client_WRK, in
99 syscall-$PLATFORM.S does the tricky bit of passing a syscall to the
100 kernel, whilst having the simulator retain control.
101 */
102
103 /* The main function is VG_(client_syscall). The simulation calls it
104 whenever a client thread wants to do a syscall. The following is a
105 sketch of what it does.
106
107 * Ensures the root thread's stack is suitably mapped. Tedious and
108 arcane. See big big comment in VG_(client_syscall).
109
110 * First, it rounds up the syscall number and args (which is a
111 platform dependent activity) and puts them in a struct ("args")
112 and also a copy in "orig_args".
113
114 The pre/post wrappers refer to these structs and so no longer
115 need magic macros to access any specific registers. This struct
116 is stored in thread-specific storage.
117
118
119 * The pre-wrapper is called, passing it a pointer to struct
120 "args".
121
122
123 * The pre-wrapper examines the args and pokes the tool
124 appropriately. It may modify the args; this is why "orig_args"
125 is also stored.
126
127 The pre-wrapper may choose to 'do' the syscall itself, and
128 concludes one of three outcomes:
129
130 Success(N) -- syscall is already complete, with success;
131 result is N
132
133 Fail(N) -- syscall is already complete, with failure;
134 error code is N
135
136 HandToKernel -- (the usual case): this needs to be given to
137 the kernel to be done, using the values in
138 the possibly-modified "args" struct.
139
140 In addition, the pre-wrapper may set some flags:
141
142 MayBlock -- only applicable when outcome==HandToKernel
143
144 PostOnFail -- only applicable when outcome==HandToKernel or Fail
145
146
147 * If the pre-outcome is HandToKernel, the syscall is duly handed
148 off to the kernel (perhaps involving some thread switchery, but
149 that's not important). This reduces the possible set of outcomes
150 to either Success(N) or Fail(N).
151
152
153 * The outcome (Success(N) or Fail(N)) is written back to the guest
154 register(s). This is platform specific:
155
156 x86: Success(N) ==> eax = N
157 Fail(N) ==> eax = -N
158
159 ditto amd64
160
161 ppc32: Success(N) ==> r3 = N, CR0.SO = 0
162 Fail(N) ==> r3 = N, CR0.SO = 1
163
164 Darwin:
165 x86: Success(N) ==> edx:eax = N, cc = 0
166 Fail(N) ==> edx:eax = N, cc = 1
167
168 s390x: Success(N) ==> r2 = N
169 Fail(N) ==> r2 = -N
170
171 * The post wrapper is called if:
172
173 - it exists, and
174 - outcome==Success or (outcome==Fail and PostOnFail is set)
175
176 The post wrapper is passed the adulterated syscall args (struct
177 "args"), and the syscall outcome (viz, Success(N) or Fail(N)).
178
179 There are several other complications, primarily to do with
180 syscalls getting interrupted, explained in comments in the code.
181 */
182
183 /* CAVEATS for writing wrappers. It is important to follow these!
184
185 The macros defined in priv_types_n_macros.h are designed to help
186 decouple the wrapper logic from the actual representation of
187 syscall args/results, since these wrappers are designed to work on
188 multiple platforms.
189
190 Sometimes a PRE wrapper will complete the syscall itself, without
191 handing it to the kernel. It will use one of SET_STATUS_Success,
192 SET_STATUS_Failure or SET_STATUS_from_SysRes to set the return
193 value. It is critical to appreciate that use of the macro does not
194 immediately cause the underlying guest state to be updated -- that
195 is done by the driver logic in this file, when the wrapper returns.
196
197 As a result, PRE wrappers of the following form will malfunction:
198
199 PRE(fooble)
200 {
201 ... do stuff ...
202 SET_STATUS_Somehow(...)
203
204 // do something that assumes guest state is up to date
205 }
206
207 In particular, direct or indirect calls to VG_(poll_signals) after
208 setting STATUS can cause the guest state to be read (in order to
209 build signal frames). Do not do this. If you want a signal poll
210 after the syscall goes through, do "*flags |= SfPollAfter" and the
211 driver logic will do it for you.
212
213 -----------
214
215 Another critical requirement following introduction of new address
216 space manager (JRS, 20050923):
217
218 In a situation where the mappedness of memory has changed, aspacem
219 should be notified BEFORE the tool. Hence the following is
220 correct:
221
222 Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
223 VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
224 if (d)
225 VG_(discard_translations)(s->start, s->end+1 - s->start);
226
227 whilst this is wrong:
228
229 VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
230 Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
231 if (d)
232 VG_(discard_translations)(s->start, s->end+1 - s->start);
233
234 The reason is that the tool may itself ask aspacem for more shadow
235 memory as a result of the VG_TRACK call. In such a situation it is
236 critical that aspacem's segment array is up to date -- hence the
237 need to notify aspacem first.
238
239 -----------
240
241 Also .. take care to call VG_(discard_translations) whenever
242 memory with execute permissions is unmapped.
243 */
244
245
246 /* ---------------------------------------------------------------------
247 Do potentially blocking syscall for the client, and mess with
248 signal masks at the same time.
249 ------------------------------------------------------------------ */
250
251 /* Perform a syscall on behalf of a client thread, using a specific
252 signal mask. On completion, the signal mask is set to restore_mask
253 (which presumably blocks almost everything). If a signal happens
254 during the syscall, the handler should call
255 VG_(fixup_guest_state_after_syscall_interrupted) to adjust the
256 thread's context to do the right thing.
257
258 The _WRK function is handwritten assembly, implemented per-platform
259 in coregrind/m_syswrap/syscall-$PLAT.S. It has some very magic
260 properties. See comments at the top of
261 VG_(fixup_guest_state_after_syscall_interrupted) below for details.
262
263 This function (these functions) are required to return zero in case
264 of success (even if the syscall itself failed), and nonzero if the
265 sigprocmask-swizzling calls failed. We don't actually care about
266 the failure values from sigprocmask, although most of the assembly
267 implementations do attempt to return that, using the convention
268 0 for success, or 0x8000 | error-code for failure.
269 */
270 #if defined(VGO_linux)
271 extern
272 UWord ML_(do_syscall_for_client_WRK)( Word syscallno,
273 void* guest_state,
274 const vki_sigset_t *syscall_mask,
275 const vki_sigset_t *restore_mask,
276 Word sigsetSzB );
277 #elif defined(VGO_darwin)
278 extern
279 UWord ML_(do_syscall_for_client_unix_WRK)( Word syscallno,
280 void* guest_state,
281 const vki_sigset_t *syscall_mask,
282 const vki_sigset_t *restore_mask,
283 Word sigsetSzB ); /* unused */
284 extern
285 UWord ML_(do_syscall_for_client_mach_WRK)( Word syscallno,
286 void* guest_state,
287 const vki_sigset_t *syscall_mask,
288 const vki_sigset_t *restore_mask,
289 Word sigsetSzB ); /* unused */
290 extern
291 UWord ML_(do_syscall_for_client_mdep_WRK)( Word syscallno,
292 void* guest_state,
293 const vki_sigset_t *syscall_mask,
294 const vki_sigset_t *restore_mask,
295 Word sigsetSzB ); /* unused */
296 #else
297 # error "Unknown OS"
298 #endif
299
300
static
void do_syscall_for_client ( Int syscallno,
                             ThreadState* tst,
                             const vki_sigset_t* syscall_mask )
{
   /* Hand 'syscallno' to the kernel on behalf of thread 'tst', with
      'syscall_mask' installed for the duration of the syscall.  The
      previous mask is captured in 'saved' by the per-platform assembly
      helper (declared above), which restores it on completion.  See
      the big comment preceding this function for the full contract. */
   vki_sigset_t saved;
   UWord err;
#  if defined(VGO_linux)
   err = ML_(do_syscall_for_client_WRK)(
            syscallno, &tst->arch.vex,
            syscall_mask, &saved, sizeof(vki_sigset_t)
         );
#  elif defined(VGO_darwin)
   /* On Darwin the syscall number encodes a class (Unix/Mach/mdep);
      dispatch to the matching assembly helper, passing it the
      class-stripped number the kernel actually expects. */
   switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         err = ML_(do_syscall_for_client_unix_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         err = ML_(do_syscall_for_client_mach_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         err = ML_(do_syscall_for_client_mdep_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      default:
         vg_assert(0);
         /*NOTREACHED*/
         break;
   }
#  else
#    error "Unknown OS"
#  endif
   /* Nonzero 'err' means the sigprocmask swizzling inside the helper
      failed -- NOT that the client's syscall failed (that outcome is
      written into the guest state by the helper). */
   vg_assert2(
      err == 0,
      "ML_(do_syscall_for_client_WRK): sigprocmask error %d",
      (Int)(err & 0xFFF)
   );
}
347
348
349 /* ---------------------------------------------------------------------
350 Impedance matchers and misc helpers
351 ------------------------------------------------------------------ */
352
353 static
eq_SyscallArgs(SyscallArgs * a1,SyscallArgs * a2)354 Bool eq_SyscallArgs ( SyscallArgs* a1, SyscallArgs* a2 )
355 {
356 return a1->sysno == a2->sysno
357 && a1->arg1 == a2->arg1
358 && a1->arg2 == a2->arg2
359 && a1->arg3 == a2->arg3
360 && a1->arg4 == a2->arg4
361 && a1->arg5 == a2->arg5
362 && a1->arg6 == a2->arg6
363 && a1->arg7 == a2->arg7
364 && a1->arg8 == a2->arg8;
365 }
366
367 static
eq_SyscallStatus(SyscallStatus * s1,SyscallStatus * s2)368 Bool eq_SyscallStatus ( SyscallStatus* s1, SyscallStatus* s2 )
369 {
370 /* was: return s1->what == s2->what && sr_EQ( s1->sres, s2->sres ); */
371 if (s1->what == s2->what && sr_EQ( s1->sres, s2->sres ))
372 return True;
373 # if defined(VGO_darwin)
374 /* Darwin-specific debugging guff */
375 vg_assert(s1->what == s2->what);
376 VG_(printf)("eq_SyscallStatus:\n");
377 VG_(printf)(" {%lu %lu %u}\n", s1->sres._wLO, s1->sres._wHI, s1->sres._mode);
378 VG_(printf)(" {%lu %lu %u}\n", s2->sres._wLO, s2->sres._wHI, s2->sres._mode);
379 vg_assert(0);
380 # endif
381 return False;
382 }
383
384 /* Convert between SysRes and SyscallStatus, to the extent possible. */
385
386 static
convert_SysRes_to_SyscallStatus(SysRes res)387 SyscallStatus convert_SysRes_to_SyscallStatus ( SysRes res )
388 {
389 SyscallStatus status;
390 status.what = SsComplete;
391 status.sres = res;
392 return status;
393 }
394
395
396 /* Impedance matchers. These convert syscall arg or result data from
397 the platform-specific in-guest-state format to the canonical
398 formats, and back. */
399
static
void getSyscallArgsFromGuestState ( /*OUT*/SyscallArgs*       canonical,
                                    /*IN*/ VexGuestArchState* gst_vanilla,
                                    /*IN*/ UInt trc )
{
   /* Marshal the platform-specific in-guest-state syscall number and
      arguments into the canonical 'args' struct.  Register/stack
      assignments follow the per-platform table in the big comment at
      the top of this file.  'trc' (the VEX trap reason) is only
      consulted on x86-darwin, to classify the syscall flavour. */
#if defined(VGP_x86_linux)
   /* Linux/x86: number in %eax, args in %ebx..%ebp. */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   canonical->sysno = gst->guest_EAX;
   canonical->arg1  = gst->guest_EBX;
   canonical->arg2  = gst->guest_ECX;
   canonical->arg3  = gst->guest_EDX;
   canonical->arg4  = gst->guest_ESI;
   canonical->arg5  = gst->guest_EDI;
   canonical->arg6  = gst->guest_EBP;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_amd64_linux)
   /* Linux/amd64: number in %rax; note arg4 is %r10, not %rcx. */
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   canonical->sysno = gst->guest_RAX;
   canonical->arg1  = gst->guest_RDI;
   canonical->arg2  = gst->guest_RSI;
   canonical->arg3  = gst->guest_RDX;
   canonical->arg4  = gst->guest_R10;
   canonical->arg5  = gst->guest_R8;
   canonical->arg6  = gst->guest_R9;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_ppc32_linux)
   /* Linux/ppc32: number in r0, args in r3..r8. */
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   canonical->sysno = gst->guest_GPR0;
   canonical->arg1  = gst->guest_GPR3;
   canonical->arg2  = gst->guest_GPR4;
   canonical->arg3  = gst->guest_GPR5;
   canonical->arg4  = gst->guest_GPR6;
   canonical->arg5  = gst->guest_GPR7;
   canonical->arg6  = gst->guest_GPR8;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_ppc64_linux)
   /* Linux/ppc64: same register assignment as ppc32. */
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   canonical->sysno = gst->guest_GPR0;
   canonical->arg1  = gst->guest_GPR3;
   canonical->arg2  = gst->guest_GPR4;
   canonical->arg3  = gst->guest_GPR5;
   canonical->arg4  = gst->guest_GPR6;
   canonical->arg5  = gst->guest_GPR7;
   canonical->arg6  = gst->guest_GPR8;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_arm_linux)
   /* Linux/arm (EABI): number in r7, args in r0..r5. */
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   canonical->sysno = gst->guest_R7;
   canonical->arg1  = gst->guest_R0;
   canonical->arg2  = gst->guest_R1;
   canonical->arg3  = gst->guest_R2;
   canonical->arg4  = gst->guest_R3;
   canonical->arg5  = gst->guest_R4;
   canonical->arg6  = gst->guest_R5;
   canonical->arg7  = 0;
   canonical->arg8  = 0;

#elif defined(VGP_x86_darwin)
   /* Darwin/x86: number in %eax, args in memory above %esp. */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_ESP;
   // GrP fixme hope syscalls aren't called with really shallow stacks...
   canonical->sysno = gst->guest_EAX;
   if (canonical->sysno != 0) {
      // stack[0] is return address
      canonical->arg1  = stack[1];
      canonical->arg2  = stack[2];
      canonical->arg3  = stack[3];
      canonical->arg4  = stack[4];
      canonical->arg5  = stack[5];
      canonical->arg6  = stack[6];
      canonical->arg7  = stack[7];
      canonical->arg8  = stack[8];
   } else {
      /* sysno == 0 is the syscall() indirection: the real number is
         the first stack arg and all other args shift up by one. */
      // GrP fixme hack handle syscall()
      // GrP fixme what about __syscall() ?
      // stack[0] is return address
      // DDD: the tool can't see that the params have been shifted!  Can
      //      lead to incorrect checking, I think, because the PRRAn/PSARn
      //      macros will mention the pre-shifted args.
      canonical->sysno = stack[1];
      vg_assert(canonical->sysno != 0);
      canonical->arg1  = stack[2];
      canonical->arg2  = stack[3];
      canonical->arg3  = stack[4];
      canonical->arg4  = stack[5];
      canonical->arg5  = stack[6];
      canonical->arg6  = stack[7];
      canonical->arg7  = stack[8];
      canonical->arg8  = stack[9];

      PRINT("SYSCALL[%d,?](%s) syscall(%s, ...); please stand by...\n",
            VG_(getpid)(), /*tid,*/
            VG_SYSNUM_STRING(0), VG_SYSNUM_STRING(canonical->sysno));
   }

   // Here we determine what kind of syscall it was by looking at the
   // interrupt kind, and then encode the syscall number using the 64-bit
   // encoding for Valgrind's internal use.
   //
   // DDD: Would it be better to stash the JMP kind into the Darwin
   // thread state rather than passing in the trc?
   switch (trc) {
      case VEX_TRC_JMP_SYS_INT128:
         // int $0x80 = Unix, 64-bit result
         vg_assert(canonical->sysno >= 0);
         canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno);
         break;
      case VEX_TRC_JMP_SYS_SYSENTER:
         // syscall = Unix, 32-bit result
         // OR Mach, 32-bit result
         /* sysenter disambiguates Unix vs Mach by sign of the number:
            non-negative = Unix, negative = Mach (negated). */
         if (canonical->sysno >= 0) {
            // GrP fixme hack: 0xffff == I386_SYSCALL_NUMBER_MASK
            canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno
                                                                & 0xffff);
         } else {
            canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
         }
         break;
      case VEX_TRC_JMP_SYS_INT129:
         // int $0x81 = Mach, 32-bit result
         vg_assert(canonical->sysno < 0);
         canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
         break;
      case VEX_TRC_JMP_SYS_INT130:
         // int $0x82 = mdep, 32-bit result
         vg_assert(canonical->sysno >= 0);
         canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MDEP(canonical->sysno);
         break;
      default:
         vg_assert(0);
         break;
   }

#elif defined(VGP_amd64_darwin)
   /* Darwin/amd64: first six args in registers, args 7/8 on the stack. */
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_RSP;

   vg_assert(trc == VEX_TRC_JMP_SYS_SYSCALL);

   // GrP fixme hope syscalls aren't called with really shallow stacks...
   canonical->sysno = gst->guest_RAX;
   if (canonical->sysno != __NR_syscall) {
      // stack[0] is return address
      canonical->arg1  = gst->guest_RDI;
      canonical->arg2  = gst->guest_RSI;
      canonical->arg3  = gst->guest_RDX;
      canonical->arg4  = gst->guest_R10;  // not rcx with syscall insn
      canonical->arg5  = gst->guest_R8;
      canonical->arg6  = gst->guest_R9;
      canonical->arg7  = stack[1];
      canonical->arg8  = stack[2];
   } else {
      /* __NR_syscall is the syscall() indirection: the real number is
         in %rdi and all other args shift up by one. */
      // GrP fixme hack handle syscall()
      // GrP fixme what about __syscall() ?
      // stack[0] is return address
      // DDD: the tool can't see that the params have been shifted!  Can
      //      lead to incorrect checking, I think, because the PRRAn/PSARn
      //      macros will mention the pre-shifted args.
      canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(gst->guest_RDI);
      vg_assert(canonical->sysno != __NR_syscall);
      canonical->arg1  = gst->guest_RSI;
      canonical->arg2  = gst->guest_RDX;
      canonical->arg3  = gst->guest_R10;  // not rcx with syscall insn
      canonical->arg4  = gst->guest_R8;
      canonical->arg5  = gst->guest_R9;
      canonical->arg6  = stack[1];
      canonical->arg7  = stack[2];
      canonical->arg8  = stack[3];

      PRINT("SYSCALL[%d,?](%s) syscall(%s, ...); please stand by...\n",
            VG_(getpid)(), /*tid,*/
            VG_SYSNUM_STRING(0), VG_SYSNUM_STRING(canonical->sysno));
   }

   // no canonical->sysno adjustment needed

#elif defined(VGP_s390x_linux)
   /* Linux/s390x: number latched in guest_SYSNO, args in r2..r7. */
   VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
   canonical->sysno = gst->guest_SYSNO;
   canonical->arg1  = gst->guest_r2;
   canonical->arg2  = gst->guest_r3;
   canonical->arg3  = gst->guest_r4;
   canonical->arg4  = gst->guest_r5;
   canonical->arg5  = gst->guest_r6;
   canonical->arg6  = gst->guest_r7;
   canonical->arg7  = 0;
   canonical->arg8  = 0;
#else
#  error "getSyscallArgsFromGuestState: unknown arch"
#endif
}
599
static
void putSyscallArgsIntoGuestState ( /*IN*/ SyscallArgs*       canonical,
                                    /*OUT*/VexGuestArchState* gst_vanilla )
{
   /* Inverse of getSyscallArgsFromGuestState: write the (possibly
      wrapper-modified) canonical args back into the guest registers
      (and, on Darwin, the guest stack) so the kernel sees them. */
#if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   gst->guest_EAX = canonical->sysno;
   gst->guest_EBX = canonical->arg1;
   gst->guest_ECX = canonical->arg2;
   gst->guest_EDX = canonical->arg3;
   gst->guest_ESI = canonical->arg4;
   gst->guest_EDI = canonical->arg5;
   gst->guest_EBP = canonical->arg6;

#elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   gst->guest_RAX = canonical->sysno;
   gst->guest_RDI = canonical->arg1;
   gst->guest_RSI = canonical->arg2;
   gst->guest_RDX = canonical->arg3;
   gst->guest_R10 = canonical->arg4;
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;

#elif defined(VGP_ppc32_linux)
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   gst->guest_GPR0 = canonical->sysno;
   gst->guest_GPR3 = canonical->arg1;
   gst->guest_GPR4 = canonical->arg2;
   gst->guest_GPR5 = canonical->arg3;
   gst->guest_GPR6 = canonical->arg4;
   gst->guest_GPR7 = canonical->arg5;
   gst->guest_GPR8 = canonical->arg6;

#elif defined(VGP_ppc64_linux)
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   gst->guest_GPR0 = canonical->sysno;
   gst->guest_GPR3 = canonical->arg1;
   gst->guest_GPR4 = canonical->arg2;
   gst->guest_GPR5 = canonical->arg3;
   gst->guest_GPR6 = canonical->arg4;
   gst->guest_GPR7 = canonical->arg5;
   gst->guest_GPR8 = canonical->arg6;

#elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   gst->guest_R7 = canonical->sysno;
   gst->guest_R0 = canonical->arg1;
   gst->guest_R1 = canonical->arg2;
   gst->guest_R2 = canonical->arg3;
   gst->guest_R3 = canonical->arg4;
   gst->guest_R4 = canonical->arg5;
   gst->guest_R5 = canonical->arg6;

#elif defined(VGP_x86_darwin)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_ESP;

   /* Strip the Valgrind-internal class encoding before handing the
      number back to the guest. */
   gst->guest_EAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);

   // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;
   // stack[0] is return address
   stack[1] = canonical->arg1;
   stack[2] = canonical->arg2;
   stack[3] = canonical->arg3;
   stack[4] = canonical->arg4;
   stack[5] = canonical->arg5;
   stack[6] = canonical->arg6;
   stack[7] = canonical->arg7;
   stack[8] = canonical->arg8;

#elif defined(VGP_amd64_darwin)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_RSP;

   gst->guest_RAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);
   // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;

   // stack[0] is return address
   gst->guest_RDI = canonical->arg1;
   gst->guest_RSI = canonical->arg2;
   gst->guest_RDX = canonical->arg3;
   /* NOTE(review): the getter reads arg4 from R10 ("not rcx with
      syscall insn") but arg4 is written back to RCX here -- confirm
      this asymmetry is intentional. */
   gst->guest_RCX = canonical->arg4;
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;
   stack[1] = canonical->arg7;
   stack[2] = canonical->arg8;

#elif defined(VGP_s390x_linux)
   VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
   gst->guest_SYSNO  = canonical->sysno;
   gst->guest_r2     = canonical->arg1;
   gst->guest_r3     = canonical->arg2;
   gst->guest_r4     = canonical->arg3;
   gst->guest_r5     = canonical->arg4;
   gst->guest_r6     = canonical->arg5;
   gst->guest_r7     = canonical->arg6;

#else
#  error "putSyscallArgsIntoGuestState: unknown arch"
#endif
}
702
static
void getSyscallStatusFromGuestState ( /*OUT*/SyscallStatus*     canonical,
                                      /*IN*/ VexGuestArchState* gst_vanilla )
{
   /* Decode the syscall outcome left in the guest registers into a
      canonical SyscallStatus, via the per-platform SysRes constructor.
      Always produces an SsComplete status. */
#  if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_x86_linux)( gst->guest_EAX );
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_amd64_linux)( gst->guest_RAX );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc32_linux)
   /* ppc: result in r3, success/failure in CR0.SO (bit 28 of CR). */
   VexGuestPPC32State* gst   = (VexGuestPPC32State*)gst_vanilla;
   UInt                cr    = LibVEX_GuestPPC32_get_CR( gst );
   UInt                cr0so = (cr >> 28) & 1;
   canonical->sres = VG_(mk_SysRes_ppc32_linux)( gst->guest_GPR3, cr0so );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc64_linux)
   VexGuestPPC64State* gst   = (VexGuestPPC64State*)gst_vanilla;
   UInt                cr    = LibVEX_GuestPPC64_get_CR( gst );
   UInt                cr0so = (cr >> 28) & 1;
   canonical->sres = VG_(mk_SysRes_ppc64_linux)( gst->guest_GPR3, cr0so );
   canonical->what = SsComplete;

#  elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_arm_linux)( gst->guest_R0 );
   canonical->what = SsComplete;

#  elif defined(VGP_x86_darwin)
   /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
   /* Darwin: the syscall class recorded in guest_SC_CLASS selects the
      result convention.  Only Unix-class results use the carry flag
      (error indicator) and the 64-bit edx:eax pair. */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UInt carry = 1 & LibVEX_GuestX86_get_eflags(gst);
   UInt err = 0;
   UInt wLO = 0;
   UInt wHI = 0;
   switch (gst->guest_SC_CLASS) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         // int $0x80 = Unix, 64-bit result
         err = carry;
         wLO = gst->guest_EAX;
         wHI = gst->guest_EDX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         // int $0x81 = Mach, 32-bit result
         wLO = gst->guest_EAX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         // int $0x82 = mdep, 32-bit result
         wLO = gst->guest_EAX;
         break;
      default:
         vg_assert(0);
         break;
   }
   canonical->sres = VG_(mk_SysRes_x86_darwin)(
                        gst->guest_SC_CLASS, err ? True : False,
                        wHI, wLO
                     );
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_darwin)
   /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
   /* Same scheme as x86-darwin, with rdx:rax and rflags.c. */
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   ULong carry = 1 & LibVEX_GuestAMD64_get_rflags(gst);
   ULong err = 0;
   ULong wLO = 0;
   ULong wHI = 0;
   switch (gst->guest_SC_CLASS) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         // syscall = Unix, 128-bit result
         err = carry;
         wLO = gst->guest_RAX;
         wHI = gst->guest_RDX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         // syscall = Mach, 64-bit result
         wLO = gst->guest_RAX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         // syscall = mdep, 64-bit result
         wLO = gst->guest_RAX;
         break;
      default:
         vg_assert(0);
         break;
   }
   canonical->sres = VG_(mk_SysRes_amd64_darwin)(
                        gst->guest_SC_CLASS, err ? True : False,
                        wHI, wLO
                     );
   canonical->what = SsComplete;

#  elif defined(VGP_s390x_linux)
   VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_s390x_linux)( gst->guest_r2 );
   canonical->what = SsComplete;

#  else
#    error "getSyscallStatusFromGuestState: unknown arch"
#  endif
}
809
810 static
putSyscallStatusIntoGuestState(ThreadId tid,SyscallStatus * canonical,VexGuestArchState * gst_vanilla)811 void putSyscallStatusIntoGuestState ( /*IN*/ ThreadId tid,
812 /*IN*/ SyscallStatus* canonical,
813 /*OUT*/VexGuestArchState* gst_vanilla )
814 {
815 # if defined(VGP_x86_linux)
816 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
817 vg_assert(canonical->what == SsComplete);
818 if (sr_isError(canonical->sres)) {
819 /* This isn't exactly right, in that really a Failure with res
820 not in the range 1 .. 4095 is unrepresentable in the
821 Linux-x86 scheme. Oh well. */
822 gst->guest_EAX = - (Int)sr_Err(canonical->sres);
823 } else {
824 gst->guest_EAX = sr_Res(canonical->sres);
825 }
826 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
827 OFFSET_x86_EAX, sizeof(UWord) );
828
829 # elif defined(VGP_amd64_linux)
830 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
831 vg_assert(canonical->what == SsComplete);
832 if (sr_isError(canonical->sres)) {
833 /* This isn't exactly right, in that really a Failure with res
834 not in the range 1 .. 4095 is unrepresentable in the
835 Linux-amd64 scheme. Oh well. */
836 gst->guest_RAX = - (Long)sr_Err(canonical->sres);
837 } else {
838 gst->guest_RAX = sr_Res(canonical->sres);
839 }
840 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
841 OFFSET_amd64_RAX, sizeof(UWord) );
842
843 # elif defined(VGP_ppc32_linux)
844 VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
845 UInt old_cr = LibVEX_GuestPPC32_get_CR(gst);
846 vg_assert(canonical->what == SsComplete);
847 if (sr_isError(canonical->sres)) {
848 /* set CR0.SO */
849 LibVEX_GuestPPC32_put_CR( old_cr | (1<<28), gst );
850 gst->guest_GPR3 = sr_Err(canonical->sres);
851 } else {
852 /* clear CR0.SO */
853 LibVEX_GuestPPC32_put_CR( old_cr & ~(1<<28), gst );
854 gst->guest_GPR3 = sr_Res(canonical->sres);
855 }
856 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
857 OFFSET_ppc32_GPR3, sizeof(UWord) );
858 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
859 OFFSET_ppc32_CR0_0, sizeof(UChar) );
860
861 # elif defined(VGP_ppc64_linux)
862 VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
863 UInt old_cr = LibVEX_GuestPPC64_get_CR(gst);
864 vg_assert(canonical->what == SsComplete);
865 if (sr_isError(canonical->sres)) {
866 /* set CR0.SO */
867 LibVEX_GuestPPC64_put_CR( old_cr | (1<<28), gst );
868 gst->guest_GPR3 = sr_Err(canonical->sres);
869 } else {
870 /* clear CR0.SO */
871 LibVEX_GuestPPC64_put_CR( old_cr & ~(1<<28), gst );
872 gst->guest_GPR3 = sr_Res(canonical->sres);
873 }
874 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
875 OFFSET_ppc64_GPR3, sizeof(UWord) );
876 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
877 OFFSET_ppc64_CR0_0, sizeof(UChar) );
878
879 # elif defined(VGP_arm_linux)
880 VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
881 vg_assert(canonical->what == SsComplete);
882 if (sr_isError(canonical->sres)) {
883 /* This isn't exactly right, in that really a Failure with res
884 not in the range 1 .. 4095 is unrepresentable in the
885 Linux-arm scheme. Oh well. */
886 gst->guest_R0 = - (Int)sr_Err(canonical->sres);
887 } else {
888 gst->guest_R0 = sr_Res(canonical->sres);
889 }
890 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
891 OFFSET_arm_R0, sizeof(UWord) );
892
893 #elif defined(VGP_x86_darwin)
894 VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
895 SysRes sres = canonical->sres;
896 vg_assert(canonical->what == SsComplete);
897 /* Unfortunately here we have to break abstraction and look
898 directly inside 'res', in order to decide what to do. */
899 switch (sres._mode) {
900 case SysRes_MACH: // int $0x81 = Mach, 32-bit result
901 case SysRes_MDEP: // int $0x82 = mdep, 32-bit result
902 gst->guest_EAX = sres._wLO;
903 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
904 OFFSET_x86_EAX, sizeof(UInt) );
905 break;
906 case SysRes_UNIX_OK: // int $0x80 = Unix, 64-bit result
907 case SysRes_UNIX_ERR: // int $0x80 = Unix, 64-bit error
908 gst->guest_EAX = sres._wLO;
909 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
910 OFFSET_x86_EAX, sizeof(UInt) );
911 gst->guest_EDX = sres._wHI;
912 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
913 OFFSET_x86_EDX, sizeof(UInt) );
914 LibVEX_GuestX86_put_eflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
915 gst );
916 // GrP fixme sets defined for entire eflags, not just bit c
917 // DDD: this breaks exp-ptrcheck.
918 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
919 offsetof(VexGuestX86State, guest_CC_DEP1), sizeof(UInt) );
920 break;
921 default:
922 vg_assert(0);
923 break;
924 }
925
926 #elif defined(VGP_amd64_darwin)
927 VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
928 SysRes sres = canonical->sres;
929 vg_assert(canonical->what == SsComplete);
930 /* Unfortunately here we have to break abstraction and look
931 directly inside 'res', in order to decide what to do. */
932 switch (sres._mode) {
933 case SysRes_MACH: // syscall = Mach, 64-bit result
934 case SysRes_MDEP: // syscall = mdep, 64-bit result
935 gst->guest_RAX = sres._wLO;
936 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
937 OFFSET_amd64_RAX, sizeof(ULong) );
938 break;
939 case SysRes_UNIX_OK: // syscall = Unix, 128-bit result
940 case SysRes_UNIX_ERR: // syscall = Unix, 128-bit error
941 gst->guest_RAX = sres._wLO;
942 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
943 OFFSET_amd64_RAX, sizeof(ULong) );
944 gst->guest_RDX = sres._wHI;
945 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
946 OFFSET_amd64_RDX, sizeof(ULong) );
947 LibVEX_GuestAMD64_put_rflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
948 gst );
949 // GrP fixme sets defined for entire rflags, not just bit c
950 // DDD: this breaks exp-ptrcheck.
951 VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
952 offsetof(VexGuestAMD64State, guest_CC_DEP1), sizeof(ULong) );
953 break;
954 default:
955 vg_assert(0);
956 break;
957 }
958
959 # elif defined(VGP_s390x_linux)
960 VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
961 vg_assert(canonical->what == SsComplete);
962 if (sr_isError(canonical->sres)) {
963 gst->guest_r2 = - (Long)sr_Err(canonical->sres);
964 } else {
965 gst->guest_r2 = sr_Res(canonical->sres);
966 }
967
968 # else
969 # error "putSyscallStatusIntoGuestState: unknown arch"
970 # endif
971 }
972
973
974 /* Tell me the offsets in the guest state of the syscall params, so
975 that the scalar argument checkers don't have to have this info
976 hardwired. */
977
978 static
getSyscallArgLayout(SyscallArgLayout * layout)979 void getSyscallArgLayout ( /*OUT*/SyscallArgLayout* layout )
980 {
981 #if defined(VGP_x86_linux)
982 layout->o_sysno = OFFSET_x86_EAX;
983 layout->o_arg1 = OFFSET_x86_EBX;
984 layout->o_arg2 = OFFSET_x86_ECX;
985 layout->o_arg3 = OFFSET_x86_EDX;
986 layout->o_arg4 = OFFSET_x86_ESI;
987 layout->o_arg5 = OFFSET_x86_EDI;
988 layout->o_arg6 = OFFSET_x86_EBP;
989 layout->uu_arg7 = -1; /* impossible value */
990 layout->uu_arg8 = -1; /* impossible value */
991
992 #elif defined(VGP_amd64_linux)
993 layout->o_sysno = OFFSET_amd64_RAX;
994 layout->o_arg1 = OFFSET_amd64_RDI;
995 layout->o_arg2 = OFFSET_amd64_RSI;
996 layout->o_arg3 = OFFSET_amd64_RDX;
997 layout->o_arg4 = OFFSET_amd64_R10;
998 layout->o_arg5 = OFFSET_amd64_R8;
999 layout->o_arg6 = OFFSET_amd64_R9;
1000 layout->uu_arg7 = -1; /* impossible value */
1001 layout->uu_arg8 = -1; /* impossible value */
1002
1003 #elif defined(VGP_ppc32_linux)
1004 layout->o_sysno = OFFSET_ppc32_GPR0;
1005 layout->o_arg1 = OFFSET_ppc32_GPR3;
1006 layout->o_arg2 = OFFSET_ppc32_GPR4;
1007 layout->o_arg3 = OFFSET_ppc32_GPR5;
1008 layout->o_arg4 = OFFSET_ppc32_GPR6;
1009 layout->o_arg5 = OFFSET_ppc32_GPR7;
1010 layout->o_arg6 = OFFSET_ppc32_GPR8;
1011 layout->uu_arg7 = -1; /* impossible value */
1012 layout->uu_arg8 = -1; /* impossible value */
1013
1014 #elif defined(VGP_ppc64_linux)
1015 layout->o_sysno = OFFSET_ppc64_GPR0;
1016 layout->o_arg1 = OFFSET_ppc64_GPR3;
1017 layout->o_arg2 = OFFSET_ppc64_GPR4;
1018 layout->o_arg3 = OFFSET_ppc64_GPR5;
1019 layout->o_arg4 = OFFSET_ppc64_GPR6;
1020 layout->o_arg5 = OFFSET_ppc64_GPR7;
1021 layout->o_arg6 = OFFSET_ppc64_GPR8;
1022 layout->uu_arg7 = -1; /* impossible value */
1023 layout->uu_arg8 = -1; /* impossible value */
1024
1025 #elif defined(VGP_arm_linux)
1026 layout->o_sysno = OFFSET_arm_R7;
1027 layout->o_arg1 = OFFSET_arm_R0;
1028 layout->o_arg2 = OFFSET_arm_R1;
1029 layout->o_arg3 = OFFSET_arm_R2;
1030 layout->o_arg4 = OFFSET_arm_R3;
1031 layout->o_arg5 = OFFSET_arm_R4;
1032 layout->o_arg6 = OFFSET_arm_R5;
1033 layout->uu_arg7 = -1; /* impossible value */
1034 layout->uu_arg8 = -1; /* impossible value */
1035
1036 #elif defined(VGP_x86_darwin)
1037 layout->o_sysno = OFFSET_x86_EAX;
1038 // syscall parameters are on stack in C convention
1039 layout->s_arg1 = sizeof(UWord) * 1;
1040 layout->s_arg2 = sizeof(UWord) * 2;
1041 layout->s_arg3 = sizeof(UWord) * 3;
1042 layout->s_arg4 = sizeof(UWord) * 4;
1043 layout->s_arg5 = sizeof(UWord) * 5;
1044 layout->s_arg6 = sizeof(UWord) * 6;
1045 layout->s_arg7 = sizeof(UWord) * 7;
1046 layout->s_arg8 = sizeof(UWord) * 8;
1047
1048 #elif defined(VGP_amd64_darwin)
1049 layout->o_sysno = OFFSET_amd64_RAX;
1050 layout->o_arg1 = OFFSET_amd64_RDI;
1051 layout->o_arg2 = OFFSET_amd64_RSI;
1052 layout->o_arg3 = OFFSET_amd64_RDX;
1053 layout->o_arg4 = OFFSET_amd64_RCX;
1054 layout->o_arg5 = OFFSET_amd64_R8;
1055 layout->o_arg6 = OFFSET_amd64_R9;
1056 layout->s_arg7 = sizeof(UWord) * 1;
1057 layout->s_arg8 = sizeof(UWord) * 2;
1058
1059 #elif defined(VGP_s390x_linux)
1060 layout->o_sysno = OFFSET_s390x_SYSNO;
1061 layout->o_arg1 = OFFSET_s390x_r2;
1062 layout->o_arg2 = OFFSET_s390x_r3;
1063 layout->o_arg3 = OFFSET_s390x_r4;
1064 layout->o_arg4 = OFFSET_s390x_r5;
1065 layout->o_arg5 = OFFSET_s390x_r6;
1066 layout->o_arg6 = OFFSET_s390x_r7;
1067 layout->uu_arg7 = -1; /* impossible value */
1068 layout->uu_arg8 = -1; /* impossible value */
1069 #else
1070 # error "getSyscallLayout: unknown arch"
1071 #endif
1072 }
1073
1074
1075 /* ---------------------------------------------------------------------
1076 The main driver logic
1077 ------------------------------------------------------------------ */
1078
1079 /* Finding the handlers for a given syscall, or faking up one
1080 when no handler is found. */
1081
/* Pre-handler installed for any syscall that has no real handler:
   warn the user, optionally (at -v -v) show where the call came
   from, and reject the call with ENOSYS without ever passing it to
   the kernel. */
static
void bad_before ( ThreadId tid,
                  SyscallArgLayout* layout,
                  /*MOD*/SyscallArgs* args,
                  /*OUT*/SyscallStatus* status,
                  /*OUT*/UWord* flags )
{
   VG_(dmsg)("WARNING: unhandled syscall: %s\n",
      VG_SYSNUM_STRING_EXTRA(args->sysno));
   if (VG_(clo_verbosity) > 1) {
      VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
   }
   VG_(dmsg)("You may be able to write your own handler.\n");
   VG_(dmsg)("Read the file README_MISSING_SYSCALL_OR_IOCTL.\n");
   VG_(dmsg)("Nevertheless we consider this a bug.  Please report\n");
   VG_(dmsg)("it at http://valgrind.org/support/bug_reports.html.\n");

   /* Fail the call with ENOSYS.  NOTE(review): the macro presumably
      writes through the 'status' parameter by name -- confirm against
      its definition in priv_types_n_macros.h. */
   SET_STATUS_Failure(VKI_ENOSYS);
}
1101
/* Dummy table entry handed out for unknown syscalls: the 'before'
   handler is bad_before (fails the call with ENOSYS) and there is no
   'after' handler. */
static SyscallTableEntry bad_sys =
   { bad_before, NULL };
1104
get_syscall_entry(Int syscallno)1105 static const SyscallTableEntry* get_syscall_entry ( Int syscallno )
1106 {
1107 const SyscallTableEntry* sys = NULL;
1108
1109 # if defined(VGO_linux)
1110 sys = ML_(get_linux_syscall_entry)( syscallno );
1111
1112 # elif defined(VGO_darwin)
1113 Int idx = VG_DARWIN_SYSNO_INDEX(syscallno);
1114
1115 switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
1116 case VG_DARWIN_SYSCALL_CLASS_UNIX:
1117 if (idx >= 0 && idx < ML_(syscall_table_size) &&
1118 ML_(syscall_table)[idx].before != NULL)
1119 sys = &ML_(syscall_table)[idx];
1120 break;
1121 case VG_DARWIN_SYSCALL_CLASS_MACH:
1122 if (idx >= 0 && idx < ML_(mach_trap_table_size) &&
1123 ML_(mach_trap_table)[idx].before != NULL)
1124 sys = &ML_(mach_trap_table)[idx];
1125 break;
1126 case VG_DARWIN_SYSCALL_CLASS_MDEP:
1127 if (idx >= 0 && idx < ML_(mdep_trap_table_size) &&
1128 ML_(mdep_trap_table)[idx].before != NULL)
1129 sys = &ML_(mdep_trap_table)[idx];
1130 break;
1131 default:
1132 vg_assert(0);
1133 break;
1134 }
1135
1136 # else
1137 # error Unknown OS
1138 # endif
1139
1140 return sys == NULL ? &bad_sys : sys;
1141 }
1142
1143
1144 /* Add and remove signals from mask so that we end up telling the
1145 kernel the state we actually want rather than what the client
1146 wants. */
static void sanitize_client_sigmask(vki_sigset_t *mask)
{
   /* These three must never end up blocked in the mask we hand to the
      kernel, regardless of what the client asked for. */
   Int unblockable[3];
   Int i;

   unblockable[0] = VKI_SIGKILL;
   unblockable[1] = VKI_SIGSTOP;
   unblockable[2] = VG_SIGVGKILL;   /* never block */

   for (i = 0; i < 3; i++)
      VG_(sigdelset)(mask, unblockable[i]);
}
1153
/* Per-thread record of the syscall (if any) currently in progress on
   that thread. */
typedef
   struct {
      SyscallArgs orig_args;     /* args as fetched from the guest state */
      SyscallArgs args;          /* args as possibly modified by the pre-handler */
      SyscallStatus status;      /* where the syscall is up to (SsIdle/
                                    SsHandToKernel/SsComplete) */
      UWord flags;               /* Sf* flags set by the pre-handler */
   }
   SyscallInfo;

/* One slot per thread, indexed by ThreadId. */
SyscallInfo syscallInfo[VG_N_THREADS];
1164
1165
1166 /* The scheduler needs to be able to zero out these records after a
1167 fork, hence this is exported from m_syswrap. */
VG_(clear_syscallInfo)1168 void VG_(clear_syscallInfo) ( Int tid )
1169 {
1170 vg_assert(tid >= 0 && tid < VG_N_THREADS);
1171 VG_(memset)( & syscallInfo[tid], 0, sizeof( syscallInfo[tid] ));
1172 syscallInfo[tid].status.what = SsIdle;
1173 }
1174
ensure_initialised(void)1175 static void ensure_initialised ( void )
1176 {
1177 Int i;
1178 static Bool init_done = False;
1179 if (init_done)
1180 return;
1181 init_done = True;
1182 for (i = 0; i < VG_N_THREADS; i++) {
1183 VG_(clear_syscallInfo)( i );
1184 }
1185 }
1186
1187 /* --- This is the main function of this file. --- */
1188
/* Main syscall driver, run on behalf of thread 'tid' which has just
   executed a syscall instruction.  'trc' is the trap return code
   from the dispatcher and is passed through to
   getSyscallArgsFromGuestState (NOTE(review): on Darwin it presumably
   distinguishes the several syscall-entry conventions -- confirm in
   the platform code).  Sequence: fetch args from the guest state,
   run the pre-handler, hand the call to the kernel (async via
   do_syscall_for_client if it may block, else directly), write the
   result back into the guest state, then run VG_(post_syscall). */
void VG_(client_syscall) ( ThreadId tid, UInt trc )
{
   Word                     sysno;
   ThreadState*             tst;
   const SyscallTableEntry* ent;
   SyscallArgLayout         layout;
   SyscallInfo*             sci;

   ensure_initialised();

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst = VG_(get_ThreadState)(tid);

   /* BEGIN ensure root thread's stack is suitably mapped */
   /* In some rare circumstances, we may do the syscall without the
      bottom page of the stack being mapped, because the stack pointer
      was moved down just a few instructions before the syscall
      instruction, and there have been no memory references since
      then, that would cause a call to VG_(extend_stack) to have
      happened.

      In native execution that's OK: the kernel automagically extends
      the stack's mapped area down to cover the stack pointer (or sp -
      redzone, really).  In simulated normal execution that's OK too,
      since any signals we get from accessing below the mapped area of
      the (guest's) stack lead us to VG_(extend_stack), where we
      simulate the kernel's stack extension logic.  But that leaves
      the problem of entering a syscall with the SP unmapped.  Because
      the kernel doesn't know that the segment immediately above SP is
      supposed to be a grow-down segment, it causes the syscall to
      fail, and thereby causes a divergence between native behaviour
      (syscall succeeds) and simulated behaviour (syscall fails).

      This is quite a rare failure mode.  It has only been seen
      affecting calls to sys_readlink on amd64-linux, and even then it
      requires a certain code sequence around the syscall to trigger
      it.  Here is one:

      extern int my_readlink ( const char* path );
      asm(
      ".text\n"
      ".globl my_readlink\n"
      "my_readlink:\n"
      "\tsubq    $0x1008,%rsp\n"
      "\tmovq    %rdi,%rdi\n"              // path is in rdi
      "\tmovq    %rsp,%rsi\n"              // &buf[0] -> rsi
      "\tmovl    $0x1000,%edx\n"           // sizeof(buf) in rdx
      "\tmovl    $"__NR_READLINK",%eax\n"  // syscall number
      "\tsyscall\n"
      "\taddq    $0x1008,%rsp\n"
      "\tret\n"
      ".previous\n"
      );

      For more details, see bug #156404
      (https://bugs.kde.org/show_bug.cgi?id=156404).

      The fix is actually very simple.  We simply need to call
      VG_(extend_stack) for this thread, handing it the lowest
      possible valid address for stack (sp - redzone), to ensure the
      pages all the way down to that address, are mapped.  Because
      this is a potentially expensive and frequent operation, we
      filter in two ways:

      First, only the main thread (tid=1) has a growdown stack.  So
      ignore all others.  It is conceivable, although highly unlikely,
      that the main thread exits, and later another thread is
      allocated tid=1, but that's harmless, I believe;
      VG_(extend_stack) will do nothing when applied to a non-root
      thread.

      Secondly, first call VG_(am_find_nsegment) directly, to see if
      the page holding (sp - redzone) is mapped correctly.  If so, do
      nothing.  This is almost always the case.  VG_(extend_stack)
      calls VG_(am_find_nsegment) twice, so this optimisation -- and
      that's all it is -- more or less halves the number of calls to
      VG_(am_find_nsegment) required.

      TODO: the test "seg->kind == SkAnonC" is really inadequate,
      because although it tests whether the segment is mapped
      _somehow_, it doesn't check that it has the right permissions
      (r,w, maybe x) ?  We could test that here, but it will also be
      necessary to fix the corresponding test in VG_(extend_stack).

      All this guff is of course Linux-specific.  Hence the ifdef.
   */
#  if defined(VGO_linux)
   if (tid == 1/*ROOT THREAD*/) {
      Addr stackMin = VG_(get_SP)(tid) - VG_STACK_REDZONE_SZB;
      NSegment const* seg = VG_(am_find_nsegment)(stackMin);
      if (seg && seg->kind == SkAnonC) {
         /* stackMin is already mapped.  Nothing to do. */
      } else {
         (void)VG_(extend_stack)( stackMin,
                                  tst->client_stack_szB );
      }
   }
#  endif
   /* END ensure root thread's stack is suitably mapped */

   /* First off, get the syscall args and number.  This is a
      platform-dependent action. */

   sci = & syscallInfo[tid];
   vg_assert(sci->status.what == SsIdle);

   getSyscallArgsFromGuestState( &sci->orig_args, &tst->arch.vex, trc );

   /* Copy .orig_args to .args.  The pre-handler may modify .args, but
      we want to keep the originals too, just in case. */
   sci->args = sci->orig_args;

   /* Save the syscall number in the thread state in case the syscall
      is interrupted by a signal. */
   sysno = sci->orig_args.sysno;

   /* It's sometimes useful, as a crude debugging hack, to get a
      stack trace at each (or selected) syscalls. */
   if (0 && sysno == __NR_ioctl) {
      VG_(umsg)("\nioctl:\n");
      VG_(get_and_pp_StackTrace)(tid, 10);
      VG_(umsg)("\n");
   }

#  if defined(VGO_darwin)
   /* Record syscall class.  But why?  Because the syscall might be
      interrupted by a signal, and in the signal handler (which will
      be m_signals.async_signalhandler) we will need to build a SysRes
      reflecting the syscall return result.  In order to do that we
      need to know the syscall class.  Hence stash it in the guest
      state of this thread.  This madness is not needed on Linux
      because it only has a single syscall return convention and so
      there is no ambiguity involved in converting the post-signal
      machine state into a SysRes. */
   tst->arch.vex.guest_SC_CLASS = VG_DARWIN_SYSNO_CLASS(sysno);
#  endif

   /* The default what-to-do-next thing is hand the syscall to the
      kernel, so we pre-set that here.  Set .sres to something
      harmless looking (is irrelevant because .what is not
      SsComplete.) */
   sci->status.what = SsHandToKernel;
   sci->status.sres = VG_(mk_SysRes_Error)(0);
   sci->flags       = 0;

   /* Fetch the syscall's handlers.  If no handlers exist for this
      syscall, we are given dummy handlers which force an immediate
      return with ENOSYS. */
   ent = get_syscall_entry(sysno);

   /* Fetch the layout information, which tells us where in the guest
      state the syscall args reside.  This is a platform-dependent
      action.  This info is needed so that the scalar syscall argument
      checks (PRE_REG_READ calls) know which bits of the guest state
      they need to inspect. */
   getSyscallArgLayout( &layout );

   /* Make sure the tmp signal mask matches the real signal mask;
      sigsuspend may change this. */
   vg_assert(VG_(iseqsigset)(&tst->sig_mask, &tst->tmp_sig_mask));

   /* Right, we're finally ready to Party.  Call the pre-handler and
      see what we get back.  At this point:

        sci->status.what  is Unset (we don't know yet).
        sci->orig_args    contains the original args.
        sci->args         is the same as sci->orig_args.
        sci->flags        is zero.
   */

   PRINT("SYSCALL[%d,%d](%s) ",
      VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno));

   /* Do any pre-syscall actions */
   if (VG_(needs).syscall_wrapper) {
      UWord tmpv[8];
      tmpv[0] = sci->orig_args.arg1;
      tmpv[1] = sci->orig_args.arg2;
      tmpv[2] = sci->orig_args.arg3;
      tmpv[3] = sci->orig_args.arg4;
      tmpv[4] = sci->orig_args.arg5;
      tmpv[5] = sci->orig_args.arg6;
      tmpv[6] = sci->orig_args.arg7;
      tmpv[7] = sci->orig_args.arg8;
      VG_TDICT_CALL(tool_pre_syscall, tid, sysno,
                    &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]));
   }

   vg_assert(ent);
   vg_assert(ent->before);
   (ent->before)( tid,
                  &layout,
                  &sci->args, &sci->status, &sci->flags );

   /* The pre-handler may have modified:
         sci->args
         sci->status
         sci->flags
      All else remains unchanged.
      Although the args may be modified, pre handlers are not allowed
      to change the syscall number.
   */
   /* Now we proceed according to what the pre-handler decided. */
   vg_assert(sci->status.what == SsHandToKernel
             || sci->status.what == SsComplete);
   vg_assert(sci->args.sysno == sci->orig_args.sysno);

   if (sci->status.what == SsComplete && !sr_isError(sci->status.sres)) {
      /* The pre-handler completed the syscall itself, declaring
         success. */
      if (sci->flags & SfNoWriteResult) {
         PRINT(" --> [pre-success] NoWriteResult");
      } else {
         PRINT(" --> [pre-success] Success(0x%llx:0x%llx)",
               (ULong)sr_ResHI(sci->status.sres),
               (ULong)sr_Res(sci->status.sres));
      }
      /* In this case the allowable flags are to ask for a signal-poll
         and/or a yield after the call.  Changing the args isn't
         allowed. */
      vg_assert(0 == (sci->flags
                      & ~(SfPollAfter | SfYieldAfter | SfNoWriteResult)));
      vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
   }

   else
   if (sci->status.what == SsComplete && sr_isError(sci->status.sres)) {
      /* The pre-handler decided to fail syscall itself. */
      PRINT(" --> [pre-fail] Failure(0x%llx)", (ULong)sr_Err(sci->status.sres));
      /* In this case, the pre-handler is also allowed to ask for the
         post-handler to be run anyway.  Changing the args is not
         allowed. */
      vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
      vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
   }

   else
   if (sci->status.what != SsHandToKernel) {
      /* huh?! */
      vg_assert(0);
   }

   else /* (sci->status.what == HandToKernel) */ {
      /* Ok, this is the usual case -- and the complicated one.  There
         are two subcases: sync and async.  async is the general case
         and is to be used when there is any possibility that the
         syscall might block [a fact that the pre-handler must tell us
         via the sci->flags field.]  Because the tidying-away /
         context-switch overhead of the async case could be large, if
         we are sure that the syscall will not block, we fast-track it
         by doing it directly in this thread, which is a lot
         simpler. */

      /* Check that the given flags are allowable: MayBlock, PollAfter
         and PostOnFail are ok. */
      vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));

      if (sci->flags & SfMayBlock) {

         /* Syscall may block, so run it asynchronously */
         vki_sigset_t mask;

         PRINT(" --> [async] ... \n");

         mask = tst->sig_mask;
         sanitize_client_sigmask(&mask);

         /* Gack.  More impedance matching.  Copy the possibly
            modified syscall args back into the guest state. */
         /* JRS 2009-Mar-16: if the syscall args are possibly modified,
            then this assertion is senseless:
              vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
            The case that exposed it was sys_posix_spawn on Darwin,
            which heavily modifies its arguments but then lets the call
            go through anyway, with SfToBlock set, hence we end up here. */
         putSyscallArgsIntoGuestState( &sci->args, &tst->arch.vex );

         /* Drop the bigLock */
         VG_(release_BigLock)(tid, VgTs_WaitSys, "VG_(client_syscall)[async]");
         /* Urr.  We're now in a race against other threads trying to
            acquire the bigLock.  I guess that doesn't matter provided
            that do_syscall_for_client only touches thread-local
            state. */

         /* Do the call, which operates directly on the guest state,
            not on our abstracted copies of the args/result. */
         do_syscall_for_client(sysno, tst, &mask);

         /* do_syscall_for_client may not return if the syscall was
            interrupted by a signal.  In that case, flow of control is
            first to m_signals.async_sighandler, which calls
            VG_(fixup_guest_state_after_syscall_interrupted), which
            fixes up the guest state, and possibly calls
            VG_(post_syscall).  Once that's done, control drops back
            to the scheduler.  */

         /* Darwin: do_syscall_for_client may not return if the
            syscall was workq_ops(WQOPS_THREAD_RETURN) and the kernel
            responded by starting the thread at wqthread_hijack(reuse=1)
            (to run another workqueue item).  In that case, wqthread_hijack
            calls ML_(wqthread_continue), which is similar to
            VG_(fixup_guest_state_after_syscall_interrupted). */

         /* Reacquire the lock */
         VG_(acquire_BigLock)(tid, "VG_(client_syscall)[async]");

         /* Even more impedance matching.  Extract the syscall status
            from the guest state. */
         getSyscallStatusFromGuestState( &sci->status, &tst->arch.vex );
         vg_assert(sci->status.what == SsComplete);

         /* Be decorative, if required. */
         if (VG_(clo_trace_syscalls)) {
            Bool failed = sr_isError(sci->status.sres);
            if (failed) {
               PRINT("SYSCALL[%d,%d](%s) ... [async] --> Failure(0x%llx)",
                     VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
                     (ULong)sr_Err(sci->status.sres));
            } else {
               PRINT("SYSCALL[%d,%d](%s) ... [async] --> "
                     "Success(0x%llx:0x%llx)",
                     VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
                     (ULong)sr_ResHI(sci->status.sres),
                     (ULong)sr_Res(sci->status.sres) );
            }
         }

      } else {

         /* run the syscall directly */
         /* The pre-handler may have modified the syscall args, but
            since we're passing values in ->args directly to the
            kernel, there's no point in flushing them back to the
            guest state.  Indeed doing so could be construed as
            incorrect. */
         SysRes sres
            = VG_(do_syscall)(sysno, sci->args.arg1, sci->args.arg2,
                                     sci->args.arg3, sci->args.arg4,
                                     sci->args.arg5, sci->args.arg6,
                                     sci->args.arg7, sci->args.arg8 );
         sci->status = convert_SysRes_to_SyscallStatus(sres);

         /* Be decorative, if required. */
         if (VG_(clo_trace_syscalls)) {
            Bool failed = sr_isError(sci->status.sres);
            if (failed) {
               PRINT("[sync] --> Failure(0x%llx)",
                     (ULong)sr_Err(sci->status.sres) );
            } else {
               PRINT("[sync] --> Success(0x%llx:0x%llx)",
                     (ULong)sr_ResHI(sci->status.sres),
                     (ULong)sr_Res(sci->status.sres) );
            }
         }
      }
   }

   vg_assert(sci->status.what == SsComplete);

   vg_assert(VG_(is_running_thread)(tid));

   /* Dump the syscall result back in the guest state.  This is
      a platform-specific action. */
   if (!(sci->flags & SfNoWriteResult))
      putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );

   /* Situation now:
      - the guest state is now correctly modified following the syscall
      - modified args, original args and syscall status are still
        available in the syscallInfo[] entry for this syscall.

      Now go on to do the post-syscall actions (read on down ..)
   */
   PRINT(" ");
   VG_(post_syscall)(tid);
   PRINT("\n");
}
1569
1570
1571 /* Perform post syscall actions. The expected state on entry is
1572 precisely as at the end of VG_(client_syscall), that is:
1573
1574 - guest state up to date following the syscall
1575 - modified args, original args and syscall status are still
1576 available in the syscallInfo[] entry for this syscall.
1577 - syscall status matches what's in the guest state.
1578
1579 There are two ways to get here: the normal way -- being called by
1580 VG_(client_syscall), and the unusual way, from
1581 VG_(fixup_guest_state_after_syscall_interrupted).
1582 Darwin: there's a third way, ML_(wqthread_continue).
1583 */
void VG_(post_syscall) (ThreadId tid)
{
   SyscallInfo*             sci;
   const SyscallTableEntry* ent;
   SyscallStatus            test_status;   /* cross-check vs guest state */
   ThreadState*             tst;
   Word sysno;

   /* Preliminaries */
   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst = VG_(get_ThreadState)(tid);
   sci = & syscallInfo[tid];

   /* m_signals.sigvgkill_handler might call here even when not in
      a syscall. */
   if (sci->status.what == SsIdle || sci->status.what == SsHandToKernel) {
      /* Nothing to finish up; just leave the record idle. */
      sci->status.what = SsIdle;
      return;
   }

   /* Validate current syscallInfo entry.  In particular we require
      that the current .status matches what's actually in the guest
      state.  At least in the normal case where we have actually
      previously written the result into the guest state. */
   vg_assert(sci->status.what == SsComplete);

   getSyscallStatusFromGuestState( &test_status, &tst->arch.vex );
   if (!(sci->flags & SfNoWriteResult))
      vg_assert(eq_SyscallStatus( &sci->status, &test_status ));
   /* Failure of the above assertion on Darwin can indicate a problem
      in the syscall wrappers that pre-fail or pre-succeed the
      syscall, by calling SET_STATUS_Success or SET_STATUS_Failure,
      when they really should call SET_STATUS_from_SysRes.  The former
      create a UNIX-class syscall result on Darwin, which may not be
      correct for the syscall; if that's the case then this assertion
      fires.  See PRE(thread_fast_set_cthread_self) for an example.  On
      non-Darwin platforms this assertion should never fail, and this
      comment is completely irrelevant. */
   /* Ok, looks sane */

   /* Get the system call number.  Because the pre-handler isn't
      allowed to mess with it, it should be the same for both the
      original and potentially-modified args. */
   vg_assert(sci->args.sysno == sci->orig_args.sysno);
   sysno = sci->args.sysno;
   ent = get_syscall_entry(sysno);

   /* pre: status == Complete (asserted above) */
   /* Consider either success or failure.  Now run the post handler if:
      - it exists, and
      - Success or (Failure and PostOnFail is set)
   */
   if (ent->after
       && ((!sr_isError(sci->status.sres))
           || (sr_isError(sci->status.sres)
               && (sci->flags & SfPostOnFail) ))) {

      (ent->after)( tid, &sci->args, &sci->status );
   }

   /* Because the post handler might have changed the status (eg, the
      post-handler for sys_open can change the result from success to
      failure if the kernel supplied a fd that it doesn't like), once
      again dump the syscall result back in the guest state.*/
   if (!(sci->flags & SfNoWriteResult))
      putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );

   /* Do any post-syscall actions required by the tool. */
   if (VG_(needs).syscall_wrapper) {
      UWord tmpv[8];
      tmpv[0] = sci->orig_args.arg1;
      tmpv[1] = sci->orig_args.arg2;
      tmpv[2] = sci->orig_args.arg3;
      tmpv[3] = sci->orig_args.arg4;
      tmpv[4] = sci->orig_args.arg5;
      tmpv[5] = sci->orig_args.arg6;
      tmpv[6] = sci->orig_args.arg7;
      tmpv[7] = sci->orig_args.arg8;
      VG_TDICT_CALL(tool_post_syscall, tid,
                    sysno,
                    &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]),
                    sci->status.sres);
   }

   /* The syscall is done. */
   vg_assert(sci->status.what == SsComplete);
   sci->status.what = SsIdle;

   /* The pre/post wrappers may have concluded that pending signals
      might have been created, and will have set SfPollAfter to
      request a poll for them once the syscall is done. */
   if (sci->flags & SfPollAfter)
      VG_(poll_signals)(tid);

   /* Similarly, the wrappers might have asked for a yield
      afterwards. */
   if (sci->flags & SfYieldAfter)
      VG_(vg_yield)();
}
1686
1687
1688 /* ---------------------------------------------------------------------
1689 Dealing with syscalls which get interrupted by a signal:
1690 VG_(fixup_guest_state_after_syscall_interrupted)
1691 ------------------------------------------------------------------ */
1692
1693 /* Syscalls done on behalf of the client are finally handed off to the
1694 kernel in VG_(client_syscall) above, either by calling
1695 do_syscall_for_client (the async case), or by calling
1696 VG_(do_syscall6) (the sync case).
1697
1698 If the syscall is not interrupted by a signal (it may block and
1699 later unblock, but that's irrelevant here) then those functions
1700 eventually return and so control is passed to VG_(post_syscall).
1701 NB: not sure if the sync case can actually get interrupted, as it
1702 operates with all signals masked.
1703
1704 However, the syscall may get interrupted by an async-signal. In
1705 that case do_syscall_for_client/VG_(do_syscall6) do not
1706 return. Instead we wind up in m_signals.async_sighandler. We need
1707 to fix up the guest state to make it look like the syscall was
1708 interrupted for guest. So async_sighandler calls here, and this
1709 does the fixup. Note that from here we wind up calling
1710 VG_(post_syscall) too.
1711 */
1712
1713
1714 /* These are addresses within ML_(do_syscall_for_client_WRK). See
1715 syscall-$PLAT.S for details.
1716 */
1717 #if defined(VGO_linux)
1718 extern const Addr ML_(blksys_setup);
1719 extern const Addr ML_(blksys_restart);
1720 extern const Addr ML_(blksys_complete);
1721 extern const Addr ML_(blksys_committed);
1722 extern const Addr ML_(blksys_finished);
1723 #elif defined(VGO_darwin)
1724 /* Darwin requires extra uglyness */
1725 extern const Addr ML_(blksys_setup_MACH);
1726 extern const Addr ML_(blksys_restart_MACH);
1727 extern const Addr ML_(blksys_complete_MACH);
1728 extern const Addr ML_(blksys_committed_MACH);
1729 extern const Addr ML_(blksys_finished_MACH);
1730 extern const Addr ML_(blksys_setup_MDEP);
1731 extern const Addr ML_(blksys_restart_MDEP);
1732 extern const Addr ML_(blksys_complete_MDEP);
1733 extern const Addr ML_(blksys_committed_MDEP);
1734 extern const Addr ML_(blksys_finished_MDEP);
1735 extern const Addr ML_(blksys_setup_UNIX);
1736 extern const Addr ML_(blksys_restart_UNIX);
1737 extern const Addr ML_(blksys_complete_UNIX);
1738 extern const Addr ML_(blksys_committed_UNIX);
1739 extern const Addr ML_(blksys_finished_UNIX);
1740 #else
1741 # error "Unknown OS"
1742 #endif
1743
1744
1745 /* Back up guest state to restart a system call. */
1746
/* Back the guest up so the syscall instruction will be re-executed:
   decrement the guest IP by the size of the platform's syscall insn,
   then sanity-check that the bytes now at the IP really encode a
   syscall instruction, asserting if not -- a caller that was not in
   fact stopped just past a syscall insn is caught immediately rather
   than silently corrupting guest state. */
void ML_(fixup_guest_state_to_restart_syscall) ( ThreadArchState* arch )
{
#if defined(VGP_x86_linux)
   arch->vex.guest_EIP -= 2;             // sizeof(int $0x80)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      int $0x80 == CD 80
   */
   {
      UChar *p = (UChar *)arch->vex.guest_EIP;

      if (p[0] != 0xcd || p[1] != 0x80)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#x %02x %02x\n",
                      arch->vex.guest_EIP, p[0], p[1]);

      vg_assert(p[0] == 0xcd && p[1] == 0x80);
   }

#elif defined(VGP_amd64_linux)
   arch->vex.guest_RIP -= 2;             // sizeof(syscall)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      syscall == 0F 05
   */
   {
      UChar *p = (UChar *)arch->vex.guest_RIP;

      if (p[0] != 0x0F || p[1] != 0x05)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x\n",
                      arch->vex.guest_RIP, p[0], p[1]);

      vg_assert(p[0] == 0x0F && p[1] == 0x05);
   }

#elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
   arch->vex.guest_CIA -= 4;             // sizeof(ppc32 instr)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      sc == 44 00 00 02
   */
   {
      UChar *p = (UChar *)arch->vex.guest_CIA;

      /* "+ 0ULL" promotes the address to 64 bits so the %llx format
         is correct on ppc32 as well as ppc64. */
      if (p[0] != 0x44 || p[1] != 0x0 || p[2] != 0x0 || p[3] != 0x02)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
                      arch->vex.guest_CIA + 0ULL, p[0], p[1], p[2], p[3]);

      vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);
   }

#elif defined(VGP_arm_linux)
   if (arch->vex.guest_R15T & 1) {
      // Thumb mode.  SVC is encoded as
      //   1101 1111 imm8
      // where imm8 is the SVC number, and we only accept 0.
      arch->vex.guest_R15T -= 2;   // sizeof(thumb 16 bit insn)
      // guest_R15T still carries the Thumb bit (bit 0), so subtract 1
      // to get the actual byte address of the instruction.
      UChar* p = (UChar*)(arch->vex.guest_R15T - 1);
      Bool valid = p[0] == 0 && p[1] == 0xDF;
      if (!valid) {
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over (Thumb) syscall that is not syscall "
                      "at %#llx %02x %02x\n",
                      arch->vex.guest_R15T - 1ULL, p[0], p[1]);
      }
      vg_assert(valid);
      // FIXME: NOTE, this really isn't right.  We need to back up
      // ITSTATE to what it was before the SVC instruction, but we
      // don't know what it was.  At least assert that it is now
      // zero, because if it is nonzero then it must also have
      // been nonzero for the SVC itself, which means it was
      // conditional.  Urk.
      vg_assert(arch->vex.guest_ITSTATE == 0);
   } else {
      // ARM mode.  SVC is encoded as
      //   cond 1111 imm24
      // where imm24 is the SVC number, and we only accept 0.
      arch->vex.guest_R15T -= 4;   // sizeof(arm instr)
      UChar* p = (UChar*)arch->vex.guest_R15T;
      // Little-endian: p[3] holds "cond 1111"; any condition is
      // accepted, only the 1111 (SVC) opcode nibble is checked.
      Bool valid = p[0] == 0 && p[1] == 0 && p[2] == 0
                   && (p[3] & 0xF) == 0xF;
      if (!valid) {
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over (ARM) syscall that is not syscall "
                      "at %#llx %02x %02x %02x %02x\n",
                      arch->vex.guest_R15T + 0ULL, p[0], p[1], p[2], p[3]);
      }
      vg_assert(valid);
   }

#elif defined(VGP_x86_darwin)
   /* Darwin keeps the pre-syscall IP in the guest state, so restore
      it rather than decrementing: several syscall insn forms (of
      different lengths) are possible here. */
   arch->vex.guest_EIP = arch->vex.guest_IP_AT_SYSCALL;

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      int $0x80 == CD 80
      int $0x81 == CD 81
      int $0x82 == CD 82
      sysenter  == 0F 34
   */
   {
      UChar *p = (UChar *)arch->vex.guest_EIP;
      Bool  ok = (p[0] == 0xCD && p[1] == 0x80)
                 || (p[0] == 0xCD && p[1] == 0x81)
                 || (p[0] == 0xCD && p[1] == 0x82)
                 || (p[0] == 0x0F && p[1] == 0x34);
      if (!ok)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#x %02x %02x\n",
                      arch->vex.guest_EIP, p[0], p[1]);
      vg_assert(ok);
   }

#elif defined(VGP_amd64_darwin)
   // DDD: #warning GrP fixme amd64 restart unimplemented
   vg_assert(0);

#elif defined(VGP_s390x_linux)
   arch->vex.guest_IA -= 2;             // sizeof(syscall)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      syscall == 0A <num>
   */
   {
      UChar *p = (UChar *)arch->vex.guest_IA;
      /* Only the opcode byte (0x0A) is checked; the second byte is
         the syscall number and may be anything. */
      if (p[0] != 0x0A)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x\n",
                      arch->vex.guest_IA, p[0], p[1]);

      vg_assert(p[0] == 0x0A);
   }
#else
#  error "ML_(fixup_guest_state_to_restart_syscall): unknown plat"
#endif
}
1894
1895 /*
1896 Fix up the guest state when a syscall is interrupted by a signal
1897 and so has been forced to return 'sysret'.
1898
1899 To do this, we determine the precise state of the syscall by
1900 looking at the (real) IP at the time the signal happened. The
1901 syscall sequence looks like:
1902
1903 1. unblock signals
1904 2. perform syscall
1905 3. save result to guest state (EAX, RAX, R3+CR0.SO)
1906 4. re-block signals
1907
1908 If a signal
1909 happens at Then Why?
1910 [1-2) restart nothing has happened (restart syscall)
1911 [2] restart syscall hasn't started, or kernel wants to restart
1912 [2-3) save syscall complete, but results not saved
1913 [3-4) syscall complete, results saved
1914
1915 Sometimes we never want to restart an interrupted syscall (because
1916 sigaction says not to), so we only restart if "restart" is True.
1917
1918 This will also call VG_(post_syscall) if the syscall has actually
1919 completed (either because it was interrupted, or because it
1920 actually finished). It will not call VG_(post_syscall) if the
1921 syscall is set up for restart, which means that the pre-wrapper may
1922 get called multiple times.
1923 */
1924
/* Parameters:
     tid     -- thread whose syscall was interrupted
     ip      -- real (host) instruction pointer at the moment the
               signal arrived; used to classify how far through the
               syscall sequence the thread had got
     sres    -- syscall result; only meaningful in the
               complete-but-uncommitted case
     restart -- if True, an interrupted-but-unstarted syscall is set
               up to be restarted; otherwise it fails with EINTR  */
void
VG_(fixup_guest_state_after_syscall_interrupted)( ThreadId tid,
                                                  Addr     ip,
                                                  SysRes   sres,
                                                  Bool     restart)
{
   /* Note that we don't know the syscall number here, since (1) in
      general there's no reliable way to get hold of it short of
      stashing it in the guest state before the syscall, and (2) in
      any case we don't need to know it for the actions done by this
      routine.

      Furthermore, 'sres' is only used in the case where the syscall
      is complete, but the result has not been committed to the guest
      state yet.  In any other situation it will be meaningless and
      therefore ignored. */

   ThreadState*     tst;
   SyscallStatus    canonical;
   ThreadArchState* th_regs;
   SyscallInfo*     sci;

   /* Compute some Booleans indicating which range we're in. */
   Bool outside_range,
        in_setup_to_restart,      // [1,2) in the .S files
        at_restart,               // [2]   in the .S files
        in_complete_to_committed, // [3,4) in the .S files
        in_committed_to_finished; // [4,5) in the .S files

#  if defined(VGO_linux)
   outside_range
      = ip < ML_(blksys_setup) || ip >= ML_(blksys_finished);
   in_setup_to_restart
      = ip >= ML_(blksys_setup) && ip < ML_(blksys_restart);
   at_restart
      = ip == ML_(blksys_restart);
   in_complete_to_committed
      = ip >= ML_(blksys_complete) && ip < ML_(blksys_committed);
   in_committed_to_finished
      = ip >= ML_(blksys_committed) && ip < ML_(blksys_finished);
#  elif defined(VGO_darwin)
   /* Darwin has three separate syscall dispatch stubs (MACH, MDEP,
      UNIX), so each classification must test all three ranges. */
   outside_range
      =  (ip < ML_(blksys_setup_MACH) || ip >= ML_(blksys_finished_MACH))
      && (ip < ML_(blksys_setup_MDEP) || ip >= ML_(blksys_finished_MDEP))
      && (ip < ML_(blksys_setup_UNIX) || ip >= ML_(blksys_finished_UNIX));
   in_setup_to_restart
      =  (ip >= ML_(blksys_setup_MACH) && ip < ML_(blksys_restart_MACH))
      || (ip >= ML_(blksys_setup_MDEP) && ip < ML_(blksys_restart_MDEP))
      || (ip >= ML_(blksys_setup_UNIX) && ip < ML_(blksys_restart_UNIX));
   at_restart
      =  (ip == ML_(blksys_restart_MACH))
      || (ip == ML_(blksys_restart_MDEP))
      || (ip == ML_(blksys_restart_UNIX));
   in_complete_to_committed
      =  (ip >= ML_(blksys_complete_MACH) && ip < ML_(blksys_committed_MACH))
      || (ip >= ML_(blksys_complete_MDEP) && ip < ML_(blksys_committed_MDEP))
      || (ip >= ML_(blksys_complete_UNIX) && ip < ML_(blksys_committed_UNIX));
   in_committed_to_finished
      =  (ip >= ML_(blksys_committed_MACH) && ip < ML_(blksys_finished_MACH))
      || (ip >= ML_(blksys_committed_MDEP) && ip < ML_(blksys_finished_MDEP))
      || (ip >= ML_(blksys_committed_UNIX) && ip < ML_(blksys_finished_UNIX));
   /* Wasn't that just So Much Fun?  Does your head hurt yet?  Mine does. */
#  else
#    error "Unknown OS"
#  endif

   if (VG_(clo_trace_signals))
      VG_(message)( Vg_DebugMsg,
                    "interrupted_syscall: tid=%d, ip=0x%llx, "
                    "restart=%s, sres.isErr=%s, sres.val=%lld\n",
                    (Int)tid,
                    (ULong)ip,
                    restart ? "True" : "False",
                    sr_isError(sres) ? "True" : "False",
                    (Long)(sr_isError(sres) ? sr_Err(sres) : sr_Res(sres)) );

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst     = VG_(get_ThreadState)(tid);
   th_regs = &tst->arch;
   sci     = & syscallInfo[tid];

   /* Figure out what the state of the syscall was by examining the
      (real) IP at the time of the signal, and act accordingly. */
   if (outside_range) {
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  not in syscall at all: hmm, very suspicious\n" );
      /* Looks like we weren't in a syscall at all.  Hmm. */
      vg_assert(sci->status.what != SsIdle);
      return;
   }

   /* We should not be here unless this thread had first started up
      the machinery for a syscall by calling VG_(client_syscall).
      Hence: */
   vg_assert(sci->status.what != SsIdle);

   /* now, do one of four fixup actions, depending on where the IP has
      got to. */

   if (in_setup_to_restart) {
      /* syscall hasn't even started; go around again */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg, "  not started: restarting\n");
      vg_assert(sci->status.what == SsHandToKernel);
      ML_(fixup_guest_state_to_restart_syscall)(th_regs);
   }

   else
   if (at_restart) {
      /* We're either about to run the syscall, or it was interrupted
         and the kernel restarted it.  Restart if asked, otherwise
         EINTR it. */
      if (restart) {
         if (VG_(clo_trace_signals))
            VG_(message)( Vg_DebugMsg, "  at syscall instr: restarting\n");
         ML_(fixup_guest_state_to_restart_syscall)(th_regs);
      } else {
         if (VG_(clo_trace_signals))
            VG_(message)( Vg_DebugMsg, "  at syscall instr: returning EINTR\n");
         canonical = convert_SysRes_to_SyscallStatus(
                        VG_(mk_SysRes_Error)( VKI_EINTR )
                     );
         /* SfNoWriteResult: a wrapper (e.g. for a sigreturn-style
            syscall) asked us not to touch the guest registers. */
         if (!(sci->flags & SfNoWriteResult))
            putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
         sci->status = canonical;
         VG_(post_syscall)(tid);
      }
   }

   else
   if (in_complete_to_committed) {
      /* Syscall complete, but result hasn't been written back yet.
         Write the SysRes we were supplied with back to the guest
         state. */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  completed, but uncommitted: committing\n");
      canonical = convert_SysRes_to_SyscallStatus( sres );
      if (!(sci->flags & SfNoWriteResult))
         putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
      sci->status = canonical;
      VG_(post_syscall)(tid);
   }

   else
   if (in_committed_to_finished) {
      /* Result committed, but the signal mask has not been restored;
         we expect our caller (the signal handler) will have fixed
         this up. */
      if (VG_(clo_trace_signals))
         VG_(message)( Vg_DebugMsg,
                       "  completed and committed: nothing to do\n");
      /* The result is already in the guest state; read it back so
         sci->status reflects it. */
      getSyscallStatusFromGuestState( &sci->status, &th_regs->vex );
      vg_assert(sci->status.what == SsComplete);
      VG_(post_syscall)(tid);
   }

   else
      VG_(core_panic)("?? strange syscall interrupt state?");

   /* In all cases, the syscall is now finished (even if we called
      ML_(fixup_guest_state_to_restart_syscall), since that just
      re-positions the guest's IP for another go at it).  So we need
      to record that fact. */
   sci->status.what = SsIdle;
}
2095
2096
2097 #if defined(VGO_darwin)
// Clean up after workq_ops(WQOPS_THREAD_RETURN) jumped to wqthread_hijack.
// This is similar to VG_(fixup_guest_state_after_syscall_interrupted).
// This longjmps back to the scheduler.
//
// The outstanding workq_ops syscall is made to look as if it completed
// successfully -- without writing a result into the guest state
// (SfNoWriteResult) -- so that the normal post-syscall machinery runs,
// and then control is thrown back to the scheduler via the thread's
// sched_jmpbuf.  Never returns.
void ML_(wqthread_continue_NORETURN)(ThreadId tid)
{
   ThreadState* tst;
   SyscallInfo* sci;

   VG_(acquire_BigLock)(tid, "wqthread_continue_NORETURN");

   PRINT("SYSCALL[%d,%d](%s) workq_ops() starting new workqueue item\n",
         VG_(getpid)(), tid, VG_SYSNUM_STRING(__NR_workq_ops));

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst = VG_(get_ThreadState)(tid);
   sci = & syscallInfo[tid];
   vg_assert(sci->status.what != SsIdle);
   vg_assert(tst->os_state.wq_jmpbuf_valid);  // check this BEFORE post_syscall

   // Pretend the syscall completed normally, but don't touch the thread state.
   sci->status = convert_SysRes_to_SyscallStatus( VG_(mk_SysRes_Success)(0) );
   sci->flags |= SfNoWriteResult;
   VG_(post_syscall)(tid);

   // Mark the syscall machinery idle again before leaving.
   sci->status.what = SsIdle;

   vg_assert(tst->sched_jmpbuf_valid);
   VG_MINIMAL_LONGJMP(tst->sched_jmpbuf);

   /* NOTREACHED */
   vg_assert(0);
}
2133 #endif
2134
2135
2136 /* ---------------------------------------------------------------------
2137 A place to store the where-to-call-when-really-done pointer
2138 ------------------------------------------------------------------ */
2139
// When the final thread is done, where shall I call to shutdown the
// system cleanly?  Is set once at startup (in m_main) and never
// changes after that.  Is basically a pointer to the exit
// continuation.  This is all just a nasty hack to avoid calling
// directly from m_syswrap to m_main at exit, since that would cause
// m_main to become part of a module cycle, which is silly.
// NULL until m_main installs the continuation during startup.
void (* VG_(address_of_m_main_shutdown_actions_NORETURN) )
       (ThreadId,VgSchedReturnCode)
   = NULL;
2149
2150 /*--------------------------------------------------------------------*/
2151 /*--- end ---*/
2152 /*--------------------------------------------------------------------*/
2153