• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- Handle system calls.                          syswrap-main.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2000-2013 Julian Seward
11       jseward@acm.org
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 #include "libvex_guest_offsets.h"
32 #include "libvex_trc_values.h"
33 #include "pub_core_basics.h"
34 #include "pub_core_aspacemgr.h"
35 #include "pub_core_vki.h"
36 #include "pub_core_vkiscnums.h"
37 #include "pub_core_libcsetjmp.h"    // to keep _threadstate.h happy
38 #include "pub_core_threadstate.h"
39 #include "pub_core_libcbase.h"
40 #include "pub_core_libcassert.h"
41 #include "pub_core_libcprint.h"
42 #include "pub_core_libcproc.h"      // For VG_(getpid)()
43 #include "pub_core_libcsignal.h"
44 #include "pub_core_scheduler.h"     // For VG_({acquire,release}_BigLock),
45                                     //   and VG_(vg_yield)
46 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
47 #include "pub_core_tooliface.h"
48 #include "pub_core_options.h"
49 #include "pub_core_signals.h"       // For VG_SIGVGKILL, VG_(poll_signals)
50 #include "pub_core_syscall.h"
51 #include "pub_core_machine.h"
52 #include "pub_core_syswrap.h"
53 
54 #include "priv_types_n_macros.h"
55 #include "priv_syswrap-main.h"
56 
57 #if defined(VGO_darwin)
58 #include "priv_syswrap-darwin.h"
59 #endif
60 
61 /* Useful info which needs to be recorded somewhere:
62    Use of registers in syscalls is:
63 
64           NUM   ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT
65    LINUX:
66    x86    eax   ebx  ecx  edx  esi  edi  ebp  n/a  n/a  eax       (== NUM)
67    amd64  rax   rdi  rsi  rdx  r10  r8   r9   n/a  n/a  rax       (== NUM)
68    ppc32  r0    r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
69    ppc64  r0    r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
70    arm    r7    r0   r1   r2   r3   r4   r5   n/a  n/a  r0        (== ARG1)
71    mips32 v0    a0   a1   a2   a3 stack stack n/a  n/a  v0        (== NUM)
72    mips64 v0    a0   a1   a2   a3   a4   a5   a6   a7   v0        (== NUM)
73    arm64  x8    x0   x1   x2   x3   x4   x5   n/a  n/a  x0 ??     (== ARG1??)
74 
75    On s390x the svc instruction is used for system calls. The system call
76    number is encoded in the instruction (8 bit immediate field). Since Linux
77    2.6 it is also allowed to use svc 0 with the system call number in r1.
78    This was introduced for system calls >255, but works for all. It is
79    also possible to see the svc 0 together with an EXecute instruction, that
80    fills in the immediate field.
81    s390x r1/SVC r2   r3   r4   r5   r6   r7   n/a  n/a  r2        (== ARG1)
82 
83    DARWIN:
84    x86    eax +4   +8   +12  +16  +20  +24  +28  +32  edx:eax, eflags.c
85    amd64  rax rdi  rsi  rdx  rcx  r8   r9   +8   +16  rdx:rax, rflags.c
86 
87    For x86-darwin, "+N" denotes "in memory at N(%esp)"; ditto
88    amd64-darwin.  Apparently 0(%esp) is some kind of return address
89    (perhaps for syscalls done with "sysenter"?)  I don't think it is
90    relevant for syscalls done with "int $0x80/1/2".
91 */
92 
93 /* This is the top level of the system-call handler module.  All
94    system calls are channelled through here, doing two things:
95 
96    * notify the tool of the events (mem/reg reads, writes) happening
97 
98    * perform the syscall, usually by passing it along to the kernel
99      unmodified.
100 
101    A magical piece of assembly code, do_syscall_for_client_WRK, in
102    syscall-$PLATFORM.S does the tricky bit of passing a syscall to the
103    kernel, whilst having the simulator retain control.
104 */
105 
106 /* The main function is VG_(client_syscall).  The simulation calls it
107    whenever a client thread wants to do a syscall.  The following is a
108    sketch of what it does.
109 
110    * Ensures the root thread's stack is suitably mapped.  Tedious and
111      arcane.  See big big comment in VG_(client_syscall).
112 
113    * First, it rounds up the syscall number and args (which is a
114      platform dependent activity) and puts them in a struct ("args")
115      and also a copy in "orig_args".
116 
117      The pre/post wrappers refer to these structs and so no longer
118      need magic macros to access any specific registers.  This struct
119      is stored in thread-specific storage.
120 
121 
122    * The pre-wrapper is called, passing it a pointer to struct
123      "args".
124 
125 
126    * The pre-wrapper examines the args and pokes the tool
127      appropriately.  It may modify the args; this is why "orig_args"
128      is also stored.
129 
130      The pre-wrapper may choose to 'do' the syscall itself, and
131      concludes one of three outcomes:
132 
133        Success(N)    -- syscall is already complete, with success;
134                         result is N
135 
136        Fail(N)       -- syscall is already complete, with failure;
137                         error code is N
138 
139        HandToKernel  -- (the usual case): this needs to be given to
140                         the kernel to be done, using the values in
141                         the possibly-modified "args" struct.
142 
143      In addition, the pre-wrapper may set some flags:
144 
145        MayBlock   -- only applicable when outcome==HandToKernel
146 
147        PostOnFail -- only applicable when outcome==HandToKernel or Fail
148 
149 
150    * If the pre-outcome is HandToKernel, the syscall is duly handed
151      off to the kernel (perhaps involving some thread switchery, but
152      that's not important).  This reduces the possible set of outcomes
153      to either Success(N) or Fail(N).
154 
155 
156    * The outcome (Success(N) or Fail(N)) is written back to the guest
157      register(s).  This is platform specific:
158 
159      x86:    Success(N) ==>  eax = N
160              Fail(N)    ==>  eax = -N
161 
162      ditto amd64
163 
164      ppc32:  Success(N) ==>  r3 = N, CR0.SO = 0
165              Fail(N) ==>     r3 = N, CR0.SO = 1
166 
167      Darwin:
168      x86:    Success(N) ==>  edx:eax = N, cc = 0
169              Fail(N)    ==>  edx:eax = N, cc = 1
170 
171      s390x:  Success(N) ==>  r2 = N
172              Fail(N)    ==>  r2 = -N
173 
174    * The post wrapper is called if:
175 
176      - it exists, and
177      - outcome==Success or (outcome==Fail and PostOnFail is set)
178 
179      The post wrapper is passed the adulterated syscall args (struct
180      "args"), and the syscall outcome (viz, Success(N) or Fail(N)).
181 
182    There are several other complications, primarily to do with
183    syscalls getting interrupted, explained in comments in the code.
184 */
185 
186 /* CAVEATS for writing wrappers.  It is important to follow these!
187 
188    The macros defined in priv_types_n_macros.h are designed to help
189    decouple the wrapper logic from the actual representation of
190    syscall args/results, since these wrappers are designed to work on
191    multiple platforms.
192 
193    Sometimes a PRE wrapper will complete the syscall itself, without
194    handing it to the kernel.  It will use one of SET_STATUS_Success,
195    SET_STATUS_Failure or SET_STATUS_from_SysRes to set the return
196    value.  It is critical to appreciate that use of the macro does not
197    immediately cause the underlying guest state to be updated -- that
198    is done by the driver logic in this file, when the wrapper returns.
199 
200    As a result, PRE wrappers of the following form will malfunction:
201 
202    PRE(fooble)
203    {
204       ... do stuff ...
205       SET_STATUS_Somehow(...)
206 
207       // do something that assumes guest state is up to date
208    }
209 
210    In particular, direct or indirect calls to VG_(poll_signals) after
211    setting STATUS can cause the guest state to be read (in order to
212    build signal frames).  Do not do this.  If you want a signal poll
213    after the syscall goes through, do "*flags |= SfPollAfter" and the
214    driver logic will do it for you.
215 
216    -----------
217 
218    Another critical requirement following introduction of new address
219    space manager (JRS, 20050923):
220 
221    In a situation where the mappedness of memory has changed, aspacem
222    should be notified BEFORE the tool.  Hence the following is
223    correct:
224 
225       Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
226       VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
227       if (d)
228          VG_(discard_translations)(s->start, s->end+1 - s->start);
229 
230    whilst this is wrong:
231 
232       VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
233       Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
234       if (d)
235          VG_(discard_translations)(s->start, s->end+1 - s->start);
236 
237    The reason is that the tool may itself ask aspacem for more shadow
238    memory as a result of the VG_TRACK call.  In such a situation it is
239    critical that aspacem's segment array is up to date -- hence the
240    need to notify aspacem first.
241 
242    -----------
243 
244    Also .. take care to call VG_(discard_translations) whenever
245    memory with execute permissions is unmapped.
246 */
247 
248 
249 /* ---------------------------------------------------------------------
250    Do potentially blocking syscall for the client, and mess with
251    signal masks at the same time.
252    ------------------------------------------------------------------ */
253 
254 /* Perform a syscall on behalf of a client thread, using a specific
255    signal mask.  On completion, the signal mask is set to restore_mask
256    (which presumably blocks almost everything).  If a signal happens
257    during the syscall, the handler should call
258    VG_(fixup_guest_state_after_syscall_interrupted) to adjust the
259    thread's context to do the right thing.
260 
261    The _WRK function is handwritten assembly, implemented per-platform
262    in coregrind/m_syswrap/syscall-$PLAT.S.  It has some very magic
263    properties.  See comments at the top of
264    VG_(fixup_guest_state_after_syscall_interrupted) below for details.
265 
266    This function (these functions) are required to return zero in case
267    of success (even if the syscall itself failed), and nonzero if the
268    sigprocmask-swizzling calls failed.  We don't actually care about
269    the failure values from sigprocmask, although most of the assembly
270    implementations do attempt to return that, using the convention
271    0 for success, or 0x8000 | error-code for failure.
272 */
273 #if defined(VGO_linux)
274 extern
275 UWord ML_(do_syscall_for_client_WRK)( Word syscallno,
276                                       void* guest_state,
277                                       const vki_sigset_t *syscall_mask,
278                                       const vki_sigset_t *restore_mask,
279                                       Word sigsetSzB );
280 #elif defined(VGO_darwin)
281 extern
282 UWord ML_(do_syscall_for_client_unix_WRK)( Word syscallno,
283                                            void* guest_state,
284                                            const vki_sigset_t *syscall_mask,
285                                            const vki_sigset_t *restore_mask,
286                                            Word sigsetSzB ); /* unused */
287 extern
288 UWord ML_(do_syscall_for_client_mach_WRK)( Word syscallno,
289                                            void* guest_state,
290                                            const vki_sigset_t *syscall_mask,
291                                            const vki_sigset_t *restore_mask,
292                                            Word sigsetSzB ); /* unused */
293 extern
294 UWord ML_(do_syscall_for_client_mdep_WRK)( Word syscallno,
295                                            void* guest_state,
296                                            const vki_sigset_t *syscall_mask,
297                                            const vki_sigset_t *restore_mask,
298                                            Word sigsetSzB ); /* unused */
299 #else
300 #  error "Unknown OS"
301 #endif
302 
303 
/* Run syscall 'syscallno' on behalf of client thread 'tst', with the
   thread's signal mask set to 'syscall_mask' while the kernel call is
   in flight.  The previous mask is captured in 'saved' by the
   handwritten assembly helper(s) and restored by them on the way out.
   The helper's return value reports only the success of the
   sigprocmask swizzling (0 = OK, 0x8000|errcode = failure); the
   syscall's own result is deposited directly into the guest state. */
static
void do_syscall_for_client ( Int syscallno,
                             ThreadState* tst,
                             const vki_sigset_t* syscall_mask )
{
   vki_sigset_t saved;
   UWord err;
#  if defined(VGO_linux)
   err = ML_(do_syscall_for_client_WRK)(
            syscallno, &tst->arch.vex,
            syscall_mask, &saved, sizeof(vki_sigset_t)
         );
#  elif defined(VGO_darwin)
   /* Darwin encodes the syscall class (Unix / Mach / mdep) in the
      number itself; each class has its own assembly helper. */
   switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         err = ML_(do_syscall_for_client_unix_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         err = ML_(do_syscall_for_client_mach_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         err = ML_(do_syscall_for_client_mdep_WRK)(
                  VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
                  syscall_mask, &saved, 0/*unused:sigsetSzB*/
               );
         break;
      default:
         vg_assert(0);
         /*NOTREACHED*/
         break;
   }
#  else
#    error "Unknown OS"
#  endif
   /* A nonzero 'err' means the mask swizzling itself failed, which is
      unrecoverable -- low 12 bits carry the error code. */
   vg_assert2(
      err == 0,
      "ML_(do_syscall_for_client_WRK): sigprocmask error %d",
      (Int)(err & 0xFFF)
   );
}
350 
351 
352 /* ---------------------------------------------------------------------
353    Impedance matchers and misc helpers
354    ------------------------------------------------------------------ */
355 
356 static
eq_SyscallArgs(SyscallArgs * a1,SyscallArgs * a2)357 Bool eq_SyscallArgs ( SyscallArgs* a1, SyscallArgs* a2 )
358 {
359    return a1->sysno == a2->sysno
360           && a1->arg1 == a2->arg1
361           && a1->arg2 == a2->arg2
362           && a1->arg3 == a2->arg3
363           && a1->arg4 == a2->arg4
364           && a1->arg5 == a2->arg5
365           && a1->arg6 == a2->arg6
366           && a1->arg7 == a2->arg7
367           && a1->arg8 == a2->arg8;
368 }
369 
/* Return True iff the two SyscallStatus values agree: same completion
   state ('what') and, per sr_EQ, the same SysRes payload.  On Darwin,
   an inequality here is unexpected, so the mismatching SysRes fields
   are dumped and an assertion fires instead of returning False. */
static
Bool eq_SyscallStatus ( SyscallStatus* s1, SyscallStatus* s2 )
{
   /* was: return s1->what == s2->what && sr_EQ( s1->sres, s2->sres ); */
   if (s1->what == s2->what && sr_EQ( s1->sres, s2->sres ))
      return True;
#  if defined(VGO_darwin)
   /* Darwin-specific debugging guff */
   vg_assert(s1->what == s2->what);
   VG_(printf)("eq_SyscallStatus:\n");
   VG_(printf)("  {%lu %lu %u}\n", s1->sres._wLO, s1->sres._wHI, s1->sres._mode);
   VG_(printf)("  {%lu %lu %u}\n", s2->sres._wLO, s2->sres._wHI, s2->sres._mode);
   vg_assert(0);
#  endif
   return False;
}
386 
387 /* Convert between SysRes and SyscallStatus, to the extent possible. */
388 
389 static
convert_SysRes_to_SyscallStatus(SysRes res)390 SyscallStatus convert_SysRes_to_SyscallStatus ( SysRes res )
391 {
392    SyscallStatus status;
393    status.what = SsComplete;
394    status.sres = res;
395    return status;
396 }
397 
398 
399 /* Impedance matchers.  These convert syscall arg or result data from
400    the platform-specific in-guest-state format to the canonical
401    formats, and back. */
402 
403 static
getSyscallArgsFromGuestState(SyscallArgs * canonical,VexGuestArchState * gst_vanilla,UInt trc)404 void getSyscallArgsFromGuestState ( /*OUT*/SyscallArgs*       canonical,
405                                     /*IN*/ VexGuestArchState* gst_vanilla,
406                                     /*IN*/ UInt trc )
407 {
408 #if defined(VGP_x86_linux)
409    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
410    canonical->sysno = gst->guest_EAX;
411    canonical->arg1  = gst->guest_EBX;
412    canonical->arg2  = gst->guest_ECX;
413    canonical->arg3  = gst->guest_EDX;
414    canonical->arg4  = gst->guest_ESI;
415    canonical->arg5  = gst->guest_EDI;
416    canonical->arg6  = gst->guest_EBP;
417    canonical->arg7  = 0;
418    canonical->arg8  = 0;
419 
420 #elif defined(VGP_amd64_linux)
421    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
422    canonical->sysno = gst->guest_RAX;
423    canonical->arg1  = gst->guest_RDI;
424    canonical->arg2  = gst->guest_RSI;
425    canonical->arg3  = gst->guest_RDX;
426    canonical->arg4  = gst->guest_R10;
427    canonical->arg5  = gst->guest_R8;
428    canonical->arg6  = gst->guest_R9;
429    canonical->arg7  = 0;
430    canonical->arg8  = 0;
431 
432 #elif defined(VGP_ppc32_linux)
433    VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
434    canonical->sysno = gst->guest_GPR0;
435    canonical->arg1  = gst->guest_GPR3;
436    canonical->arg2  = gst->guest_GPR4;
437    canonical->arg3  = gst->guest_GPR5;
438    canonical->arg4  = gst->guest_GPR6;
439    canonical->arg5  = gst->guest_GPR7;
440    canonical->arg6  = gst->guest_GPR8;
441    canonical->arg7  = 0;
442    canonical->arg8  = 0;
443 
444 #elif defined(VGP_ppc64_linux)
445    VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
446    canonical->sysno = gst->guest_GPR0;
447    canonical->arg1  = gst->guest_GPR3;
448    canonical->arg2  = gst->guest_GPR4;
449    canonical->arg3  = gst->guest_GPR5;
450    canonical->arg4  = gst->guest_GPR6;
451    canonical->arg5  = gst->guest_GPR7;
452    canonical->arg6  = gst->guest_GPR8;
453    canonical->arg7  = 0;
454    canonical->arg8  = 0;
455 
456 #elif defined(VGP_arm_linux)
457    VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
458    canonical->sysno = gst->guest_R7;
459    canonical->arg1  = gst->guest_R0;
460    canonical->arg2  = gst->guest_R1;
461    canonical->arg3  = gst->guest_R2;
462    canonical->arg4  = gst->guest_R3;
463    canonical->arg5  = gst->guest_R4;
464    canonical->arg6  = gst->guest_R5;
465    canonical->arg7  = 0;
466    canonical->arg8  = 0;
467 
468 #elif defined(VGP_arm64_linux)
469    VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
470    canonical->sysno = gst->guest_X8;
471    canonical->arg1  = gst->guest_X0;
472    canonical->arg2  = gst->guest_X1;
473    canonical->arg3  = gst->guest_X2;
474    canonical->arg4  = gst->guest_X3;
475    canonical->arg5  = gst->guest_X4;
476    canonical->arg6  = gst->guest_X5;
477    canonical->arg7  = 0;
478    canonical->arg8  = 0;
479 
480 #elif defined(VGP_mips32_linux)
481    VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
482    canonical->sysno = gst->guest_r2;    // v0
483    if (canonical->sysno == __NR_exit) {
484       canonical->arg1 = gst->guest_r4;    // a0
485       canonical->arg2 = 0;
486       canonical->arg3 = 0;
487       canonical->arg4 = 0;
488       canonical->arg5 = 0;
489       canonical->arg6 = 0;
490       canonical->arg8 = 0;
491    } else if (canonical->sysno != __NR_syscall) {
492       canonical->arg1  = gst->guest_r4;    // a0
493       canonical->arg2  = gst->guest_r5;    // a1
494       canonical->arg3  = gst->guest_r6;    // a2
495       canonical->arg4  = gst->guest_r7;    // a3
496       canonical->arg5  = *((UInt*) (gst->guest_r29 + 16));    // 16(guest_SP/sp)
497       canonical->arg6  = *((UInt*) (gst->guest_r29 + 20));    // 20(sp)
498       canonical->arg8 = 0;
499    } else {
500       // Fixme hack handle syscall()
501       canonical->sysno = gst->guest_r4;    // a0
502       canonical->arg1  = gst->guest_r5;    // a1
503       canonical->arg2  = gst->guest_r6;    // a2
504       canonical->arg3  = gst->guest_r7;    // a3
505       canonical->arg4  = *((UInt*) (gst->guest_r29 + 16));    // 16(guest_SP/sp)
506       canonical->arg5  = *((UInt*) (gst->guest_r29 + 20));    // 20(guest_SP/sp)
507       canonical->arg6  = *((UInt*) (gst->guest_r29 + 24));    // 24(guest_SP/sp)
508       canonical->arg8 = __NR_syscall;
509    }
510 
511 #elif defined(VGP_mips64_linux)
512    VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla;
513    canonical->sysno = gst->guest_r2;    // v0
514    canonical->arg1  = gst->guest_r4;    // a0
515    canonical->arg2  = gst->guest_r5;    // a1
516    canonical->arg3  = gst->guest_r6;    // a2
517    canonical->arg4  = gst->guest_r7;    // a3
518    canonical->arg5  = gst->guest_r8;    // a4
519    canonical->arg6  = gst->guest_r9;    // a5
520 
521 #elif defined(VGP_x86_darwin)
522    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
523    UWord *stack = (UWord *)gst->guest_ESP;
524    // GrP fixme hope syscalls aren't called with really shallow stacks...
525    canonical->sysno = gst->guest_EAX;
526    if (canonical->sysno != 0) {
527       // stack[0] is return address
528       canonical->arg1  = stack[1];
529       canonical->arg2  = stack[2];
530       canonical->arg3  = stack[3];
531       canonical->arg4  = stack[4];
532       canonical->arg5  = stack[5];
533       canonical->arg6  = stack[6];
534       canonical->arg7  = stack[7];
535       canonical->arg8  = stack[8];
536    } else {
537       // GrP fixme hack handle syscall()
538       // GrP fixme what about __syscall() ?
539       // stack[0] is return address
540       // DDD: the tool can't see that the params have been shifted!  Can
541       //      lead to incorrect checking, I think, because the PRRAn/PSARn
542       //      macros will mention the pre-shifted args.
543       canonical->sysno = stack[1];
544       vg_assert(canonical->sysno != 0);
545       canonical->arg1  = stack[2];
546       canonical->arg2  = stack[3];
547       canonical->arg3  = stack[4];
548       canonical->arg4  = stack[5];
549       canonical->arg5  = stack[6];
550       canonical->arg6  = stack[7];
551       canonical->arg7  = stack[8];
552       canonical->arg8  = stack[9];
553 
554       PRINT("SYSCALL[%d,?](%s) syscall(%s, ...); please stand by...\n",
555             VG_(getpid)(), /*tid,*/
556             VG_SYSNUM_STRING(0), VG_SYSNUM_STRING(canonical->sysno));
557    }
558 
559    // Here we determine what kind of syscall it was by looking at the
560    // interrupt kind, and then encode the syscall number using the 64-bit
561    // encoding for Valgrind's internal use.
562    //
563    // DDD: Would it be better to stash the JMP kind into the Darwin
564    // thread state rather than passing in the trc?
565    switch (trc) {
566    case VEX_TRC_JMP_SYS_INT128:
567       // int $0x80 = Unix, 64-bit result
568       vg_assert(canonical->sysno >= 0);
569       canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno);
570       break;
571    case VEX_TRC_JMP_SYS_SYSENTER:
572       // syscall = Unix, 32-bit result
573       // OR        Mach, 32-bit result
574       if (canonical->sysno >= 0) {
575          // GrP fixme hack:  0xffff == I386_SYSCALL_NUMBER_MASK
576          canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno
577                                                              & 0xffff);
578       } else {
579          canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
580       }
581       break;
582    case VEX_TRC_JMP_SYS_INT129:
583       // int $0x81 = Mach, 32-bit result
584       vg_assert(canonical->sysno < 0);
585       canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
586       break;
587    case VEX_TRC_JMP_SYS_INT130:
588       // int $0x82 = mdep, 32-bit result
589       vg_assert(canonical->sysno >= 0);
590       canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MDEP(canonical->sysno);
591       break;
592    default:
593       vg_assert(0);
594       break;
595    }
596 
597 #elif defined(VGP_amd64_darwin)
598    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
599    UWord *stack = (UWord *)gst->guest_RSP;
600 
601    vg_assert(trc == VEX_TRC_JMP_SYS_SYSCALL);
602 
603    // GrP fixme hope syscalls aren't called with really shallow stacks...
604    canonical->sysno = gst->guest_RAX;
605    if (canonical->sysno != __NR_syscall) {
606       // stack[0] is return address
607       canonical->arg1  = gst->guest_RDI;
608       canonical->arg2  = gst->guest_RSI;
609       canonical->arg3  = gst->guest_RDX;
610       canonical->arg4  = gst->guest_R10;  // not rcx with syscall insn
611       canonical->arg5  = gst->guest_R8;
612       canonical->arg6  = gst->guest_R9;
613       canonical->arg7  = stack[1];
614       canonical->arg8  = stack[2];
615    } else {
616       // GrP fixme hack handle syscall()
617       // GrP fixme what about __syscall() ?
618       // stack[0] is return address
619       // DDD: the tool can't see that the params have been shifted!  Can
620       //      lead to incorrect checking, I think, because the PRRAn/PSARn
621       //      macros will mention the pre-shifted args.
622       canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(gst->guest_RDI);
623       vg_assert(canonical->sysno != __NR_syscall);
624       canonical->arg1  = gst->guest_RSI;
625       canonical->arg2  = gst->guest_RDX;
626       canonical->arg3  = gst->guest_R10;  // not rcx with syscall insn
627       canonical->arg4  = gst->guest_R8;
628       canonical->arg5  = gst->guest_R9;
629       canonical->arg6  = stack[1];
630       canonical->arg7  = stack[2];
631       canonical->arg8  = stack[3];
632 
633       PRINT("SYSCALL[%d,?](%s) syscall(%s, ...); please stand by...\n",
634             VG_(getpid)(), /*tid,*/
635             VG_SYSNUM_STRING(0), VG_SYSNUM_STRING(canonical->sysno));
636    }
637 
638    // no canonical->sysno adjustment needed
639 
640 #elif defined(VGP_s390x_linux)
641    VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
642    canonical->sysno = gst->guest_SYSNO;
643    canonical->arg1  = gst->guest_r2;
644    canonical->arg2  = gst->guest_r3;
645    canonical->arg3  = gst->guest_r4;
646    canonical->arg4  = gst->guest_r5;
647    canonical->arg5  = gst->guest_r6;
648    canonical->arg6  = gst->guest_r7;
649    canonical->arg7  = 0;
650    canonical->arg8  = 0;
651 #else
652 #  error "getSyscallArgsFromGuestState: unknown arch"
653 #endif
654 }
655 
/* Inverse of getSyscallArgsFromGuestState: write the (possibly
   wrapper-modified) canonical syscall number and arguments back into
   the platform-specific guest register state 'gst_vanilla', so the
   kernel sees exactly the values in 'canonical'.  On Darwin and
   mips32, some arguments live on the guest stack, so this writes
   through guest_ESP/RSP/r29 into client memory. */
static
void putSyscallArgsIntoGuestState ( /*IN*/ SyscallArgs*       canonical,
                                    /*OUT*/VexGuestArchState* gst_vanilla )
{
#if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   gst->guest_EAX = canonical->sysno;
   gst->guest_EBX = canonical->arg1;
   gst->guest_ECX = canonical->arg2;
   gst->guest_EDX = canonical->arg3;
   gst->guest_ESI = canonical->arg4;
   gst->guest_EDI = canonical->arg5;
   gst->guest_EBP = canonical->arg6;

#elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   gst->guest_RAX = canonical->sysno;
   gst->guest_RDI = canonical->arg1;
   gst->guest_RSI = canonical->arg2;
   gst->guest_RDX = canonical->arg3;
   gst->guest_R10 = canonical->arg4;
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;

#elif defined(VGP_ppc32_linux)
   VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
   gst->guest_GPR0 = canonical->sysno;
   gst->guest_GPR3 = canonical->arg1;
   gst->guest_GPR4 = canonical->arg2;
   gst->guest_GPR5 = canonical->arg3;
   gst->guest_GPR6 = canonical->arg4;
   gst->guest_GPR7 = canonical->arg5;
   gst->guest_GPR8 = canonical->arg6;

#elif defined(VGP_ppc64_linux)
   VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
   gst->guest_GPR0 = canonical->sysno;
   gst->guest_GPR3 = canonical->arg1;
   gst->guest_GPR4 = canonical->arg2;
   gst->guest_GPR5 = canonical->arg3;
   gst->guest_GPR6 = canonical->arg4;
   gst->guest_GPR7 = canonical->arg5;
   gst->guest_GPR8 = canonical->arg6;

#elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   gst->guest_R7 = canonical->sysno;
   gst->guest_R0 = canonical->arg1;
   gst->guest_R1 = canonical->arg2;
   gst->guest_R2 = canonical->arg3;
   gst->guest_R3 = canonical->arg4;
   gst->guest_R4 = canonical->arg5;
   gst->guest_R5 = canonical->arg6;

#elif defined(VGP_arm64_linux)
   VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
   gst->guest_X8 = canonical->sysno;
   gst->guest_X0 = canonical->arg1;
   gst->guest_X1 = canonical->arg2;
   gst->guest_X2 = canonical->arg3;
   gst->guest_X3 = canonical->arg4;
   gst->guest_X4 = canonical->arg5;
   gst->guest_X5 = canonical->arg6;

#elif defined(VGP_x86_darwin)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_ESP;

   gst->guest_EAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);

   // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;
   // stack[0] is return address
   /* x86-darwin passes all args on the stack, above the return addr. */
   stack[1] = canonical->arg1;
   stack[2] = canonical->arg2;
   stack[3] = canonical->arg3;
   stack[4] = canonical->arg4;
   stack[5] = canonical->arg5;
   stack[6] = canonical->arg6;
   stack[7] = canonical->arg7;
   stack[8] = canonical->arg8;

#elif defined(VGP_amd64_darwin)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   UWord *stack = (UWord *)gst->guest_RSP;

   gst->guest_RAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);
   // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;

   // stack[0] is return address
   /* First six args in registers, args 7 and 8 on the stack. */
   gst->guest_RDI = canonical->arg1;
   gst->guest_RSI = canonical->arg2;
   gst->guest_RDX = canonical->arg3;
   gst->guest_RCX = canonical->arg4;
   gst->guest_R8  = canonical->arg5;
   gst->guest_R9  = canonical->arg6;
   stack[1]       = canonical->arg7;
   stack[2]       = canonical->arg8;

#elif defined(VGP_s390x_linux)
   VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
   gst->guest_SYSNO  = canonical->sysno;
   gst->guest_r2     = canonical->arg1;
   gst->guest_r3     = canonical->arg2;
   gst->guest_r4     = canonical->arg3;
   gst->guest_r5     = canonical->arg4;
   gst->guest_r6     = canonical->arg5;
   gst->guest_r7     = canonical->arg6;

#elif defined(VGP_mips32_linux)
   VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
   /* arg8 == __NR_syscall flags that the client made an indirect
      syscall(); in that case the real number goes in a0 and all args
      shift up one slot (see getSyscallArgsFromGuestState). */
   if (canonical->arg8 != __NR_syscall) {
      gst->guest_r2 = canonical->sysno;
      gst->guest_r4 = canonical->arg1;
      gst->guest_r5 = canonical->arg2;
      gst->guest_r6 = canonical->arg3;
      gst->guest_r7 = canonical->arg4;
      *((UInt*) (gst->guest_r29 + 16)) = canonical->arg5; // 16(guest_GPR29/sp)
      *((UInt*) (gst->guest_r29 + 20)) = canonical->arg6; // 20(sp)
   } else {
      canonical->arg8 = 0;
      gst->guest_r2 = __NR_syscall;
      gst->guest_r4 = canonical->sysno;
      gst->guest_r5 = canonical->arg1;
      gst->guest_r6 = canonical->arg2;
      gst->guest_r7 = canonical->arg3;
      *((UInt*) (gst->guest_r29 + 16)) = canonical->arg4; // 16(guest_GPR29/sp)
      *((UInt*) (gst->guest_r29 + 20)) = canonical->arg5; // 20(sp)
      *((UInt*) (gst->guest_r29 + 24)) = canonical->arg6; // 24(sp)
   }

#elif defined(VGP_mips64_linux)
   VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla;
   gst->guest_r2 = canonical->sysno;
   gst->guest_r4 = canonical->arg1;
   gst->guest_r5 = canonical->arg2;
   gst->guest_r6 = canonical->arg3;
   gst->guest_r7 = canonical->arg4;
   gst->guest_r8 = canonical->arg5;
   gst->guest_r9 = canonical->arg6;
#else
#  error "putSyscallArgsIntoGuestState: unknown arch"
#endif
}
799 
/* Extract the result of the syscall just performed from the guest
   state, and convert it into a canonical (platform-independent)
   SyscallStatus, marked SsComplete.  This is the read-side inverse
   of putSyscallStatusIntoGuestState below: each platform branch
   reads the arch-specific result register(s) and feeds them to the
   matching VG_(mk_SysRes_*) constructor. */
static
void getSyscallStatusFromGuestState ( /*OUT*/SyscallStatus*     canonical,
                                      /*IN*/ VexGuestArchState* gst_vanilla )
{
#  if defined(VGP_x86_linux)
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_x86_linux)( gst->guest_EAX );
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_linux)
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_amd64_linux)( gst->guest_RAX );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc32_linux)
   VexGuestPPC32State* gst   = (VexGuestPPC32State*)gst_vanilla;
   UInt                cr    = LibVEX_GuestPPC32_get_CR( gst );
   UInt                cr0so = (cr >> 28) & 1;  /* CR0.SO: error indicator */
   canonical->sres = VG_(mk_SysRes_ppc32_linux)( gst->guest_GPR3, cr0so );
   canonical->what = SsComplete;

#  elif defined(VGP_ppc64_linux)
   VexGuestPPC64State* gst   = (VexGuestPPC64State*)gst_vanilla;
   UInt                cr    = LibVEX_GuestPPC64_get_CR( gst );
   UInt                cr0so = (cr >> 28) & 1;  /* CR0.SO: error indicator */
   canonical->sres = VG_(mk_SysRes_ppc64_linux)( gst->guest_GPR3, cr0so );
   canonical->what = SsComplete;

#  elif defined(VGP_arm_linux)
   VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_arm_linux)( gst->guest_R0 );
   canonical->what = SsComplete;

#  elif defined(VGP_arm64_linux)
   VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_arm64_linux)( gst->guest_X0 );
   canonical->what = SsComplete;

#  elif defined(VGP_mips32_linux)
   VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
   UInt                v0 = gst->guest_r2;    // v0
   UInt                v1 = gst->guest_r3;    // v1
   UInt                a3 = gst->guest_r7;    // a3
   canonical->sres = VG_(mk_SysRes_mips32_linux)( v0, v1, a3 );
   canonical->what = SsComplete;

#  elif defined(VGP_mips64_linux)
   VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla;
   ULong                v0 = gst->guest_r2;    // v0
   ULong                v1 = gst->guest_r3;    // v1
   ULong                a3 = gst->guest_r7;    // a3
   canonical->sres = VG_(mk_SysRes_mips64_linux)(v0, v1, a3);
   canonical->what = SsComplete;

#  elif defined(VGP_x86_darwin)
   /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
   VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
   UInt carry = 1 & LibVEX_GuestX86_get_eflags(gst);
   UInt err = 0;
   UInt wLO = 0;
   UInt wHI = 0;
   /* How the result registers are interpreted depends on which
      syscall class was invoked; the class was stashed in
      guest_SC_CLASS at syscall entry (see VG_(client_syscall)). */
   switch (gst->guest_SC_CLASS) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         // int $0x80 = Unix, 64-bit result
         err = carry;
         wLO = gst->guest_EAX;
         wHI = gst->guest_EDX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         // int $0x81 = Mach, 32-bit result
         wLO = gst->guest_EAX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         // int $0x82 = mdep, 32-bit result
         wLO = gst->guest_EAX;
         break;
      default:
         vg_assert(0);
         break;
   }
   canonical->sres = VG_(mk_SysRes_x86_darwin)(
                        gst->guest_SC_CLASS, err ? True : False,
                        wHI, wLO
                     );
   canonical->what = SsComplete;

#  elif defined(VGP_amd64_darwin)
   /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
   VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
   ULong carry = 1 & LibVEX_GuestAMD64_get_rflags(gst);
   ULong err = 0;
   ULong wLO = 0;
   ULong wHI = 0;
   /* As for x86-darwin above: interpretation depends on the syscall
      class recorded in guest_SC_CLASS at syscall entry. */
   switch (gst->guest_SC_CLASS) {
      case VG_DARWIN_SYSCALL_CLASS_UNIX:
         // syscall = Unix, 128-bit result
         err = carry;
         wLO = gst->guest_RAX;
         wHI = gst->guest_RDX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MACH:
         // syscall = Mach, 64-bit result
         wLO = gst->guest_RAX;
         break;
      case VG_DARWIN_SYSCALL_CLASS_MDEP:
         // syscall = mdep, 64-bit result
         wLO = gst->guest_RAX;
         break;
      default:
         vg_assert(0);
         break;
   }
   canonical->sres = VG_(mk_SysRes_amd64_darwin)(
                        gst->guest_SC_CLASS, err ? True : False,
                        wHI, wLO
                     );
   canonical->what = SsComplete;

#  elif defined(VGP_s390x_linux)
   VexGuestS390XState* gst   = (VexGuestS390XState*)gst_vanilla;
   canonical->sres = VG_(mk_SysRes_s390x_linux)( gst->guest_r2 );
   canonical->what = SsComplete;

#  else
#    error "getSyscallStatusFromGuestState: unknown arch"
#  endif
}
927 
928 static
putSyscallStatusIntoGuestState(ThreadId tid,SyscallStatus * canonical,VexGuestArchState * gst_vanilla)929 void putSyscallStatusIntoGuestState ( /*IN*/ ThreadId tid,
930                                       /*IN*/ SyscallStatus*     canonical,
931                                       /*OUT*/VexGuestArchState* gst_vanilla )
932 {
933 #  if defined(VGP_x86_linux)
934    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
935    vg_assert(canonical->what == SsComplete);
936    if (sr_isError(canonical->sres)) {
937       /* This isn't exactly right, in that really a Failure with res
938          not in the range 1 .. 4095 is unrepresentable in the
939          Linux-x86 scheme.  Oh well. */
940       gst->guest_EAX = - (Int)sr_Err(canonical->sres);
941    } else {
942       gst->guest_EAX = sr_Res(canonical->sres);
943    }
944    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
945              OFFSET_x86_EAX, sizeof(UWord) );
946 
947 #  elif defined(VGP_amd64_linux)
948    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
949    vg_assert(canonical->what == SsComplete);
950    if (sr_isError(canonical->sres)) {
951       /* This isn't exactly right, in that really a Failure with res
952          not in the range 1 .. 4095 is unrepresentable in the
953          Linux-amd64 scheme.  Oh well. */
954       gst->guest_RAX = - (Long)sr_Err(canonical->sres);
955    } else {
956       gst->guest_RAX = sr_Res(canonical->sres);
957    }
958    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
959              OFFSET_amd64_RAX, sizeof(UWord) );
960 
961 #  elif defined(VGP_ppc32_linux)
962    VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
963    UInt old_cr = LibVEX_GuestPPC32_get_CR(gst);
964    vg_assert(canonical->what == SsComplete);
965    if (sr_isError(canonical->sres)) {
966       /* set CR0.SO */
967       LibVEX_GuestPPC32_put_CR( old_cr | (1<<28), gst );
968       gst->guest_GPR3 = sr_Err(canonical->sres);
969    } else {
970       /* clear CR0.SO */
971       LibVEX_GuestPPC32_put_CR( old_cr & ~(1<<28), gst );
972       gst->guest_GPR3 = sr_Res(canonical->sres);
973    }
974    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
975              OFFSET_ppc32_GPR3, sizeof(UWord) );
976    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
977              OFFSET_ppc32_CR0_0, sizeof(UChar) );
978 
979 #  elif defined(VGP_ppc64_linux)
980    VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
981    UInt old_cr = LibVEX_GuestPPC64_get_CR(gst);
982    vg_assert(canonical->what == SsComplete);
983    if (sr_isError(canonical->sres)) {
984       /* set CR0.SO */
985       LibVEX_GuestPPC64_put_CR( old_cr | (1<<28), gst );
986       gst->guest_GPR3 = sr_Err(canonical->sres);
987    } else {
988       /* clear CR0.SO */
989       LibVEX_GuestPPC64_put_CR( old_cr & ~(1<<28), gst );
990       gst->guest_GPR3 = sr_Res(canonical->sres);
991    }
992    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
993              OFFSET_ppc64_GPR3, sizeof(UWord) );
994    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
995              OFFSET_ppc64_CR0_0, sizeof(UChar) );
996 
997 #  elif defined(VGP_arm_linux)
998    VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
999    vg_assert(canonical->what == SsComplete);
1000    if (sr_isError(canonical->sres)) {
1001       /* This isn't exactly right, in that really a Failure with res
1002          not in the range 1 .. 4095 is unrepresentable in the
1003          Linux-arm scheme.  Oh well. */
1004       gst->guest_R0 = - (Int)sr_Err(canonical->sres);
1005    } else {
1006       gst->guest_R0 = sr_Res(canonical->sres);
1007    }
1008    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1009              OFFSET_arm_R0, sizeof(UWord) );
1010 
1011 #  elif defined(VGP_arm64_linux)
1012    VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
1013    vg_assert(canonical->what == SsComplete);
1014    if (sr_isError(canonical->sres)) {
1015       /* This isn't exactly right, in that really a Failure with res
1016          not in the range 1 .. 4095 is unrepresentable in the
1017          Linux-arm64 scheme.  Oh well. */
1018       gst->guest_X0 = - (Long)sr_Err(canonical->sres);
1019    } else {
1020       gst->guest_X0 = sr_Res(canonical->sres);
1021    }
1022    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1023              OFFSET_arm64_X0, sizeof(UWord) );
1024 
1025 #elif defined(VGP_x86_darwin)
1026    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
1027    SysRes sres = canonical->sres;
1028    vg_assert(canonical->what == SsComplete);
1029    /* Unfortunately here we have to break abstraction and look
1030       directly inside 'res', in order to decide what to do. */
1031    switch (sres._mode) {
1032       case SysRes_MACH: // int $0x81 = Mach, 32-bit result
1033       case SysRes_MDEP: // int $0x82 = mdep, 32-bit result
1034          gst->guest_EAX = sres._wLO;
1035          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1036                    OFFSET_x86_EAX, sizeof(UInt) );
1037          break;
1038       case SysRes_UNIX_OK:  // int $0x80 = Unix, 64-bit result
1039       case SysRes_UNIX_ERR: // int $0x80 = Unix, 64-bit error
1040          gst->guest_EAX = sres._wLO;
1041          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1042                    OFFSET_x86_EAX, sizeof(UInt) );
1043          gst->guest_EDX = sres._wHI;
1044          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1045                    OFFSET_x86_EDX, sizeof(UInt) );
1046          LibVEX_GuestX86_put_eflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
1047                                       gst );
1048          // GrP fixme sets defined for entire eflags, not just bit c
1049          // DDD: this breaks exp-ptrcheck.
1050          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1051                    offsetof(VexGuestX86State, guest_CC_DEP1), sizeof(UInt) );
1052          break;
1053       default:
1054          vg_assert(0);
1055          break;
1056    }
1057 
1058 #elif defined(VGP_amd64_darwin)
1059    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
1060    SysRes sres = canonical->sres;
1061    vg_assert(canonical->what == SsComplete);
1062    /* Unfortunately here we have to break abstraction and look
1063       directly inside 'res', in order to decide what to do. */
1064    switch (sres._mode) {
1065       case SysRes_MACH: // syscall = Mach, 64-bit result
1066       case SysRes_MDEP: // syscall = mdep, 64-bit result
1067          gst->guest_RAX = sres._wLO;
1068          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1069                    OFFSET_amd64_RAX, sizeof(ULong) );
1070          break;
1071       case SysRes_UNIX_OK:  // syscall = Unix, 128-bit result
1072       case SysRes_UNIX_ERR: // syscall = Unix, 128-bit error
1073          gst->guest_RAX = sres._wLO;
1074          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1075                    OFFSET_amd64_RAX, sizeof(ULong) );
1076          gst->guest_RDX = sres._wHI;
1077          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1078                    OFFSET_amd64_RDX, sizeof(ULong) );
1079          LibVEX_GuestAMD64_put_rflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
1080                                         gst );
1081          // GrP fixme sets defined for entire rflags, not just bit c
1082          // DDD: this breaks exp-ptrcheck.
1083          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1084                    offsetof(VexGuestAMD64State, guest_CC_DEP1), sizeof(ULong) );
1085          break;
1086       default:
1087          vg_assert(0);
1088          break;
1089    }
1090 
1091 #  elif defined(VGP_s390x_linux)
1092    VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
1093    vg_assert(canonical->what == SsComplete);
1094    if (sr_isError(canonical->sres)) {
1095       gst->guest_r2 = - (Long)sr_Err(canonical->sres);
1096    } else {
1097       gst->guest_r2 = sr_Res(canonical->sres);
1098    }
1099 
1100 #  elif defined(VGP_mips32_linux)
1101    VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
1102    vg_assert(canonical->what == SsComplete);
1103    if (sr_isError(canonical->sres)) {
1104       gst->guest_r2 = (Int)sr_Err(canonical->sres);
1105       gst->guest_r7 = (Int)sr_Err(canonical->sres);
1106    } else {
1107       gst->guest_r2 = sr_Res(canonical->sres);
1108       gst->guest_r3 = sr_ResEx(canonical->sres);
1109       gst->guest_r7 = (Int)sr_Err(canonical->sres);
1110    }
1111    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1112              OFFSET_mips32_r2, sizeof(UWord) );
1113    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1114              OFFSET_mips32_r3, sizeof(UWord) );
1115    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1116              OFFSET_mips32_r7, sizeof(UWord) );
1117 
1118 #  elif defined(VGP_mips64_linux)
1119    VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla;
1120    vg_assert(canonical->what == SsComplete);
1121    if (sr_isError(canonical->sres)) {
1122       gst->guest_r2 = (Int)sr_Err(canonical->sres);
1123       gst->guest_r7 = (Int)sr_Err(canonical->sres);
1124    } else {
1125       gst->guest_r2 = sr_Res(canonical->sres);
1126       gst->guest_r3 = sr_ResEx(canonical->sres);
1127       gst->guest_r7 = (Int)sr_Err(canonical->sres);
1128    }
1129    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1130              OFFSET_mips64_r2, sizeof(UWord) );
1131    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1132              OFFSET_mips64_r3, sizeof(UWord) );
1133    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1134              OFFSET_mips64_r7, sizeof(UWord) );
1135 
1136 #  else
1137 #    error "putSyscallStatusIntoGuestState: unknown arch"
1138 #  endif
1139 }
1140 
1141 
1142 /* Tell me the offsets in the guest state of the syscall params, so
1143    that the scalar argument checkers don't have to have this info
1144    hardwired. */
1145 
/* Fill in 'layout' with, for this platform, where the syscall number
   and each argument live at syscall entry: o_* fields are offsets
   into the guest state (register file), s_* fields are offsets from
   the guest stack pointer, and uu_* fields are set to -1 to mark
   argument slots this platform never uses. */
static
void getSyscallArgLayout ( /*OUT*/SyscallArgLayout* layout )
{
   VG_(bzero_inline)(layout, sizeof(*layout));

#if defined(VGP_x86_linux)
   layout->o_sysno  = OFFSET_x86_EAX;
   layout->o_arg1   = OFFSET_x86_EBX;
   layout->o_arg2   = OFFSET_x86_ECX;
   layout->o_arg3   = OFFSET_x86_EDX;
   layout->o_arg4   = OFFSET_x86_ESI;
   layout->o_arg5   = OFFSET_x86_EDI;
   layout->o_arg6   = OFFSET_x86_EBP;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_amd64_linux)
   layout->o_sysno  = OFFSET_amd64_RAX;
   layout->o_arg1   = OFFSET_amd64_RDI;
   layout->o_arg2   = OFFSET_amd64_RSI;
   layout->o_arg3   = OFFSET_amd64_RDX;
   layout->o_arg4   = OFFSET_amd64_R10;
   layout->o_arg5   = OFFSET_amd64_R8;
   layout->o_arg6   = OFFSET_amd64_R9;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_ppc32_linux)
   layout->o_sysno  = OFFSET_ppc32_GPR0;
   layout->o_arg1   = OFFSET_ppc32_GPR3;
   layout->o_arg2   = OFFSET_ppc32_GPR4;
   layout->o_arg3   = OFFSET_ppc32_GPR5;
   layout->o_arg4   = OFFSET_ppc32_GPR6;
   layout->o_arg5   = OFFSET_ppc32_GPR7;
   layout->o_arg6   = OFFSET_ppc32_GPR8;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_ppc64_linux)
   layout->o_sysno  = OFFSET_ppc64_GPR0;
   layout->o_arg1   = OFFSET_ppc64_GPR3;
   layout->o_arg2   = OFFSET_ppc64_GPR4;
   layout->o_arg3   = OFFSET_ppc64_GPR5;
   layout->o_arg4   = OFFSET_ppc64_GPR6;
   layout->o_arg5   = OFFSET_ppc64_GPR7;
   layout->o_arg6   = OFFSET_ppc64_GPR8;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_arm_linux)
   layout->o_sysno  = OFFSET_arm_R7;
   layout->o_arg1   = OFFSET_arm_R0;
   layout->o_arg2   = OFFSET_arm_R1;
   layout->o_arg3   = OFFSET_arm_R2;
   layout->o_arg4   = OFFSET_arm_R3;
   layout->o_arg5   = OFFSET_arm_R4;
   layout->o_arg6   = OFFSET_arm_R5;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_arm64_linux)
   layout->o_sysno  = OFFSET_arm64_X8;
   layout->o_arg1   = OFFSET_arm64_X0;
   layout->o_arg2   = OFFSET_arm64_X1;
   layout->o_arg3   = OFFSET_arm64_X2;
   layout->o_arg4   = OFFSET_arm64_X3;
   layout->o_arg5   = OFFSET_arm64_X4;
   layout->o_arg6   = OFFSET_arm64_X5;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_mips32_linux)
   layout->o_sysno  = OFFSET_mips32_r2;
   layout->o_arg1   = OFFSET_mips32_r4;
   layout->o_arg2   = OFFSET_mips32_r5;
   layout->o_arg3   = OFFSET_mips32_r6;
   layout->o_arg4   = OFFSET_mips32_r7;
   /* args 5 and 6 are passed on the stack, not in registers */
   layout->s_arg5   = sizeof(UWord) * 4;
   layout->s_arg6   = sizeof(UWord) * 5;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_mips64_linux)
   layout->o_sysno  = OFFSET_mips64_r2;
   layout->o_arg1   = OFFSET_mips64_r4;
   layout->o_arg2   = OFFSET_mips64_r5;
   layout->o_arg3   = OFFSET_mips64_r6;
   layout->o_arg4   = OFFSET_mips64_r7;
   layout->o_arg5   = OFFSET_mips64_r8;
   layout->o_arg6   = OFFSET_mips64_r9;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */

#elif defined(VGP_x86_darwin)
   layout->o_sysno  = OFFSET_x86_EAX;
   // syscall parameters are on stack in C convention
   layout->s_arg1   = sizeof(UWord) * 1;
   layout->s_arg2   = sizeof(UWord) * 2;
   layout->s_arg3   = sizeof(UWord) * 3;
   layout->s_arg4   = sizeof(UWord) * 4;
   layout->s_arg5   = sizeof(UWord) * 5;
   layout->s_arg6   = sizeof(UWord) * 6;
   layout->s_arg7   = sizeof(UWord) * 7;
   layout->s_arg8   = sizeof(UWord) * 8;

#elif defined(VGP_amd64_darwin)
   layout->o_sysno  = OFFSET_amd64_RAX;
   layout->o_arg1   = OFFSET_amd64_RDI;
   layout->o_arg2   = OFFSET_amd64_RSI;
   layout->o_arg3   = OFFSET_amd64_RDX;
   layout->o_arg4   = OFFSET_amd64_RCX;
   layout->o_arg5   = OFFSET_amd64_R8;
   layout->o_arg6   = OFFSET_amd64_R9;
   /* args 7 and 8 overflow onto the stack */
   layout->s_arg7   = sizeof(UWord) * 1;
   layout->s_arg8   = sizeof(UWord) * 2;

#elif defined(VGP_s390x_linux)
   layout->o_sysno  = OFFSET_s390x_SYSNO;
   layout->o_arg1   = OFFSET_s390x_r2;
   layout->o_arg2   = OFFSET_s390x_r3;
   layout->o_arg3   = OFFSET_s390x_r4;
   layout->o_arg4   = OFFSET_s390x_r5;
   layout->o_arg5   = OFFSET_s390x_r6;
   layout->o_arg6   = OFFSET_s390x_r7;
   layout->uu_arg7  = -1; /* impossible value */
   layout->uu_arg8  = -1; /* impossible value */
#else
#  error "getSyscallLayout: unknown arch"
#endif
}
1276 
1277 
1278 /* ---------------------------------------------------------------------
1279    The main driver logic
1280    ------------------------------------------------------------------ */
1281 
1282 /* Finding the handlers for a given syscall, or faking up one
1283    when no handler is found. */
1284 
1285 static
bad_before(ThreadId tid,SyscallArgLayout * layout,SyscallArgs * args,SyscallStatus * status,UWord * flags)1286 void bad_before ( ThreadId              tid,
1287                   SyscallArgLayout*     layout,
1288                   /*MOD*/SyscallArgs*   args,
1289                   /*OUT*/SyscallStatus* status,
1290                   /*OUT*/UWord*         flags )
1291 {
1292    VG_(dmsg)("WARNING: unhandled syscall: %s\n",
1293       VG_SYSNUM_STRING_EXTRA(args->sysno));
1294    if (VG_(clo_verbosity) > 1) {
1295       VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1296    }
1297    VG_(dmsg)("You may be able to write your own handler.\n");
1298    VG_(dmsg)("Read the file README_MISSING_SYSCALL_OR_IOCTL.\n");
1299    VG_(dmsg)("Nevertheless we consider this a bug.  Please report\n");
1300    VG_(dmsg)("it at http://valgrind.org/support/bug_reports.html.\n");
1301 
1302    SET_STATUS_Failure(VKI_ENOSYS);
1303 }
1304 
/* Fallback table entry used when no real handler exists for a
   syscall: bad_before reports it and fails with ENOSYS; there is
   no after-handler. */
static SyscallTableEntry bad_sys =
   { bad_before, NULL };
1307 
get_syscall_entry(Int syscallno)1308 static const SyscallTableEntry* get_syscall_entry ( Int syscallno )
1309 {
1310    const SyscallTableEntry* sys = NULL;
1311 
1312 #  if defined(VGO_linux)
1313    sys = ML_(get_linux_syscall_entry)( syscallno );
1314 
1315 #  elif defined(VGO_darwin)
1316    Int idx = VG_DARWIN_SYSNO_INDEX(syscallno);
1317 
1318    switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
1319    case VG_DARWIN_SYSCALL_CLASS_UNIX:
1320       if (idx >= 0 && idx < ML_(syscall_table_size) &&
1321           ML_(syscall_table)[idx].before != NULL)
1322          sys = &ML_(syscall_table)[idx];
1323          break;
1324    case VG_DARWIN_SYSCALL_CLASS_MACH:
1325       if (idx >= 0 && idx < ML_(mach_trap_table_size) &&
1326           ML_(mach_trap_table)[idx].before != NULL)
1327          sys = &ML_(mach_trap_table)[idx];
1328          break;
1329    case VG_DARWIN_SYSCALL_CLASS_MDEP:
1330       if (idx >= 0 && idx < ML_(mdep_trap_table_size) &&
1331           ML_(mdep_trap_table)[idx].before != NULL)
1332          sys = &ML_(mdep_trap_table)[idx];
1333          break;
1334    default:
1335       vg_assert(0);
1336       break;
1337    }
1338 
1339 #  else
1340 #    error Unknown OS
1341 #  endif
1342 
1343    return sys == NULL  ? &bad_sys  : sys;
1344 }
1345 
1346 
1347 /* Add and remove signals from mask so that we end up telling the
1348    kernel the state we actually want rather than what the client
1349    wants. */
static void sanitize_client_sigmask(vki_sigset_t *mask)
{
   /* Signals that must always remain deliverable, whatever the
      client asked for. */
   static const Int never_blocked[]
      = { VKI_SIGKILL, VKI_SIGSTOP, VG_SIGVGKILL /* never block */ };
   Int i;
   for (i = 0; i < (Int)(sizeof(never_blocked)/sizeof(never_blocked[0])); i++)
      VG_(sigdelset)(mask, never_blocked[i]);
}
1356 
/* Per-thread record of the syscall currently in progress. */
typedef
   struct {
      SyscallArgs   orig_args;  /* args as read from the guest state */
      SyscallArgs   args;       /* copy of orig_args, possibly modified
                                   by the pre-handler */
      SyscallStatus status;     /* where the syscall is in its lifecycle
                                   (SsIdle / SsHandToKernel / SsComplete) */
      UWord         flags;      /* per-syscall flags set by the handlers */
   }
   SyscallInfo;

/* One record per thread, indexed by ThreadId. */
SyscallInfo syscallInfo[VG_N_THREADS];
1367 
1368 
1369 /* The scheduler needs to be able to zero out these records after a
1370    fork, hence this is exported from m_syswrap. */
VG_(clear_syscallInfo)1371 void VG_(clear_syscallInfo) ( Int tid )
1372 {
1373    vg_assert(tid >= 0 && tid < VG_N_THREADS);
1374    VG_(memset)( & syscallInfo[tid], 0, sizeof( syscallInfo[tid] ));
1375    syscallInfo[tid].status.what = SsIdle;
1376 }
1377 
ensure_initialised(void)1378 static void ensure_initialised ( void )
1379 {
1380    Int i;
1381    static Bool init_done = False;
1382    if (init_done)
1383       return;
1384    init_done = True;
1385    for (i = 0; i < VG_N_THREADS; i++) {
1386       VG_(clear_syscallInfo)( i );
1387    }
1388 }
1389 
1390 /* --- This is the main function of this file. --- */
1391 
VG_(client_syscall)1392 void VG_(client_syscall) ( ThreadId tid, UInt trc )
1393 {
1394    Word                     sysno;
1395    ThreadState*             tst;
1396    const SyscallTableEntry* ent;
1397    SyscallArgLayout         layout;
1398    SyscallInfo*             sci;
1399 
1400    ensure_initialised();
1401 
1402    vg_assert(VG_(is_valid_tid)(tid));
1403    vg_assert(tid >= 1 && tid < VG_N_THREADS);
1404    vg_assert(VG_(is_running_thread)(tid));
1405 
1406    tst = VG_(get_ThreadState)(tid);
1407 
1408    /* BEGIN ensure root thread's stack is suitably mapped */
1409    /* In some rare circumstances, we may do the syscall without the
1410       bottom page of the stack being mapped, because the stack pointer
1411       was moved down just a few instructions before the syscall
1412       instruction, and there have been no memory references since
1413       then, that would cause a call to VG_(extend_stack) to have
1414       happened.
1415 
1416       In native execution that's OK: the kernel automagically extends
1417       the stack's mapped area down to cover the stack pointer (or sp -
1418       redzone, really).  In simulated normal execution that's OK too,
1419       since any signals we get from accessing below the mapped area of
1420       the (guest's) stack lead us to VG_(extend_stack), where we
1421       simulate the kernel's stack extension logic.  But that leaves
1422       the problem of entering a syscall with the SP unmapped.  Because
1423       the kernel doesn't know that the segment immediately above SP is
1424       supposed to be a grow-down segment, it causes the syscall to
1425       fail, and thereby causes a divergence between native behaviour
1426       (syscall succeeds) and simulated behaviour (syscall fails).
1427 
1428       This is quite a rare failure mode.  It has only been seen
1429       affecting calls to sys_readlink on amd64-linux, and even then it
1430       requires a certain code sequence around the syscall to trigger
1431       it.  Here is one:
1432 
1433       extern int my_readlink ( const char* path );
1434       asm(
1435       ".text\n"
1436       ".globl my_readlink\n"
1437       "my_readlink:\n"
1438       "\tsubq    $0x1008,%rsp\n"
1439       "\tmovq    %rdi,%rdi\n"              // path is in rdi
1440       "\tmovq    %rsp,%rsi\n"              // &buf[0] -> rsi
1441       "\tmovl    $0x1000,%edx\n"           // sizeof(buf) in rdx
1442       "\tmovl    $"__NR_READLINK",%eax\n"  // syscall number
1443       "\tsyscall\n"
1444       "\taddq    $0x1008,%rsp\n"
1445       "\tret\n"
1446       ".previous\n"
1447       );
1448 
1449       For more details, see bug #156404
1450       (https://bugs.kde.org/show_bug.cgi?id=156404).
1451 
1452       The fix is actually very simple.  We simply need to call
1453       VG_(extend_stack) for this thread, handing it the lowest
1454       possible valid address for stack (sp - redzone), to ensure the
1455       pages all the way down to that address, are mapped.  Because
1456       this is a potentially expensive and frequent operation, we
1457       filter in two ways:
1458 
1459       First, only the main thread (tid=1) has a growdown stack.  So
1460       ignore all others.  It is conceivable, although highly unlikely,
1461       that the main thread exits, and later another thread is
1462       allocated tid=1, but that's harmless, I believe;
1463       VG_(extend_stack) will do nothing when applied to a non-root
1464       thread.
1465 
1466       Secondly, first call VG_(am_find_nsegment) directly, to see if
1467       the page holding (sp - redzone) is mapped correctly.  If so, do
1468       nothing.  This is almost always the case.  VG_(extend_stack)
1469       calls VG_(am_find_nsegment) twice, so this optimisation -- and
1470       that's all it is -- more or less halves the number of calls to
1471       VG_(am_find_nsegment) required.
1472 
1473       TODO: the test "seg->kind == SkAnonC" is really inadequate,
1474       because although it tests whether the segment is mapped
1475       _somehow_, it doesn't check that it has the right permissions
1476       (r,w, maybe x) ?  We could test that here, but it will also be
1477       necessary to fix the corresponding test in VG_(extend_stack).
1478 
1479       All this guff is of course Linux-specific.  Hence the ifdef.
1480    */
1481 #  if defined(VGO_linux)
1482    if (tid == 1/*ROOT THREAD*/) {
1483       Addr     stackMin   = VG_(get_SP)(tid) - VG_STACK_REDZONE_SZB;
1484       NSegment const* seg = VG_(am_find_nsegment)(stackMin);
1485       if (seg && seg->kind == SkAnonC) {
1486          /* stackMin is already mapped.  Nothing to do. */
1487       } else {
1488          (void)VG_(extend_stack)( stackMin,
1489                                   tst->client_stack_szB );
1490       }
1491    }
1492 #  endif
1493    /* END ensure root thread's stack is suitably mapped */
1494 
1495    /* First off, get the syscall args and number.  This is a
1496       platform-dependent action. */
1497 
1498    sci = & syscallInfo[tid];
1499    vg_assert(sci->status.what == SsIdle);
1500 
1501    getSyscallArgsFromGuestState( &sci->orig_args, &tst->arch.vex, trc );
1502 
1503    /* Copy .orig_args to .args.  The pre-handler may modify .args, but
1504       we want to keep the originals too, just in case. */
1505    sci->args = sci->orig_args;
1506 
1507    /* Save the syscall number in the thread state in case the syscall
1508       is interrupted by a signal. */
1509    sysno = sci->orig_args.sysno;
1510 
1511    /* It's sometimes useful, as a crude debugging hack, to get a
1512       stack trace at each (or selected) syscalls. */
1513    if (0 && sysno == __NR_ioctl) {
1514       VG_(umsg)("\nioctl:\n");
1515       VG_(get_and_pp_StackTrace)(tid, 10);
1516       VG_(umsg)("\n");
1517    }
1518 
1519 #  if defined(VGO_darwin)
1520    /* Record syscall class.  But why?  Because the syscall might be
1521       interrupted by a signal, and in the signal handler (which will
1522       be m_signals.async_signalhandler) we will need to build a SysRes
1523       reflecting the syscall return result.  In order to do that we
1524       need to know the syscall class.  Hence stash it in the guest
1525       state of this thread.  This madness is not needed on Linux
1526       because it only has a single syscall return convention and so
1527       there is no ambiguity involved in converting the post-signal
1528       machine state into a SysRes. */
1529    tst->arch.vex.guest_SC_CLASS = VG_DARWIN_SYSNO_CLASS(sysno);
1530 #  endif
1531 
1532    /* The default what-to-do-next thing is hand the syscall to the
1533       kernel, so we pre-set that here.  Set .sres to something
1534       harmless looking (is irrelevant because .what is not
1535       SsComplete.) */
1536    sci->status.what = SsHandToKernel;
1537    sci->status.sres = VG_(mk_SysRes_Error)(0);
1538    sci->flags       = 0;
1539 
1540    /* Fetch the syscall's handlers.  If no handlers exist for this
1541       syscall, we are given dummy handlers which force an immediate
1542       return with ENOSYS. */
1543    ent = get_syscall_entry(sysno);
1544 
1545    /* Fetch the layout information, which tells us where in the guest
1546       state the syscall args reside.  This is a platform-dependent
1547       action.  This info is needed so that the scalar syscall argument
1548       checks (PRE_REG_READ calls) know which bits of the guest state
1549       they need to inspect. */
1550    getSyscallArgLayout( &layout );
1551 
1552    /* Make sure the tmp signal mask matches the real signal mask;
1553       sigsuspend may change this. */
1554    vg_assert(VG_(iseqsigset)(&tst->sig_mask, &tst->tmp_sig_mask));
1555 
1556    /* Right, we're finally ready to Party.  Call the pre-handler and
1557       see what we get back.  At this point:
1558 
1559         sci->status.what  is Unset (we don't know yet).
1560         sci->orig_args    contains the original args.
1561         sci->args         is the same as sci->orig_args.
1562         sci->flags        is zero.
1563    */
1564 
1565    PRINT("SYSCALL[%d,%d](%s) ",
1566       VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno));
1567 
1568    /* Do any pre-syscall actions */
1569    if (VG_(needs).syscall_wrapper) {
1570       UWord tmpv[8];
1571       tmpv[0] = sci->orig_args.arg1;
1572       tmpv[1] = sci->orig_args.arg2;
1573       tmpv[2] = sci->orig_args.arg3;
1574       tmpv[3] = sci->orig_args.arg4;
1575       tmpv[4] = sci->orig_args.arg5;
1576       tmpv[5] = sci->orig_args.arg6;
1577       tmpv[6] = sci->orig_args.arg7;
1578       tmpv[7] = sci->orig_args.arg8;
1579       VG_TDICT_CALL(tool_pre_syscall, tid, sysno,
1580                     &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]));
1581    }
1582 
1583    vg_assert(ent);
1584    vg_assert(ent->before);
1585    (ent->before)( tid,
1586                   &layout,
1587                   &sci->args, &sci->status, &sci->flags );
1588 
1589    /* The pre-handler may have modified:
1590          sci->args
1591          sci->status
1592          sci->flags
1593       All else remains unchanged.
1594       Although the args may be modified, pre handlers are not allowed
1595       to change the syscall number.
1596    */
1597    /* Now we proceed according to what the pre-handler decided. */
1598    vg_assert(sci->status.what == SsHandToKernel
1599              || sci->status.what == SsComplete);
1600    vg_assert(sci->args.sysno == sci->orig_args.sysno);
1601 
1602    if (sci->status.what == SsComplete && !sr_isError(sci->status.sres)) {
1603       /* The pre-handler completed the syscall itself, declaring
1604          success. */
1605       if (sci->flags & SfNoWriteResult) {
1606          PRINT(" --> [pre-success] NoWriteResult");
1607       } else {
1608          PRINT(" --> [pre-success] Success(0x%llx:0x%llx)",
1609                (ULong)sr_ResHI(sci->status.sres),
1610                (ULong)sr_Res(sci->status.sres));
1611       }
1612       /* In this case the allowable flags are to ask for a signal-poll
1613          and/or a yield after the call.  Changing the args isn't
1614          allowed. */
1615       vg_assert(0 == (sci->flags
1616                       & ~(SfPollAfter | SfYieldAfter | SfNoWriteResult)));
1617       vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
1618    }
1619 
1620    else
1621    if (sci->status.what == SsComplete && sr_isError(sci->status.sres)) {
1622       /* The pre-handler decided to fail syscall itself. */
1623       PRINT(" --> [pre-fail] Failure(0x%llx)", (ULong)sr_Err(sci->status.sres));
1624       /* In this case, the pre-handler is also allowed to ask for the
1625          post-handler to be run anyway.  Changing the args is not
1626          allowed. */
1627       vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
1628       vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
1629    }
1630 
1631    else
1632    if (sci->status.what != SsHandToKernel) {
1633       /* huh?! */
1634       vg_assert(0);
1635    }
1636 
1637    else /* (sci->status.what == HandToKernel) */ {
1638       /* Ok, this is the usual case -- and the complicated one.  There
1639          are two subcases: sync and async.  async is the general case
1640          and is to be used when there is any possibility that the
1641          syscall might block [a fact that the pre-handler must tell us
1642          via the sci->flags field.]  Because the tidying-away /
1643          context-switch overhead of the async case could be large, if
1644          we are sure that the syscall will not block, we fast-track it
1645          by doing it directly in this thread, which is a lot
1646          simpler. */
1647 
1648       /* Check that the given flags are allowable: MayBlock, PollAfter
1649          and PostOnFail are ok. */
1650       vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
1651 
1652       if (sci->flags & SfMayBlock) {
1653 
1654          /* Syscall may block, so run it asynchronously */
1655          vki_sigset_t mask;
1656 
1657          PRINT(" --> [async] ... \n");
1658 
1659          mask = tst->sig_mask;
1660          sanitize_client_sigmask(&mask);
1661 
1662          /* Gack.  More impedance matching.  Copy the possibly
1663             modified syscall args back into the guest state. */
1664          /* JRS 2009-Mar-16: if the syscall args are possibly modified,
1665             then this assertion is senseless:
1666               vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
1667             The case that exposed it was sys_posix_spawn on Darwin,
1668             which heavily modifies its arguments but then lets the call
1669             go through anyway, with SfToBlock set, hence we end up here. */
1670          putSyscallArgsIntoGuestState( &sci->args, &tst->arch.vex );
1671 
1672          /* Drop the bigLock */
1673          VG_(release_BigLock)(tid, VgTs_WaitSys, "VG_(client_syscall)[async]");
1674          /* Urr.  We're now in a race against other threads trying to
1675             acquire the bigLock.  I guess that doesn't matter provided
1676             that do_syscall_for_client only touches thread-local
1677             state. */
1678 
1679          /* Do the call, which operates directly on the guest state,
1680             not on our abstracted copies of the args/result. */
1681          do_syscall_for_client(sysno, tst, &mask);
1682 
1683          /* do_syscall_for_client may not return if the syscall was
1684             interrupted by a signal.  In that case, flow of control is
1685             first to m_signals.async_sighandler, which calls
1686             VG_(fixup_guest_state_after_syscall_interrupted), which
1687             fixes up the guest state, and possibly calls
1688             VG_(post_syscall).  Once that's done, control drops back
1689             to the scheduler.  */
1690 
1691          /* Darwin: do_syscall_for_client may not return if the
1692             syscall was workq_ops(WQOPS_THREAD_RETURN) and the kernel
1693             responded by starting the thread at wqthread_hijack(reuse=1)
1694             (to run another workqueue item). In that case, wqthread_hijack
1695             calls ML_(wqthread_continue), which is similar to
1696             VG_(fixup_guest_state_after_syscall_interrupted). */
1697 
1698          /* Reacquire the lock */
1699          VG_(acquire_BigLock)(tid, "VG_(client_syscall)[async]");
1700 
1701          /* Even more impedance matching.  Extract the syscall status
1702             from the guest state. */
1703          getSyscallStatusFromGuestState( &sci->status, &tst->arch.vex );
1704          vg_assert(sci->status.what == SsComplete);
1705 
1706          /* Be decorative, if required. */
1707          if (VG_(clo_trace_syscalls)) {
1708             Bool failed = sr_isError(sci->status.sres);
1709             if (failed) {
1710                PRINT("SYSCALL[%d,%d](%s) ... [async] --> Failure(0x%llx)",
1711                      VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
1712                      (ULong)sr_Err(sci->status.sres));
1713             } else {
1714                PRINT("SYSCALL[%d,%d](%s) ... [async] --> "
1715                      "Success(0x%llx:0x%llx)",
1716                      VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
1717                      (ULong)sr_ResHI(sci->status.sres),
1718                      (ULong)sr_Res(sci->status.sres) );
1719             }
1720          }
1721 
1722       } else {
1723 
1724          /* run the syscall directly */
1725          /* The pre-handler may have modified the syscall args, but
1726             since we're passing values in ->args directly to the
1727             kernel, there's no point in flushing them back to the
1728             guest state.  Indeed doing so could be construed as
1729             incorrect. */
1730          SysRes sres
1731             = VG_(do_syscall)(sysno, sci->args.arg1, sci->args.arg2,
1732                                      sci->args.arg3, sci->args.arg4,
1733                                      sci->args.arg5, sci->args.arg6,
1734                                      sci->args.arg7, sci->args.arg8 );
1735          sci->status = convert_SysRes_to_SyscallStatus(sres);
1736 
1737          /* Be decorative, if required. */
1738          if (VG_(clo_trace_syscalls)) {
1739             Bool failed = sr_isError(sci->status.sres);
1740             if (failed) {
1741                PRINT("[sync] --> Failure(0x%llx)",
1742                      (ULong)sr_Err(sci->status.sres) );
1743             } else {
1744                PRINT("[sync] --> Success(0x%llx:0x%llx)",
1745                      (ULong)sr_ResHI(sci->status.sres),
1746                      (ULong)sr_Res(sci->status.sres) );
1747             }
1748          }
1749       }
1750    }
1751 
1752    vg_assert(sci->status.what == SsComplete);
1753 
1754    vg_assert(VG_(is_running_thread)(tid));
1755 
1756    /* Dump the syscall result back in the guest state.  This is
1757       a platform-specific action. */
1758    if (!(sci->flags & SfNoWriteResult))
1759       putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );
1760 
1761    /* Situation now:
1762       - the guest state is now correctly modified following the syscall
1763       - modified args, original args and syscall status are still
1764         available in the syscallInfo[] entry for this syscall.
1765 
1766       Now go on to do the post-syscall actions (read on down ..)
1767    */
1768    PRINT(" ");
1769    VG_(post_syscall)(tid);
1770    PRINT("\n");
1771 }
1772 
1773 
1774 /* Perform post syscall actions.  The expected state on entry is
1775    precisely as at the end of VG_(client_syscall), that is:
1776 
1777    - guest state up to date following the syscall
1778    - modified args, original args and syscall status are still
1779      available in the syscallInfo[] entry for this syscall.
1780    - syscall status matches what's in the guest state.
1781 
1782    There are two ways to get here: the normal way -- being called by
1783    VG_(client_syscall), and the unusual way, from
1784    VG_(fixup_guest_state_after_syscall_interrupted).
1785    Darwin: there's a third way, ML_(wqthread_continue).
1786 */
void VG_(post_syscall) (ThreadId tid)
{
   SyscallInfo*             sci;
   const SyscallTableEntry* ent;
   SyscallStatus            test_status;
   ThreadState*             tst;
   Word sysno;

   /* Preliminaries */
   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   tst = VG_(get_ThreadState)(tid);
   sci = & syscallInfo[tid];

   /* m_signals.sigvgkill_handler might call here even when not in
      a syscall.  In that case there's nothing to post-process: just
      reset this thread's syscall state machine to idle and return. */
   if (sci->status.what == SsIdle || sci->status.what == SsHandToKernel) {
      sci->status.what = SsIdle;
      return;
   }

   /* Validate current syscallInfo entry.  In particular we require
      that the current .status matches what's actually in the guest
      state.  At least in the normal case where we have actually
      previously written the result into the guest state. */
   vg_assert(sci->status.what == SsComplete);

   getSyscallStatusFromGuestState( &test_status, &tst->arch.vex );
   if (!(sci->flags & SfNoWriteResult))
      vg_assert(eq_SyscallStatus( &sci->status, &test_status ));
   /* Failure of the above assertion on Darwin can indicate a problem
      in the syscall wrappers that pre-fail or pre-succeed the
      syscall, by calling SET_STATUS_Success or SET_STATUS_Failure,
      when they really should call SET_STATUS_from_SysRes.  The former
      create a UNIX-class syscall result on Darwin, which may not be
      correct for the syscall; if that's the case then this assertion
      fires.  See PRE(thread_fast_set_cthread_self) for an example.  On
      non-Darwin platforms this assertion should never fail, and this
      comment is completely irrelevant. */
   /* Ok, looks sane */

   /* Get the system call number.  Because the pre-handler isn't
      allowed to mess with it, it should be the same for both the
      original and potentially-modified args. */
   vg_assert(sci->args.sysno == sci->orig_args.sysno);
   sysno = sci->args.sysno;
   ent = get_syscall_entry(sysno);

   /* pre: status == Complete (asserted above) */
   /* Consider either success or failure.  Now run the post handler if:
      - it exists, and
      - Success or (Failure and PostOnFail is set)
   */
   if (ent->after
       && ((!sr_isError(sci->status.sres))
           || (sr_isError(sci->status.sres)
               && (sci->flags & SfPostOnFail) ))) {

      (ent->after)( tid, &sci->args, &sci->status );
   }

   /* Because the post handler might have changed the status (eg, the
      post-handler for sys_open can change the result from success to
      failure if the kernel supplied a fd that it doesn't like), once
      again dump the syscall result back in the guest state.*/
   if (!(sci->flags & SfNoWriteResult))
      putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );

   /* Do any post-syscall actions required by the tool.  The tool is
      handed a copy of the original (pre-handler-unmodified) args in a
      flat array, mirroring what was passed to tool_pre_syscall. */
   if (VG_(needs).syscall_wrapper) {
      UWord tmpv[8];
      tmpv[0] = sci->orig_args.arg1;
      tmpv[1] = sci->orig_args.arg2;
      tmpv[2] = sci->orig_args.arg3;
      tmpv[3] = sci->orig_args.arg4;
      tmpv[4] = sci->orig_args.arg5;
      tmpv[5] = sci->orig_args.arg6;
      tmpv[6] = sci->orig_args.arg7;
      tmpv[7] = sci->orig_args.arg8;
      VG_TDICT_CALL(tool_post_syscall, tid,
                    sysno,
                    &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]),
                    sci->status.sres);
   }

   /* The syscall is done.  Return the state machine to idle so the
      next syscall on this thread starts from a clean slate. */
   vg_assert(sci->status.what == SsComplete);
   sci->status.what = SsIdle;

   /* The pre/post wrappers may have concluded that pending signals
      might have been created, and will have set SfPollAfter to
      request a poll for them once the syscall is done. */
   if (sci->flags & SfPollAfter)
      VG_(poll_signals)(tid);

   /* Similarly, the wrappers might have asked for a yield
      afterwards. */
   if (sci->flags & SfYieldAfter)
      VG_(vg_yield)();
}
1889 
1890 
1891 /* ---------------------------------------------------------------------
1892    Dealing with syscalls which get interrupted by a signal:
1893    VG_(fixup_guest_state_after_syscall_interrupted)
1894    ------------------------------------------------------------------ */
1895 
1896 /* Syscalls done on behalf of the client are finally handed off to the
1897    kernel in VG_(client_syscall) above, either by calling
1898    do_syscall_for_client (the async case), or by calling
1899    VG_(do_syscall6) (the sync case).
1900 
1901    If the syscall is not interrupted by a signal (it may block and
1902    later unblock, but that's irrelevant here) then those functions
1903    eventually return and so control is passed to VG_(post_syscall).
1904    NB: not sure if the sync case can actually get interrupted, as it
1905    operates with all signals masked.
1906 
1907    However, the syscall may get interrupted by an async-signal.  In
1908    that case do_syscall_for_client/VG_(do_syscall6) do not
1909    return.  Instead we wind up in m_signals.async_sighandler.  We need
1910    to fix up the guest state to make it look like the syscall was
   interrupted for the guest.  So async_sighandler calls here, and this
1912    does the fixup.  Note that from here we wind up calling
1913    VG_(post_syscall) too.
1914 */
1915 
1916 
/* These are addresses within ML_(do_syscall_for_client_WRK).  See
   syscall-$PLAT.S for details.

   They mark the phase boundaries of the blocking-syscall sequence
   (set up / restart point / result computed / result committed /
   finished).  VG_(fixup_guest_state_after_syscall_interrupted)
   compares the interrupted IP against these markers to decide how to
   fix up the guest state after a signal.
*/
#if defined(VGO_linux)
  extern const Addr ML_(blksys_setup);
  extern const Addr ML_(blksys_restart);
  extern const Addr ML_(blksys_complete);
  extern const Addr ML_(blksys_committed);
  extern const Addr ML_(blksys_finished);
#elif defined(VGO_darwin)
  /* Darwin requires extra uglyness: one marker set per syscall
     class (MACH, MDEP, UNIX). */
  extern const Addr ML_(blksys_setup_MACH);
  extern const Addr ML_(blksys_restart_MACH);
  extern const Addr ML_(blksys_complete_MACH);
  extern const Addr ML_(blksys_committed_MACH);
  extern const Addr ML_(blksys_finished_MACH);
  extern const Addr ML_(blksys_setup_MDEP);
  extern const Addr ML_(blksys_restart_MDEP);
  extern const Addr ML_(blksys_complete_MDEP);
  extern const Addr ML_(blksys_committed_MDEP);
  extern const Addr ML_(blksys_finished_MDEP);
  extern const Addr ML_(blksys_setup_UNIX);
  extern const Addr ML_(blksys_restart_UNIX);
  extern const Addr ML_(blksys_complete_UNIX);
  extern const Addr ML_(blksys_committed_UNIX);
  extern const Addr ML_(blksys_finished_UNIX);
#else
# error "Unknown OS"
#endif
1946 
1947 
/* Back up guest state to restart a system call.  This rewinds the
   guest program counter so it points at the syscall instruction
   again (except on x86-darwin, which restores the saved pre-syscall
   IP instead), then sanity-checks that the bytes at the new PC really
   are that platform's syscall instruction. */

void ML_(fixup_guest_state_to_restart_syscall) ( ThreadArchState* arch )
{
#if defined(VGP_x86_linux)
   arch->vex.guest_EIP -= 2;             // sizeof(int $0x80)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      int $0x80 == CD 80
   */
   {
      UChar *p = (UChar *)arch->vex.guest_EIP;

      if (p[0] != 0xcd || p[1] != 0x80)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#x %02x %02x\n",
                      arch->vex.guest_EIP, p[0], p[1]);

      vg_assert(p[0] == 0xcd && p[1] == 0x80);
   }

#elif defined(VGP_amd64_linux)
   arch->vex.guest_RIP -= 2;             // sizeof(syscall)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      syscall == 0F 05
   */
   {
      UChar *p = (UChar *)arch->vex.guest_RIP;

      if (p[0] != 0x0F || p[1] != 0x05)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x\n",
                      arch->vex.guest_RIP, p[0], p[1]);

      vg_assert(p[0] == 0x0F && p[1] == 0x05);
   }

#elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
   arch->vex.guest_CIA -= 4;             // sizeof(ppc32 instr)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      sc == 44 00 00 02
   */
   {
      UChar *p = (UChar *)arch->vex.guest_CIA;

      if (p[0] != 0x44 || p[1] != 0x0 || p[2] != 0x0 || p[3] != 0x02)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
                      arch->vex.guest_CIA + 0ULL, p[0], p[1], p[2], p[3]);

      vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);
   }

#elif defined(VGP_arm_linux)
   if (arch->vex.guest_R15T & 1) {
      // Thumb mode.  SVC is a encoded as
      //   1101 1111 imm8
      // where imm8 is the SVC number, and we only accept 0.
      arch->vex.guest_R15T -= 2;   // sizeof(thumb 16 bit insn)
      // guest_R15T has bit 0 set to flag Thumb mode; subtract 1 to
      // get the actual instruction address.
      UChar* p     = (UChar*)(arch->vex.guest_R15T - 1);
      Bool   valid = p[0] == 0 && p[1] == 0xDF;
      if (!valid) {
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over (Thumb) syscall that is not syscall "
                      "at %#llx %02x %02x\n",
                      arch->vex.guest_R15T - 1ULL, p[0], p[1]);
      }
      vg_assert(valid);
      // FIXME: NOTE, this really isn't right.  We need to back up
      // ITSTATE to what it was before the SVC instruction, but we
      // don't know what it was.  At least assert that it is now
      // zero, because if it is nonzero then it must also have
      // been nonzero for the SVC itself, which means it was
      // conditional.  Urk.
      vg_assert(arch->vex.guest_ITSTATE == 0);
   } else {
      // ARM mode.  SVC is encoded as
      //   cond 1111 imm24
      // where imm24 is the SVC number, and we only accept 0.
      arch->vex.guest_R15T -= 4;   // sizeof(arm instr)
      UChar* p     = (UChar*)arch->vex.guest_R15T;
      Bool   valid = p[0] == 0 && p[1] == 0 && p[2] == 0
                     && (p[3] & 0xF) == 0xF;
      if (!valid) {
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over (ARM) syscall that is not syscall "
                      "at %#llx %02x %02x %02x %02x\n",
                      arch->vex.guest_R15T + 0ULL, p[0], p[1], p[2], p[3]);
      }
      vg_assert(valid);
   }

#elif defined(VGP_arm64_linux)
   arch->vex.guest_PC -= 4;             // sizeof(arm64 instr)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      svc #0 == d4 00 00 01
   */
   {
      UChar *p = (UChar *)arch->vex.guest_PC;

      // bytes checked in little-endian memory order
      if (p[0] != 0x01 || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0xD4)
         VG_(message)(
            Vg_DebugMsg,
            "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
            arch->vex.guest_PC + 0ULL, p[0], p[1], p[2], p[3]
          );

      vg_assert(p[0] == 0x01 && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0xD4);
   }

#elif defined(VGP_x86_darwin)
   /* Darwin: rather than decrementing EIP by a fixed insn size,
      restore the IP that was saved at syscall entry. */
   arch->vex.guest_EIP = arch->vex.guest_IP_AT_SYSCALL;

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      int $0x80 == CD 80
      int $0x81 == CD 81
      int $0x82 == CD 82
      sysenter  == 0F 34
   */
   {
       UChar *p = (UChar *)arch->vex.guest_EIP;
       Bool  ok = (p[0] == 0xCD && p[1] == 0x80)
                  || (p[0] == 0xCD && p[1] == 0x81)
                  || (p[0] == 0xCD && p[1] == 0x82)
                  || (p[0] == 0x0F && p[1] == 0x34);
       if (!ok)
           VG_(message)(Vg_DebugMsg,
                        "?! restarting over syscall at %#x %02x %02x\n",
                        arch->vex.guest_EIP, p[0], p[1]);
       vg_assert(ok);
   }

#elif defined(VGP_amd64_darwin)
   // DDD: #warning GrP fixme amd64 restart unimplemented
   vg_assert(0);

#elif defined(VGP_s390x_linux)
   arch->vex.guest_IA -= 2;             // sizeof(syscall)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      syscall == 0A <num>
   */
   {
      UChar *p = (UChar *)arch->vex.guest_IA;
      if (p[0] != 0x0A)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x\n",
                      arch->vex.guest_IA, p[0], p[1]);

      vg_assert(p[0] == 0x0A);
   }

#elif defined(VGP_mips32_linux) || defined(VGP_mips64_linux)

   arch->vex.guest_PC -= 4;             // sizeof(mips instr)

   /* Make sure our caller is actually sane, and we're really backing
      back over a syscall.

      syscall == 00 00 00 0C
      big endian
      syscall == 0C 00 00 00
   */
   {
      UChar *p = (UChar *)(arch->vex.guest_PC);
#     if defined (VG_LITTLEENDIAN)
      if (p[0] != 0x0c || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0x00)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
                      (ULong)arch->vex.guest_PC, p[0], p[1], p[2], p[3]);

      vg_assert(p[0] == 0x0c && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0x00);
#     elif defined (VG_BIGENDIAN)
      if (p[0] != 0x00 || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0x0c)
         VG_(message)(Vg_DebugMsg,
                      "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
                      (ULong)arch->vex.guest_PC, p[0], p[1], p[2], p[3]);

      vg_assert(p[0] == 0x00 && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0x0c);
#     else
#        error "Unknown endianness"
#     endif
   }

#else
#  error "ML_(fixup_guest_state_to_restart_syscall): unknown plat"
#endif
}
2151 
2152 
2153 /*
2154    Fix up the guest state when a syscall is interrupted by a signal
2155    and so has been forced to return 'sysret'.
2156 
2157    To do this, we determine the precise state of the syscall by
2158    looking at the (real) IP at the time the signal happened.  The
2159    syscall sequence looks like:
2160 
2161      1. unblock signals
2162      2. perform syscall
2163      3. save result to guest state (EAX, RAX, R3+CR0.SO, R0, V0)
2164      4. re-block signals
2165 
2166    If a signal
2167    happens at      Then     Why?
2168    [1-2)           restart  nothing has happened (restart syscall)
2169    [2]             restart  syscall hasn't started, or kernel wants to restart
2170    [2-3)           save     syscall complete, but results not saved
2171    [3-4)           syscall complete, results saved
2172 
2173    Sometimes we never want to restart an interrupted syscall (because
2174    sigaction says not to), so we only restart if "restart" is True.
2175 
2176    This will also call VG_(post_syscall) if the syscall has actually
2177    completed (either because it was interrupted, or because it
2178    actually finished).  It will not call VG_(post_syscall) if the
2179    syscall is set up for restart, which means that the pre-wrapper may
2180    get called multiple times.
2181 */
2182 
2183 void
VG_(fixup_guest_state_after_syscall_interrupted)2184 VG_(fixup_guest_state_after_syscall_interrupted)( ThreadId tid,
2185                                                   Addr     ip,
2186                                                   SysRes   sres,
2187                                                   Bool     restart)
2188 {
2189    /* Note that we don't know the syscall number here, since (1) in
2190       general there's no reliable way to get hold of it short of
2191       stashing it in the guest state before the syscall, and (2) in
2192       any case we don't need to know it for the actions done by this
2193       routine.
2194 
2195       Furthermore, 'sres' is only used in the case where the syscall
2196       is complete, but the result has not been committed to the guest
2197       state yet.  In any other situation it will be meaningless and
2198       therefore ignored. */
2199 
2200    ThreadState*     tst;
2201    SyscallStatus    canonical;
2202    ThreadArchState* th_regs;
2203    SyscallInfo*     sci;
2204 
2205    /* Compute some Booleans indicating which range we're in. */
2206    Bool outside_range,
2207         in_setup_to_restart,      // [1,2) in the .S files
2208         at_restart,               // [2]   in the .S files
2209         in_complete_to_committed, // [3,4) in the .S files
2210         in_committed_to_finished; // [4,5) in the .S files
2211 
2212 #  if defined(VGO_linux)
2213    outside_range
2214       = ip < ML_(blksys_setup) || ip >= ML_(blksys_finished);
2215    in_setup_to_restart
2216       = ip >= ML_(blksys_setup) && ip < ML_(blksys_restart);
2217    at_restart
2218       = ip == ML_(blksys_restart);
2219    in_complete_to_committed
2220       = ip >= ML_(blksys_complete) && ip < ML_(blksys_committed);
2221    in_committed_to_finished
2222       = ip >= ML_(blksys_committed) && ip < ML_(blksys_finished);
2223 #  elif defined(VGO_darwin)
2224    outside_range
2225       =  (ip < ML_(blksys_setup_MACH) || ip >= ML_(blksys_finished_MACH))
2226       && (ip < ML_(blksys_setup_MDEP) || ip >= ML_(blksys_finished_MDEP))
2227       && (ip < ML_(blksys_setup_UNIX) || ip >= ML_(blksys_finished_UNIX));
2228    in_setup_to_restart
2229       =  (ip >= ML_(blksys_setup_MACH) && ip < ML_(blksys_restart_MACH))
2230       || (ip >= ML_(blksys_setup_MDEP) && ip < ML_(blksys_restart_MDEP))
2231       || (ip >= ML_(blksys_setup_UNIX) && ip < ML_(blksys_restart_UNIX));
2232    at_restart
2233       =  (ip == ML_(blksys_restart_MACH))
2234       || (ip == ML_(blksys_restart_MDEP))
2235       || (ip == ML_(blksys_restart_UNIX));
2236    in_complete_to_committed
2237       =  (ip >= ML_(blksys_complete_MACH) && ip < ML_(blksys_committed_MACH))
2238       || (ip >= ML_(blksys_complete_MDEP) && ip < ML_(blksys_committed_MDEP))
2239       || (ip >= ML_(blksys_complete_UNIX) && ip < ML_(blksys_committed_UNIX));
2240    in_committed_to_finished
2241       =  (ip >= ML_(blksys_committed_MACH) && ip < ML_(blksys_finished_MACH))
2242       || (ip >= ML_(blksys_committed_MDEP) && ip < ML_(blksys_finished_MDEP))
2243       || (ip >= ML_(blksys_committed_UNIX) && ip < ML_(blksys_finished_UNIX));
2244    /* Wasn't that just So Much Fun?  Does your head hurt yet?  Mine does. */
2245 #  else
2246 #    error "Unknown OS"
2247 #  endif
2248 
2249    if (VG_(clo_trace_signals))
2250       VG_(message)( Vg_DebugMsg,
2251                     "interrupted_syscall: tid=%d, ip=0x%llx, "
2252                     "restart=%s, sres.isErr=%s, sres.val=%lld\n",
2253                     (Int)tid,
2254                     (ULong)ip,
2255                     restart ? "True" : "False",
2256                     sr_isError(sres) ? "True" : "False",
2257                     (Long)(sr_isError(sres) ? sr_Err(sres) : sr_Res(sres)) );
2258 
2259    vg_assert(VG_(is_valid_tid)(tid));
2260    vg_assert(tid >= 1 && tid < VG_N_THREADS);
2261    vg_assert(VG_(is_running_thread)(tid));
2262 
2263    tst     = VG_(get_ThreadState)(tid);
2264    th_regs = &tst->arch;
2265    sci     = & syscallInfo[tid];
2266 
2267    /* Figure out what the state of the syscall was by examining the
2268       (real) IP at the time of the signal, and act accordingly. */
2269    if (outside_range) {
2270       if (VG_(clo_trace_signals))
2271          VG_(message)( Vg_DebugMsg,
2272                        "  not in syscall at all: hmm, very suspicious\n" );
2273       /* Looks like we weren't in a syscall at all.  Hmm. */
2274       vg_assert(sci->status.what != SsIdle);
2275       return;
2276    }
2277 
2278    /* We should not be here unless this thread had first started up
2279       the machinery for a syscall by calling VG_(client_syscall).
2280       Hence: */
2281    vg_assert(sci->status.what != SsIdle);
2282 
2283    /* now, do one of four fixup actions, depending on where the IP has
2284       got to. */
2285 
2286    if (in_setup_to_restart) {
2287       /* syscall hasn't even started; go around again */
2288       if (VG_(clo_trace_signals))
2289          VG_(message)( Vg_DebugMsg, "  not started: restarting\n");
2290       vg_assert(sci->status.what == SsHandToKernel);
2291       ML_(fixup_guest_state_to_restart_syscall)(th_regs);
2292    }
2293 
2294    else
2295    if (at_restart) {
2296       /* We're either about to run the syscall, or it was interrupted
2297          and the kernel restarted it.  Restart if asked, otherwise
2298          EINTR it. */
2299       if (restart) {
2300          if (VG_(clo_trace_signals))
2301             VG_(message)( Vg_DebugMsg, "  at syscall instr: restarting\n");
2302          ML_(fixup_guest_state_to_restart_syscall)(th_regs);
2303       } else {
2304          if (VG_(clo_trace_signals))
2305             VG_(message)( Vg_DebugMsg, "  at syscall instr: returning EINTR\n");
2306          canonical = convert_SysRes_to_SyscallStatus(
2307                         VG_(mk_SysRes_Error)( VKI_EINTR )
2308                      );
2309          if (!(sci->flags & SfNoWriteResult))
2310             putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
2311          sci->status = canonical;
2312          VG_(post_syscall)(tid);
2313       }
2314    }
2315 
2316    else
2317    if (in_complete_to_committed) {
2318       /* Syscall complete, but result hasn't been written back yet.
2319          Write the SysRes we were supplied with back to the guest
2320          state. */
2321       if (VG_(clo_trace_signals))
2322          VG_(message)( Vg_DebugMsg,
2323                        "  completed, but uncommitted: committing\n");
2324       canonical = convert_SysRes_to_SyscallStatus( sres );
2325       if (!(sci->flags & SfNoWriteResult))
2326          putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
2327       sci->status = canonical;
2328       VG_(post_syscall)(tid);
2329    }
2330 
2331    else
2332    if (in_committed_to_finished) {
2333       /* Result committed, but the signal mask has not been restored;
2334          we expect our caller (the signal handler) will have fixed
2335          this up. */
2336       if (VG_(clo_trace_signals))
2337          VG_(message)( Vg_DebugMsg,
2338                        "  completed and committed: nothing to do\n");
2339       getSyscallStatusFromGuestState( &sci->status, &th_regs->vex );
2340       vg_assert(sci->status.what == SsComplete);
2341       VG_(post_syscall)(tid);
2342    }
2343 
2344    else
2345       VG_(core_panic)("?? strange syscall interrupt state?");
2346 
2347    /* In all cases, the syscall is now finished (even if we called
2348       ML_(fixup_guest_state_to_restart_syscall), since that just
2349       re-positions the guest's IP for another go at it).  So we need
2350       to record that fact. */
2351    sci->status.what = SsIdle;
2352 }
2353 
2354 
#if defined(VGO_darwin)
// Clean up after workq_ops(WQOPS_THREAD_RETURN) jumped to wqthread_hijack.
// This is similar to VG_(fixup_guest_state_after_syscall_interrupted).
// This longjmps back to the scheduler and therefore never returns.
void ML_(wqthread_continue_NORETURN)(ThreadId tid)
{
   ThreadState* thr_st;
   SyscallInfo* sc_inf;

   VG_(acquire_BigLock)(tid, "wqthread_continue_NORETURN");

   PRINT("SYSCALL[%d,%d](%s) workq_ops() starting new workqueue item\n",
         VG_(getpid)(), tid, VG_SYSNUM_STRING(__NR_workq_ops));

   /* Sanity: tid must name a valid, currently-running thread. */
   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   thr_st = VG_(get_ThreadState)(tid);
   sc_inf = &syscallInfo[tid];
   vg_assert(sc_inf->status.what != SsIdle);
   // must hold BEFORE post_syscall is run
   vg_assert(thr_st->os_state.wq_jmpbuf_valid);

   /* Fake a successful completion (result 0) of the syscall, but with
      SfNoWriteResult set so the guest register state is left alone. */
   sc_inf->status = convert_SysRes_to_SyscallStatus( VG_(mk_SysRes_Success)(0) );
   sc_inf->flags |= SfNoWriteResult;
   VG_(post_syscall)(tid);

   /* Record that the (pretend) syscall is over. */
   sc_inf->status.what = SsIdle;

   /* Jump back into the scheduler's run loop. */
   vg_assert(thr_st->sched_jmpbuf_valid);
   VG_MINIMAL_LONGJMP(thr_st->sched_jmpbuf);

   /* NOTREACHED */
   vg_assert(0);
}
#endif
2392 
2393 
/* ---------------------------------------------------------------------
   A place to store the where-to-call-when-really-done pointer
   ------------------------------------------------------------------ */

// When the final thread is done, where shall I call to shutdown the
// system cleanly?  Is set once at startup (in m_main) and never
// changes after that.  Is basically a pointer to the exit
// continuation.  This is all just a nasty hack to avoid calling
// directly from m_syswrap to m_main at exit, since that would cause
// m_main to become part of a module cycle, which is silly.
// Remains NULL until m_main installs the continuation at startup.
void (* VG_(address_of_m_main_shutdown_actions_NORETURN) )
       (ThreadId,VgSchedReturnCode)
   = NULL;
2407 
2408 /*--------------------------------------------------------------------*/
2409 /*--- end                                                          ---*/
2410 /*--------------------------------------------------------------------*/
2411