1 
2 /*--------------------------------------------------------------------*/
3 /*--- Handle system calls.                          syswrap-main.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2000-2017 Julian Seward
11       jseward@acm.org
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 #include "libvex_guest_offsets.h"
32 #include "libvex_trc_values.h"
33 #include "pub_core_basics.h"
34 #include "pub_core_aspacemgr.h"
35 #include "pub_core_vki.h"
36 #include "pub_core_vkiscnums.h"
37 #include "pub_core_threadstate.h"
38 #include "pub_core_libcbase.h"
39 #include "pub_core_libcassert.h"
40 #include "pub_core_libcprint.h"
41 #include "pub_core_libcproc.h"      // For VG_(getpid)()
42 #include "pub_core_libcsignal.h"
43 #include "pub_core_scheduler.h"     // For VG_({acquire,release}_BigLock),
44                                     //   and VG_(vg_yield)
45 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
46 #include "pub_core_tooliface.h"
47 #include "pub_core_options.h"
48 #include "pub_core_signals.h"       // For VG_SIGVGKILL, VG_(poll_signals)
49 #include "pub_core_syscall.h"
50 #include "pub_core_machine.h"
51 #include "pub_core_mallocfree.h"
52 #include "pub_core_syswrap.h"
53 #include "pub_core_gdbserver.h"     // VG_(gdbserver_report_syscall)
54 
55 #include "priv_types_n_macros.h"
56 #include "priv_syswrap-main.h"
57 
58 #if defined(VGO_darwin)
59 #include "priv_syswrap-darwin.h"
60 #endif
61 
62 /* Useful info which needs to be recorded somewhere:
63    Use of registers in syscalls is:
64 
65           NUM   ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT
66    LINUX:
67    x86    eax   ebx  ecx  edx  esi  edi  ebp  n/a  n/a  eax       (== NUM)
68    amd64  rax   rdi  rsi  rdx  r10  r8   r9   n/a  n/a  rax       (== NUM)
69    ppc32  r0    r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
70    ppc64  r0    r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
71    arm    r7    r0   r1   r2   r3   r4   r5   n/a  n/a  r0        (== ARG1)
72    mips32 v0    a0   a1   a2   a3 stack stack n/a  n/a  v0        (== NUM)
73    mips64 v0    a0   a1   a2   a3   a4   a5   a6   a7   v0        (== NUM)
74    arm64  x8    x0   x1   x2   x3   x4   x5   n/a  n/a  x0 ??     (== ARG1??)
75 
76    On s390x the svc instruction is used for system calls. The system call
77    number is encoded in the instruction (8 bit immediate field). Since Linux
78    2.6 it is also possible to use svc 0 with the system call number in r1.
79    This was introduced for system calls >255, but works for all. It is
80    also possible to see svc 0 used together with an EXecute instruction, which
81    fills in the immediate field.
82    s390x r1/SVC r2   r3   r4   r5   r6   r7   n/a  n/a  r2        (== ARG1)
83 
84           NUM   ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT
85    DARWIN:
86    x86    eax   +4   +8   +12  +16  +20  +24  +28  +32  edx:eax, eflags.c
87    amd64  rax   rdi  rsi  rdx  rcx  r8   r9   +8   +16  rdx:rax, rflags.c
88 
89    For x86-darwin, "+N" denotes "in memory at N(%esp)"; ditto
90    amd64-darwin.  Apparently 0(%esp) is some kind of return address
91    (perhaps for syscalls done with "sysenter"?)  I don't think it is
92    relevant for syscalls done with "int $0x80/1/2".
93 
94    SOLARIS:
95    x86    eax +4   +8   +12  +16  +20  +24  +28  +32  edx:eax, eflags.c
96    amd64  rax rdi  rsi  rdx  r10  r8   r9   +8   +16  rdx:rax, rflags.c
97 
98    "+N" denotes "in memory at N(%esp)". Solaris also supports fasttrap
99    syscalls. Fasttraps do not take any parameters (except for the sysno in eax)
100    and never fail (if the sysno is valid).
101 */
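
/* Purely illustrative sketch (added for exposition, not used anywhere
   in Valgrind): per the table above, a direct amd64-linux syscall puts
   NUM in rax and ARG1..ARG3 in rdi/rsi/rdx, and the kernel returns the
   result in rax, with values in -4095..-1 meaning -errno.  The real
   hand-off is done by the assembly in syscall-$PLAT.S, not by code
   like this. */
#if 0
static UWord example_amd64_linux_syscall3 ( UWord num,
                                            UWord a1, UWord a2, UWord a3 )
{
   UWord res;
   __asm__ __volatile__ (
      "syscall"
      : "=a" (res)                               /* result in rax */
      : "a" (num), "D" (a1), "S" (a2), "d" (a3)  /* rax, rdi, rsi, rdx */
      : "rcx", "r11", "memory"                   /* clobbered by syscall */
   );
   return res;
}
#endif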
102 
103 /* This is the top level of the system-call handler module.  All
104    system calls are channelled through here, doing two things:
105 
106    * notify the tool of the events (mem/reg reads, writes) happening
107 
108    * perform the syscall, usually by passing it along to the kernel
109      unmodified.
110 
111    A magical piece of assembly code, do_syscall_for_client_WRK, in
112    syscall-$PLATFORM.S does the tricky bit of passing a syscall to the
113    kernel, whilst having the simulator retain control.
114 */
115 
116 /* The main function is VG_(client_syscall).  The simulation calls it
117    whenever a client thread wants to do a syscall.  The following is a
118    sketch of what it does.
119 
120    * Ensures the root thread's stack is suitably mapped.  Tedious and
121      arcane.  See big big comment in VG_(client_syscall).
122 
123    * First, it rounds up the syscall number and args (which is a
124      platform dependent activity) and puts them in a struct ("args")
125      and also a copy in "orig_args".
126 
127      The pre/post wrappers refer to these structs and so no longer
128      need magic macros to access any specific registers.  This struct
129      is stored in thread-specific storage.
130 
131 
132    * The pre-wrapper is called, passing it a pointer to struct
133      "args".
134 
135 
136    * The pre-wrapper examines the args and pokes the tool
137      appropriately.  It may modify the args; this is why "orig_args"
138      is also stored.
139 
140      The pre-wrapper may choose to 'do' the syscall itself; either way
141      it concludes with one of three outcomes:
142 
143        Success(N)    -- syscall is already complete, with success;
144                         result is N
145 
146        Fail(N)       -- syscall is already complete, with failure;
147                         error code is N
148 
149        HandToKernel  -- (the usual case): this needs to be given to
150                         the kernel to be done, using the values in
151                         the possibly-modified "args" struct.
152 
153      In addition, the pre-wrapper may set some flags:
154 
155        MayBlock   -- only applicable when outcome==HandToKernel
156 
157        PostOnFail -- only applicable when outcome==HandToKernel or Fail
158 
159 
160    * If the pre-outcome is HandToKernel, the syscall is duly handed
161      off to the kernel (perhaps involving some thread switchery, but
162      that's not important).  This reduces the possible set of outcomes
163      to either Success(N) or Fail(N).
164 
165 
166    * The outcome (Success(N) or Fail(N)) is written back to the guest
167      register(s).  This is platform specific:
168 
169      x86:    Success(N) ==>  eax = N
170              Fail(N)    ==>  eax = -N
171 
172      ditto amd64
173 
174      ppc32:  Success(N) ==>  r3 = N, CR0.SO = 0
175              Fail(N) ==>     r3 = N, CR0.SO = 1
176 
177      Darwin:
178      x86:    Success(N) ==>  edx:eax = N, cc = 0
179              Fail(N)    ==>  edx:eax = N, cc = 1
180 
181      s390x:  Success(N) ==>  r2 = N
182              Fail(N)    ==>  r2 = -N
183 
184      Solaris:
185      x86:    Success(N) ==>  edx:eax = N, cc = 0
186              Fail(N)    ==>      eax = N, cc = 1
187      The same applies to fasttraps, except that they never fail.
188 
189    * The post wrapper is called if:
190 
191      - it exists, and
192      - outcome==Success or (outcome==Fail and PostOnFail is set)
193 
194      The post wrapper is passed the adulterated syscall args (struct
195      "args"), and the syscall outcome (viz, Success(N) or Fail(N)).
196 
197    There are several other complications, primarily to do with
198    syscalls getting interrupted, explained in comments in the code.
199 */
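
/* Purely illustrative sketch of the Linux write-back rule above
   (Success(N) ==> reg = N, Fail(N) ==> reg = -N), using a made-up
   helper name; the real work is done by putSyscallStatusIntoGuestState
   further down. */
#if 0
static UWord example_encode_linux_result ( Bool failed, UWord value )
{
   /* On x86/amd64/s390x-linux, failures are written back as the
      negated error code; successes are written back unchanged. */
   return failed ? (UWord)( -(Word)value ) : value;
}
#endif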
200 
201 /* CAVEATS for writing wrappers.  It is important to follow these!
202 
203    The macros defined in priv_types_n_macros.h are designed to help
204    decouple the wrapper logic from the actual representation of
205    syscall args/results, since these wrappers are designed to work on
206    multiple platforms.
207 
208    Sometimes a PRE wrapper will complete the syscall itself, without
209    handing it to the kernel.  It will use one of SET_STATUS_Success,
210    SET_STATUS_Failure or SET_STATUS_from_SysRes to set the return
211    value.  It is critical to appreciate that use of the macro does not
212    immediately cause the underlying guest state to be updated -- that
213    is done by the driver logic in this file, when the wrapper returns.
214 
215    As a result, PRE wrappers of the following form will malfunction:
216 
217    PRE(fooble)
218    {
219       ... do stuff ...
220       SET_STATUS_Somehow(...)
221 
222       // do something that assumes guest state is up to date
223    }
224 
225    In particular, direct or indirect calls to VG_(poll_signals) after
226    setting STATUS can cause the guest state to be read (in order to
227    build signal frames).  Do not do this.  If you want a signal poll
228    after the syscall goes through, do "*flags |= SfPollAfter" and the
229    driver logic will do it for you.
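
   For example, a corrected version of the "fooble" sketch above defers
   the poll to the driver logic:

   PRE(fooble)
   {
      ... do stuff ...
      SET_STATUS_Somehow(...)
      *flags |= SfPollAfter;

      // guest state is written back, and the poll done, by the driver
   }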
230 
231    -----------
232 
233    Another critical requirement following introduction of new address
234    space manager (JRS, 20050923):
235 
236    In a situation where the mappedness of memory has changed, aspacem
237    should be notified BEFORE the tool.  Hence the following is
238    correct:
239 
240       Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
241       VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
242       if (d)
243          VG_(discard_translations)(s->start, s->end+1 - s->start);
244 
245    whilst this is wrong:
246 
247       VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
248       Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
249       if (d)
250          VG_(discard_translations)(s->start, s->end+1 - s->start);
251 
252    The reason is that the tool may itself ask aspacem for more shadow
253    memory as a result of the VG_TRACK call.  In such a situation it is
254    critical that aspacem's segment array is up to date -- hence the
255    need to notify aspacem first.
256 
257    -----------
258 
259    Also .. take care to call VG_(discard_translations) whenever
260    memory with execute permissions is unmapped.
261 */
262 
263 
264 /* ---------------------------------------------------------------------
265    Do potentially blocking syscall for the client, and mess with
266    signal masks at the same time.
267    ------------------------------------------------------------------ */
268 
269 /* Perform a syscall on behalf of a client thread, using a specific
270    signal mask.  On completion, the signal mask is set to restore_mask
271    (which presumably blocks almost everything).  If a signal happens
272    during the syscall, the handler should call
273    VG_(fixup_guest_state_after_syscall_interrupted) to adjust the
274    thread's context to do the right thing.
275 
276    The _WRK function is handwritten assembly, implemented per-platform
277    in coregrind/m_syswrap/syscall-$PLAT.S.  It has some very magic
278    properties.  See comments at the top of
279    VG_(fixup_guest_state_after_syscall_interrupted) below for details.
280 
281    These functions are required to return zero in case
282    of success (even if the syscall itself failed), and nonzero if the
283    sigprocmask-swizzling calls failed.  We don't actually care about
284    the failure values from sigprocmask, although most of the assembly
285    implementations do attempt to return that, using the convention
286    0 for success, or 0x8000 | error-code for failure.
287 */
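
/* Purely illustrative, made-up helper showing how the error convention
   above could be unpacked; the caller below simply asserts that the
   returned value is zero. */
#if 0
static Bool example_swizzle_failed ( UWord wrk_ret, /*OUT*/UWord* errcode )
{
   if (wrk_ret == 0) return False;        /* sigprocmask calls succeeded */
   *errcode = wrk_ret & ~(UWord)0x8000;   /* strip the 0x8000 failure tag */
   return True;
}
#endif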
288 #if defined(VGO_linux)
289 extern
290 UWord ML_(do_syscall_for_client_WRK)( Word syscallno,
291                                       void* guest_state,
292                                       const vki_sigset_t *syscall_mask,
293                                       const vki_sigset_t *restore_mask,
294                                       Word sigsetSzB );
295 #elif defined(VGO_darwin)
296 extern
297 UWord ML_(do_syscall_for_client_unix_WRK)( Word syscallno,
298                                            void* guest_state,
299                                            const vki_sigset_t *syscall_mask,
300                                            const vki_sigset_t *restore_mask,
301                                            Word sigsetSzB ); /* unused */
302 extern
303 UWord ML_(do_syscall_for_client_mach_WRK)( Word syscallno,
304                                            void* guest_state,
305                                            const vki_sigset_t *syscall_mask,
306                                            const vki_sigset_t *restore_mask,
307                                            Word sigsetSzB ); /* unused */
308 extern
309 UWord ML_(do_syscall_for_client_mdep_WRK)( Word syscallno,
310                                            void* guest_state,
311                                            const vki_sigset_t *syscall_mask,
312                                            const vki_sigset_t *restore_mask,
313                                            Word sigsetSzB ); /* unused */
314 #elif defined(VGO_solaris)
315 extern
316 UWord ML_(do_syscall_for_client_WRK)( Word syscallno,
317                                       void* guest_state,
318                                       const vki_sigset_t *syscall_mask,
319                                       const vki_sigset_t *restore_mask,
320                                       UChar *cflag);
321 UWord ML_(do_syscall_for_client_dret_WRK)( Word syscallno,
322                                            void* guest_state,
323                                            const vki_sigset_t *syscall_mask,
324                                            const vki_sigset_t *restore_mask,
325                                            UChar *cflag);
326 #else
327 #  error "Unknown OS"
328 #endif
329 
330 
331 static
332 void do_syscall_for_client ( Int syscallno,
333                              ThreadState* tst,
334                              const vki_sigset_t* syscall_mask )
335 {
336    vki_sigset_t saved;
337    UWord err;
338 #  if defined(VGO_linux)
339    err = ML_(do_syscall_for_client_WRK)(
340             syscallno, &tst->arch.vex,
341             syscall_mask, &saved, sizeof(vki_sigset_t)
342          );
343 #  elif defined(VGO_darwin)
344    switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
345       case VG_DARWIN_SYSCALL_CLASS_UNIX:
346          err = ML_(do_syscall_for_client_unix_WRK)(
347                   VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
348                   syscall_mask, &saved, 0/*unused:sigsetSzB*/
349                );
350          break;
351       case VG_DARWIN_SYSCALL_CLASS_MACH:
352          err = ML_(do_syscall_for_client_mach_WRK)(
353                   VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
354                   syscall_mask, &saved, 0/*unused:sigsetSzB*/
355                );
356          break;
357       case VG_DARWIN_SYSCALL_CLASS_MDEP:
358          err = ML_(do_syscall_for_client_mdep_WRK)(
359                   VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
360                   syscall_mask, &saved, 0/*unused:sigsetSzB*/
361                );
362          break;
363       default:
364          vg_assert(0);
365          /*NOTREACHED*/
366          break;
367    }
368 #  elif defined(VGO_solaris)
369    UChar cflag;
370 
371    /* Fasttraps or anything else cannot go through this path. */
372    vg_assert(VG_SOLARIS_SYSNO_CLASS(syscallno)
373              == VG_SOLARIS_SYSCALL_CLASS_CLASSIC);
374 
375    /* If the syscall is a door_return call then it has to be handled very
376       differently. */
377    if (tst->os_state.in_door_return)
378       err = ML_(do_syscall_for_client_dret_WRK)(
379                 syscallno, &tst->arch.vex,
380                 syscall_mask, &saved, &cflag
381             );
382    else
383       err = ML_(do_syscall_for_client_WRK)(
384                 syscallno, &tst->arch.vex,
385                 syscall_mask, &saved, &cflag
386             );
387 
388    /* Save the carry flag. */
389 #  if defined(VGP_x86_solaris)
390    LibVEX_GuestX86_put_eflag_c(cflag, &tst->arch.vex);
391 #  elif defined(VGP_amd64_solaris)
392    LibVEX_GuestAMD64_put_rflag_c(cflag, &tst->arch.vex);
393 #  else
394 #    error "Unknown platform"
395 #  endif
396 
397 #  else
398 #    error "Unknown OS"
399 #  endif
400    vg_assert2(
401       err == 0,
402       "ML_(do_syscall_for_client_WRK): sigprocmask error %lu",
403       err & 0xFFF
404    );
405 }
406 
407 
408 /* ---------------------------------------------------------------------
409    Impedance matchers and misc helpers
410    ------------------------------------------------------------------ */
411 
412 static
413 Bool eq_SyscallArgs ( SyscallArgs* a1, SyscallArgs* a2 )
414 {
415    return a1->sysno == a2->sysno
416           && a1->arg1 == a2->arg1
417           && a1->arg2 == a2->arg2
418           && a1->arg3 == a2->arg3
419           && a1->arg4 == a2->arg4
420           && a1->arg5 == a2->arg5
421           && a1->arg6 == a2->arg6
422           && a1->arg7 == a2->arg7
423           && a1->arg8 == a2->arg8;
424 }
425 
426 static
427 Bool eq_SyscallStatus ( UInt sysno, SyscallStatus* s1, SyscallStatus* s2 )
428 {
429    /* was: return s1->what == s2->what && sr_EQ( s1->sres, s2->sres ); */
430    if (s1->what == s2->what && sr_EQ( sysno, s1->sres, s2->sres ))
431       return True;
432 #  if defined(VGO_darwin)
433    /* Darwin-specific debugging guff */
434    vg_assert(s1->what == s2->what);
435    VG_(printf)("eq_SyscallStatus:\n");
436    VG_(printf)("  {%lu %lu %u}\n", s1->sres._wLO, s1->sres._wHI, s1->sres._mode);
437    VG_(printf)("  {%lu %lu %u}\n", s2->sres._wLO, s2->sres._wHI, s2->sres._mode);
438    vg_assert(0);
439 #  endif
440    return False;
441 }
442 
443 /* Convert between SysRes and SyscallStatus, to the extent possible. */
444 
445 static
446 SyscallStatus convert_SysRes_to_SyscallStatus ( SysRes res )
447 {
448    SyscallStatus status;
449    status.what = SsComplete;
450    status.sres = res;
451    return status;
452 }
453 
454 
455 /* Impedance matchers.  These convert syscall arg or result data from
456    the platform-specific in-guest-state format to the canonical
457    formats, and back. */
458 
459 static
460 void getSyscallArgsFromGuestState ( /*OUT*/SyscallArgs*       canonical,
461                                     /*IN*/ VexGuestArchState* gst_vanilla,
462                                     /*IN*/ UInt trc )
463 {
464 #if defined(VGP_x86_linux)
465    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
466    canonical->sysno = gst->guest_EAX;
467    canonical->arg1  = gst->guest_EBX;
468    canonical->arg2  = gst->guest_ECX;
469    canonical->arg3  = gst->guest_EDX;
470    canonical->arg4  = gst->guest_ESI;
471    canonical->arg5  = gst->guest_EDI;
472    canonical->arg6  = gst->guest_EBP;
473    canonical->arg7  = 0;
474    canonical->arg8  = 0;
475 
476 #elif defined(VGP_amd64_linux)
477    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
478    canonical->sysno = gst->guest_RAX;
479    canonical->arg1  = gst->guest_RDI;
480    canonical->arg2  = gst->guest_RSI;
481    canonical->arg3  = gst->guest_RDX;
482    canonical->arg4  = gst->guest_R10;
483    canonical->arg5  = gst->guest_R8;
484    canonical->arg6  = gst->guest_R9;
485    canonical->arg7  = 0;
486    canonical->arg8  = 0;
487 
488 #elif defined(VGP_ppc32_linux)
489    VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
490    canonical->sysno = gst->guest_GPR0;
491    canonical->arg1  = gst->guest_GPR3;
492    canonical->arg2  = gst->guest_GPR4;
493    canonical->arg3  = gst->guest_GPR5;
494    canonical->arg4  = gst->guest_GPR6;
495    canonical->arg5  = gst->guest_GPR7;
496    canonical->arg6  = gst->guest_GPR8;
497    canonical->arg7  = 0;
498    canonical->arg8  = 0;
499 
500 #elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
501    VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
502    canonical->sysno = gst->guest_GPR0;
503    canonical->arg1  = gst->guest_GPR3;
504    canonical->arg2  = gst->guest_GPR4;
505    canonical->arg3  = gst->guest_GPR5;
506    canonical->arg4  = gst->guest_GPR6;
507    canonical->arg5  = gst->guest_GPR7;
508    canonical->arg6  = gst->guest_GPR8;
509    canonical->arg7  = 0;
510    canonical->arg8  = 0;
511 
512 #elif defined(VGP_arm_linux)
513    VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
514    canonical->sysno = gst->guest_R7;
515    canonical->arg1  = gst->guest_R0;
516    canonical->arg2  = gst->guest_R1;
517    canonical->arg3  = gst->guest_R2;
518    canonical->arg4  = gst->guest_R3;
519    canonical->arg5  = gst->guest_R4;
520    canonical->arg6  = gst->guest_R5;
521    canonical->arg7  = 0;
522    canonical->arg8  = 0;
523 
524 #elif defined(VGP_arm64_linux)
525    VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
526    canonical->sysno = gst->guest_X8;
527    canonical->arg1  = gst->guest_X0;
528    canonical->arg2  = gst->guest_X1;
529    canonical->arg3  = gst->guest_X2;
530    canonical->arg4  = gst->guest_X3;
531    canonical->arg5  = gst->guest_X4;
532    canonical->arg6  = gst->guest_X5;
533    canonical->arg7  = 0;
534    canonical->arg8  = 0;
535 
536 #elif defined(VGP_mips32_linux)
537    VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
538    canonical->sysno = gst->guest_r2;    // v0
539    if (canonical->sysno == __NR_exit) {
540       canonical->arg1 = gst->guest_r4;    // a0
541       canonical->arg2 = 0;
542       canonical->arg3 = 0;
543       canonical->arg4 = 0;
544       canonical->arg5 = 0;
545       canonical->arg6 = 0;
546       canonical->arg8 = 0;
547    } else if (canonical->sysno != __NR_syscall) {
548       canonical->arg1  = gst->guest_r4;    // a0
549       canonical->arg2  = gst->guest_r5;    // a1
550       canonical->arg3  = gst->guest_r6;    // a2
551       canonical->arg4  = gst->guest_r7;    // a3
552       canonical->arg5  = *((UInt*) (gst->guest_r29 + 16));    // 16(guest_SP)
553       canonical->arg6  = *((UInt*) (gst->guest_r29 + 20));    // 20(guest_SP)
554       canonical->arg7  = *((UInt*) (gst->guest_r29 + 24));    // 24(guest_SP)
555       canonical->arg8 = 0;
556    } else {
557       // Fixme hack handle syscall()
558       canonical->sysno = gst->guest_r4;    // a0
559       canonical->arg1  = gst->guest_r5;    // a1
560       canonical->arg2  = gst->guest_r6;    // a2
561       canonical->arg3  = gst->guest_r7;    // a3
562       canonical->arg4  = *((UInt*) (gst->guest_r29 + 16));    // 16(guest_SP/sp)
563       canonical->arg5  = *((UInt*) (gst->guest_r29 + 20));    // 20(guest_SP/sp)
564       canonical->arg6  = *((UInt*) (gst->guest_r29 + 24));    // 24(guest_SP/sp)
565       canonical->arg8 = __NR_syscall;
566    }
567 
568 #elif defined(VGP_mips64_linux)
569    VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla;
570    canonical->sysno = gst->guest_r2;    // v0
571    canonical->arg1  = gst->guest_r4;    // a0
572    canonical->arg2  = gst->guest_r5;    // a1
573    canonical->arg3  = gst->guest_r6;    // a2
574    canonical->arg4  = gst->guest_r7;    // a3
575    canonical->arg5  = gst->guest_r8;    // a4
576    canonical->arg6  = gst->guest_r9;    // a5
577 
578 #elif defined(VGP_x86_darwin)
579    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
580    UWord *stack = (UWord *)gst->guest_ESP;
581    // GrP fixme hope syscalls aren't called with really shallow stacks...
582    canonical->sysno = gst->guest_EAX;
583    if (canonical->sysno != 0) {
584       // stack[0] is return address
585       canonical->arg1  = stack[1];
586       canonical->arg2  = stack[2];
587       canonical->arg3  = stack[3];
588       canonical->arg4  = stack[4];
589       canonical->arg5  = stack[5];
590       canonical->arg6  = stack[6];
591       canonical->arg7  = stack[7];
592       canonical->arg8  = stack[8];
593    } else {
594       // GrP fixme hack handle syscall()
595       // GrP fixme what about __syscall() ?
596       // stack[0] is return address
597       // DDD: the tool can't see that the params have been shifted!  Can
598       //      lead to incorrect checking, I think, because the PRRAn/PSARn
599       //      macros will mention the pre-shifted args.
600       canonical->sysno = stack[1];
601       vg_assert(canonical->sysno != 0);
602       canonical->arg1  = stack[2];
603       canonical->arg2  = stack[3];
604       canonical->arg3  = stack[4];
605       canonical->arg4  = stack[5];
606       canonical->arg5  = stack[6];
607       canonical->arg6  = stack[7];
608       canonical->arg7  = stack[8];
609       canonical->arg8  = stack[9];
610 
611       PRINT("SYSCALL[%d,?](0) syscall(%s, ...); please stand by...\n",
612             VG_(getpid)(), /*tid,*/
613             VG_SYSNUM_STRING(canonical->sysno));
614    }
615 
616    // Here we determine what kind of syscall it was by looking at the
617    // interrupt kind, and then encode the syscall number using the 64-bit
618    // encoding for Valgrind's internal use.
619    //
620    // DDD: Would it be better to stash the JMP kind into the Darwin
621    // thread state rather than passing in the trc?
622    switch (trc) {
623    case VEX_TRC_JMP_SYS_INT128:
624       // int $0x80 = Unix, 64-bit result
625       vg_assert(canonical->sysno >= 0);
626       canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno);
627       break;
628    case VEX_TRC_JMP_SYS_SYSENTER:
629       // syscall = Unix, 32-bit result
630       // OR        Mach, 32-bit result
631       if (canonical->sysno >= 0) {
632          // GrP fixme hack:  0xffff == I386_SYSCALL_NUMBER_MASK
633          canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno
634                                                              & 0xffff);
635       } else {
636          canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
637       }
638       break;
639    case VEX_TRC_JMP_SYS_INT129:
640       // int $0x81 = Mach, 32-bit result
641       vg_assert(canonical->sysno < 0);
642       canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
643       break;
644    case VEX_TRC_JMP_SYS_INT130:
645       // int $0x82 = mdep, 32-bit result
646       vg_assert(canonical->sysno >= 0);
647       canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MDEP(canonical->sysno);
648       break;
649    default:
650       vg_assert(0);
651       break;
652    }
653 
654 #elif defined(VGP_amd64_darwin)
655    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
656    UWord *stack = (UWord *)gst->guest_RSP;
657 
658    vg_assert(trc == VEX_TRC_JMP_SYS_SYSCALL);
659 
660    // GrP fixme hope syscalls aren't called with really shallow stacks...
661    canonical->sysno = gst->guest_RAX;
662    if (canonical->sysno != __NR_syscall) {
663       // stack[0] is return address
664       canonical->arg1  = gst->guest_RDI;
665       canonical->arg2  = gst->guest_RSI;
666       canonical->arg3  = gst->guest_RDX;
667       canonical->arg4  = gst->guest_R10;  // not rcx with syscall insn
668       canonical->arg5  = gst->guest_R8;
669       canonical->arg6  = gst->guest_R9;
670       canonical->arg7  = stack[1];
671       canonical->arg8  = stack[2];
672    } else {
673       // GrP fixme hack handle syscall()
674       // GrP fixme what about __syscall() ?
675       // stack[0] is return address
676       // DDD: the tool can't see that the params have been shifted!  Can
677       //      lead to incorrect checking, I think, because the PRRAn/PSARn
678       //      macros will mention the pre-shifted args.
679       canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(gst->guest_RDI);
680       vg_assert(canonical->sysno != __NR_syscall);
681       canonical->arg1  = gst->guest_RSI;
682       canonical->arg2  = gst->guest_RDX;
683       canonical->arg3  = gst->guest_R10;  // not rcx with syscall insn
684       canonical->arg4  = gst->guest_R8;
685       canonical->arg5  = gst->guest_R9;
686       canonical->arg6  = stack[1];
687       canonical->arg7  = stack[2];
688       canonical->arg8  = stack[3];
689 
690       PRINT("SYSCALL[%d,?](0) syscall(%s, ...); please stand by...\n",
691             VG_(getpid)(), /*tid,*/
692             VG_SYSNUM_STRING(canonical->sysno));
693    }
694 
695    // no canonical->sysno adjustment needed
696 
697 #elif defined(VGP_s390x_linux)
698    VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
699    canonical->sysno = gst->guest_SYSNO;
700    canonical->arg1  = gst->guest_r2;
701    canonical->arg2  = gst->guest_r3;
702    canonical->arg3  = gst->guest_r4;
703    canonical->arg4  = gst->guest_r5;
704    canonical->arg5  = gst->guest_r6;
705    canonical->arg6  = gst->guest_r7;
706    canonical->arg7  = 0;
707    canonical->arg8  = 0;
708 
709 #elif defined(VGP_x86_solaris)
710    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
711    UWord *stack = (UWord *)gst->guest_ESP;
712    canonical->sysno = gst->guest_EAX;
713    /* stack[0] is a return address. */
714    canonical->arg1  = stack[1];
715    canonical->arg2  = stack[2];
716    canonical->arg3  = stack[3];
717    canonical->arg4  = stack[4];
718    canonical->arg5  = stack[5];
719    canonical->arg6  = stack[6];
720    canonical->arg7  = stack[7];
721    canonical->arg8  = stack[8];
722 
723    switch (trc) {
724    case VEX_TRC_JMP_SYS_INT145:
725    case VEX_TRC_JMP_SYS_SYSENTER:
726    case VEX_TRC_JMP_SYS_SYSCALL:
727    /* These three are not actually valid syscall instructions on Solaris.
728       Pretend for now that we handle them as normal syscalls. */
729    case VEX_TRC_JMP_SYS_INT128:
730    case VEX_TRC_JMP_SYS_INT129:
731    case VEX_TRC_JMP_SYS_INT130:
732       /* int $0x91, sysenter, syscall = normal syscall */
733       break;
734    case VEX_TRC_JMP_SYS_INT210:
735       /* int $0xD2 = fasttrap */
736       canonical->sysno
737          = VG_SOLARIS_SYSCALL_CONSTRUCT_FASTTRAP(canonical->sysno);
738       break;
739    default:
740       vg_assert(0);
741       break;
742    }
743 
744 #elif defined(VGP_amd64_solaris)
745    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
746    UWord *stack = (UWord *)gst->guest_RSP;
747    canonical->sysno = gst->guest_RAX;
748    /* stack[0] is a return address. */
749    canonical->arg1 = gst->guest_RDI;
750    canonical->arg2 = gst->guest_RSI;
751    canonical->arg3 = gst->guest_RDX;
752    canonical->arg4 = gst->guest_R10;  /* Not RCX with syscall. */
753    canonical->arg5 = gst->guest_R8;
754    canonical->arg6 = gst->guest_R9;
755    canonical->arg7 = stack[1];
756    canonical->arg8 = stack[2];
757 
758    switch (trc) {
759    case VEX_TRC_JMP_SYS_SYSCALL:
760       /* syscall = normal syscall */
761       break;
762    case VEX_TRC_JMP_SYS_INT210:
763       /* int $0xD2 = fasttrap */
764       canonical->sysno
765          = VG_SOLARIS_SYSCALL_CONSTRUCT_FASTTRAP(canonical->sysno);
766       break;
767    default:
768       vg_assert(0);
769       break;
770    }
771 
772 #else
773 #  error "getSyscallArgsFromGuestState: unknown arch"
774 #endif
775 }
776 
777 static
778 void putSyscallArgsIntoGuestState ( /*IN*/ SyscallArgs*       canonical,
779                                     /*OUT*/VexGuestArchState* gst_vanilla )
780 {
781 #if defined(VGP_x86_linux)
782    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
783    gst->guest_EAX = canonical->sysno;
784    gst->guest_EBX = canonical->arg1;
785    gst->guest_ECX = canonical->arg2;
786    gst->guest_EDX = canonical->arg3;
787    gst->guest_ESI = canonical->arg4;
788    gst->guest_EDI = canonical->arg5;
789    gst->guest_EBP = canonical->arg6;
790 
791 #elif defined(VGP_amd64_linux)
792    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
793    gst->guest_RAX = canonical->sysno;
794    gst->guest_RDI = canonical->arg1;
795    gst->guest_RSI = canonical->arg2;
796    gst->guest_RDX = canonical->arg3;
797    gst->guest_R10 = canonical->arg4;
798    gst->guest_R8  = canonical->arg5;
799    gst->guest_R9  = canonical->arg6;
800 
801 #elif defined(VGP_ppc32_linux)
802    VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
803    gst->guest_GPR0 = canonical->sysno;
804    gst->guest_GPR3 = canonical->arg1;
805    gst->guest_GPR4 = canonical->arg2;
806    gst->guest_GPR5 = canonical->arg3;
807    gst->guest_GPR6 = canonical->arg4;
808    gst->guest_GPR7 = canonical->arg5;
809    gst->guest_GPR8 = canonical->arg6;
810 
811 #elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
812    VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
813    gst->guest_GPR0 = canonical->sysno;
814    gst->guest_GPR3 = canonical->arg1;
815    gst->guest_GPR4 = canonical->arg2;
816    gst->guest_GPR5 = canonical->arg3;
817    gst->guest_GPR6 = canonical->arg4;
818    gst->guest_GPR7 = canonical->arg5;
819    gst->guest_GPR8 = canonical->arg6;
820 
821 #elif defined(VGP_arm_linux)
822    VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
823    gst->guest_R7 = canonical->sysno;
824    gst->guest_R0 = canonical->arg1;
825    gst->guest_R1 = canonical->arg2;
826    gst->guest_R2 = canonical->arg3;
827    gst->guest_R3 = canonical->arg4;
828    gst->guest_R4 = canonical->arg5;
829    gst->guest_R5 = canonical->arg6;
830 
831 #elif defined(VGP_arm64_linux)
832    VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
833    gst->guest_X8 = canonical->sysno;
834    gst->guest_X0 = canonical->arg1;
835    gst->guest_X1 = canonical->arg2;
836    gst->guest_X2 = canonical->arg3;
837    gst->guest_X3 = canonical->arg4;
838    gst->guest_X4 = canonical->arg5;
839    gst->guest_X5 = canonical->arg6;
840 
841 #elif defined(VGP_x86_darwin)
842    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
843    UWord *stack = (UWord *)gst->guest_ESP;
844 
845    gst->guest_EAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);
846 
847    // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;
848    // stack[0] is return address
849    stack[1] = canonical->arg1;
850    stack[2] = canonical->arg2;
851    stack[3] = canonical->arg3;
852    stack[4] = canonical->arg4;
853    stack[5] = canonical->arg5;
854    stack[6] = canonical->arg6;
855    stack[7] = canonical->arg7;
856    stack[8] = canonical->arg8;
857 
858 #elif defined(VGP_amd64_darwin)
859    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
860    UWord *stack = (UWord *)gst->guest_RSP;
861 
862    gst->guest_RAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);
863    // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;
864 
865    // stack[0] is return address
866    gst->guest_RDI = canonical->arg1;
867    gst->guest_RSI = canonical->arg2;
868    gst->guest_RDX = canonical->arg3;
869    gst->guest_RCX = canonical->arg4;
870    gst->guest_R8  = canonical->arg5;
871    gst->guest_R9  = canonical->arg6;
872    stack[1]       = canonical->arg7;
873    stack[2]       = canonical->arg8;
874 
875 #elif defined(VGP_s390x_linux)
876    VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
877    gst->guest_SYSNO  = canonical->sysno;
878    gst->guest_r2     = canonical->arg1;
879    gst->guest_r3     = canonical->arg2;
880    gst->guest_r4     = canonical->arg3;
881    gst->guest_r5     = canonical->arg4;
882    gst->guest_r6     = canonical->arg5;
883    gst->guest_r7     = canonical->arg6;
884 
885 #elif defined(VGP_mips32_linux)
886    VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
887    if (canonical->arg8 != __NR_syscall) {
888       gst->guest_r2 = canonical->sysno;
889       gst->guest_r4 = canonical->arg1;
890       gst->guest_r5 = canonical->arg2;
891       gst->guest_r6 = canonical->arg3;
892       gst->guest_r7 = canonical->arg4;
893       *((UInt*) (gst->guest_r29 + 16)) = canonical->arg5; // 16(guest_GPR29/sp)
894       *((UInt*) (gst->guest_r29 + 20)) = canonical->arg6; // 20(sp)
895    } else {
896       canonical->arg8 = 0;
897       gst->guest_r2 = __NR_syscall;
898       gst->guest_r4 = canonical->sysno;
899       gst->guest_r5 = canonical->arg1;
900       gst->guest_r6 = canonical->arg2;
901       gst->guest_r7 = canonical->arg3;
902       *((UInt*) (gst->guest_r29 + 16)) = canonical->arg4; // 16(guest_GPR29/sp)
903       *((UInt*) (gst->guest_r29 + 20)) = canonical->arg5; // 20(sp)
904       *((UInt*) (gst->guest_r29 + 24)) = canonical->arg6; // 24(sp)
905    }
906 
907 #elif defined(VGP_mips64_linux)
908    VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla;
909    gst->guest_r2 = canonical->sysno;
910    gst->guest_r4 = canonical->arg1;
911    gst->guest_r5 = canonical->arg2;
912    gst->guest_r6 = canonical->arg3;
913    gst->guest_r7 = canonical->arg4;
914    gst->guest_r8 = canonical->arg5;
915    gst->guest_r9 = canonical->arg6;
916 
917 #elif defined(VGP_x86_solaris)
918    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
919    UWord *stack = (UWord *)gst->guest_ESP;
920 
921    /* Fasttraps or anything else cannot go through this way. */
922    vg_assert(VG_SOLARIS_SYSNO_CLASS(canonical->sysno)
923              == VG_SOLARIS_SYSCALL_CLASS_CLASSIC);
924    gst->guest_EAX = canonical->sysno;
925    /* stack[0] is a return address. */
926    stack[1] = canonical->arg1;
927    stack[2] = canonical->arg2;
928    stack[3] = canonical->arg3;
929    stack[4] = canonical->arg4;
930    stack[5] = canonical->arg5;
931    stack[6] = canonical->arg6;
932    stack[7] = canonical->arg7;
933    stack[8] = canonical->arg8;
934 
935 #elif defined(VGP_amd64_solaris)
936    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
937    UWord *stack = (UWord *)gst->guest_RSP;
938 
939    /* Fasttraps or anything else cannot go through this way. */
940    vg_assert(VG_SOLARIS_SYSNO_CLASS(canonical->sysno)
941              == VG_SOLARIS_SYSCALL_CLASS_CLASSIC);
942    gst->guest_RAX = canonical->sysno;
943    /* stack[0] is a return address. */
944    gst->guest_RDI = canonical->arg1;
945    gst->guest_RSI = canonical->arg2;
946    gst->guest_RDX = canonical->arg3;
947    gst->guest_R10 = canonical->arg4;
948    gst->guest_R8  = canonical->arg5;
949    gst->guest_R9  = canonical->arg6;
950    stack[1] = canonical->arg7;
951    stack[2] = canonical->arg8;
952 
953 #else
954 #  error "putSyscallArgsIntoGuestState: unknown arch"
955 #endif
956 }
957 
958 static
959 void getSyscallStatusFromGuestState ( /*OUT*/SyscallStatus*     canonical,
960                                       /*IN*/ VexGuestArchState* gst_vanilla )
961 {
962 #  if defined(VGP_x86_linux)
963    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
964    canonical->sres = VG_(mk_SysRes_x86_linux)( gst->guest_EAX );
965    canonical->what = SsComplete;
966 
967 #  elif defined(VGP_amd64_linux)
968    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
969    canonical->sres = VG_(mk_SysRes_amd64_linux)( gst->guest_RAX );
970    canonical->what = SsComplete;
971 
972 #  elif defined(VGP_ppc32_linux)
973    VexGuestPPC32State* gst   = (VexGuestPPC32State*)gst_vanilla;
974    UInt                cr    = LibVEX_GuestPPC32_get_CR( gst );
975    UInt                cr0so = (cr >> 28) & 1;
976    canonical->sres = VG_(mk_SysRes_ppc32_linux)( gst->guest_GPR3, cr0so );
977    canonical->what = SsComplete;
978 
979 #  elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
980    VexGuestPPC64State* gst   = (VexGuestPPC64State*)gst_vanilla;
981    UInt                cr    = LibVEX_GuestPPC64_get_CR( gst );
982    UInt                cr0so = (cr >> 28) & 1;
983    canonical->sres = VG_(mk_SysRes_ppc64_linux)( gst->guest_GPR3, cr0so );
984    canonical->what = SsComplete;
985 
986 #  elif defined(VGP_arm_linux)
987    VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
988    canonical->sres = VG_(mk_SysRes_arm_linux)( gst->guest_R0 );
989    canonical->what = SsComplete;
990 
991 #  elif defined(VGP_arm64_linux)
992    VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
993    canonical->sres = VG_(mk_SysRes_arm64_linux)( gst->guest_X0 );
994    canonical->what = SsComplete;
995 
996 #  elif defined(VGP_mips32_linux)
997    VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
998    UInt                v0 = gst->guest_r2;    // v0
999    UInt                v1 = gst->guest_r3;    // v1
1000    UInt                a3 = gst->guest_r7;    // a3
1001    canonical->sres = VG_(mk_SysRes_mips32_linux)( v0, v1, a3 );
1002    canonical->what = SsComplete;
1003 
1004 #  elif defined(VGP_mips64_linux)
1005    VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla;
1006    ULong                v0 = gst->guest_r2;    // v0
1007    ULong                v1 = gst->guest_r3;    // v1
1008    ULong                a3 = gst->guest_r7;    // a3
1009    canonical->sres = VG_(mk_SysRes_mips64_linux)(v0, v1, a3);
1010    canonical->what = SsComplete;
1011 
1012 #  elif defined(VGP_x86_darwin)
1013    /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
1014    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
1015    UInt carry = 1 & LibVEX_GuestX86_get_eflags(gst);
1016    UInt err = 0;
1017    UInt wLO = 0;
1018    UInt wHI = 0;
1019    switch (gst->guest_SC_CLASS) {
1020       case VG_DARWIN_SYSCALL_CLASS_UNIX:
1021          // int $0x80 = Unix, 64-bit result
1022          err = carry;
1023          wLO = gst->guest_EAX;
1024          wHI = gst->guest_EDX;
1025          break;
1026       case VG_DARWIN_SYSCALL_CLASS_MACH:
1027          // int $0x81 = Mach, 32-bit result
1028          wLO = gst->guest_EAX;
1029          break;
1030       case VG_DARWIN_SYSCALL_CLASS_MDEP:
1031          // int $0x82 = mdep, 32-bit result
1032          wLO = gst->guest_EAX;
1033          break;
1034       default:
1035          vg_assert(0);
1036          break;
1037    }
1038    canonical->sres = VG_(mk_SysRes_x86_darwin)(
1039                         gst->guest_SC_CLASS, err ? True : False,
1040                         wHI, wLO
1041                      );
1042    canonical->what = SsComplete;
1043 
1044 #  elif defined(VGP_amd64_darwin)
1045    /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
1046    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
1047    ULong carry = 1 & LibVEX_GuestAMD64_get_rflags(gst);
1048    ULong err = 0;
1049    ULong wLO = 0;
1050    ULong wHI = 0;
1051    switch (gst->guest_SC_CLASS) {
1052       case VG_DARWIN_SYSCALL_CLASS_UNIX:
1053          // syscall = Unix, 128-bit result
1054          err = carry;
1055          wLO = gst->guest_RAX;
1056          wHI = gst->guest_RDX;
1057          break;
1058       case VG_DARWIN_SYSCALL_CLASS_MACH:
1059          // syscall = Mach, 64-bit result
1060          wLO = gst->guest_RAX;
1061          break;
1062       case VG_DARWIN_SYSCALL_CLASS_MDEP:
1063          // syscall = mdep, 64-bit result
1064          wLO = gst->guest_RAX;
1065          break;
1066       default:
1067          vg_assert(0);
1068          break;
1069    }
1070    canonical->sres = VG_(mk_SysRes_amd64_darwin)(
1071                         gst->guest_SC_CLASS, err ? True : False,
1072                         wHI, wLO
1073                      );
1074    canonical->what = SsComplete;
1075 
1076 #  elif defined(VGP_s390x_linux)
1077    VexGuestS390XState* gst   = (VexGuestS390XState*)gst_vanilla;
1078    canonical->sres = VG_(mk_SysRes_s390x_linux)( gst->guest_r2 );
1079    canonical->what = SsComplete;
1080 
1081 #  elif defined(VGP_x86_solaris)
1082    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
1083    UInt carry = 1 & LibVEX_GuestX86_get_eflags(gst);
1084 
1085    canonical->sres = VG_(mk_SysRes_x86_solaris)(carry ? True : False,
1086                                                 gst->guest_EAX,
1087                                                 carry ? 0 : gst->guest_EDX);
1088    canonical->what = SsComplete;
1089 
1090 #  elif defined(VGP_amd64_solaris)
1091    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
1092    UInt carry = 1 & LibVEX_GuestAMD64_get_rflags(gst);
1093 
1094    canonical->sres = VG_(mk_SysRes_amd64_solaris)(carry ? True : False,
1095                                                   gst->guest_RAX,
1096                                                   carry ? 0 : gst->guest_RDX);
1097    canonical->what = SsComplete;
1098 
1099 #  else
1100 #    error "getSyscallStatusFromGuestState: unknown arch"
1101 #  endif
1102 }
1103 
1104 static
1105 void putSyscallStatusIntoGuestState ( /*IN*/ ThreadId tid,
1106                                       /*IN*/ SyscallStatus*     canonical,
1107                                       /*OUT*/VexGuestArchState* gst_vanilla )
1108 {
1109 #  if defined(VGP_x86_linux)
1110    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
1111    vg_assert(canonical->what == SsComplete);
1112    if (sr_isError(canonical->sres)) {
1113       /* This isn't exactly right, in that really a Failure with res
1114          not in the range 1 .. 4095 is unrepresentable in the
1115          Linux-x86 scheme.  Oh well. */
1116       gst->guest_EAX = - (Int)sr_Err(canonical->sres);
1117    } else {
1118       gst->guest_EAX = sr_Res(canonical->sres);
1119    }
1120    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1121              OFFSET_x86_EAX, sizeof(UWord) );
1122 
1123 #  elif defined(VGP_amd64_linux)
1124    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
1125    vg_assert(canonical->what == SsComplete);
1126    if (sr_isError(canonical->sres)) {
1127       /* This isn't exactly right, in that really a Failure with res
1128          not in the range 1 .. 4095 is unrepresentable in the
1129          Linux-amd64 scheme.  Oh well. */
1130       gst->guest_RAX = - (Long)sr_Err(canonical->sres);
1131    } else {
1132       gst->guest_RAX = sr_Res(canonical->sres);
1133    }
1134    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1135              OFFSET_amd64_RAX, sizeof(UWord) );
1136 
1137 #  elif defined(VGP_ppc32_linux)
1138    VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
1139    UInt old_cr = LibVEX_GuestPPC32_get_CR(gst);
1140    vg_assert(canonical->what == SsComplete);
1141    if (sr_isError(canonical->sres)) {
1142       /* set CR0.SO */
1143       LibVEX_GuestPPC32_put_CR( old_cr | (1<<28), gst );
1144       gst->guest_GPR3 = sr_Err(canonical->sres);
1145    } else {
1146       /* clear CR0.SO */
1147       LibVEX_GuestPPC32_put_CR( old_cr & ~(1<<28), gst );
1148       gst->guest_GPR3 = sr_Res(canonical->sres);
1149    }
1150    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1151              OFFSET_ppc32_GPR3, sizeof(UWord) );
1152    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1153              OFFSET_ppc32_CR0_0, sizeof(UChar) );
1154 
1155 #  elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
1156    VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
1157    UInt old_cr = LibVEX_GuestPPC64_get_CR(gst);
1158    vg_assert(canonical->what == SsComplete);
1159    if (sr_isError(canonical->sres)) {
1160       /* set CR0.SO */
1161       LibVEX_GuestPPC64_put_CR( old_cr | (1<<28), gst );
1162       gst->guest_GPR3 = sr_Err(canonical->sres);
1163    } else {
1164       /* clear CR0.SO */
1165       LibVEX_GuestPPC64_put_CR( old_cr & ~(1<<28), gst );
1166       gst->guest_GPR3 = sr_Res(canonical->sres);
1167    }
1168    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1169              OFFSET_ppc64_GPR3, sizeof(UWord) );
1170    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1171              OFFSET_ppc64_CR0_0, sizeof(UChar) );
1172 
1173 #  elif defined(VGP_arm_linux)
1174    VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
1175    vg_assert(canonical->what == SsComplete);
1176    if (sr_isError(canonical->sres)) {
1177       /* This isn't exactly right, in that really a Failure with res
1178          not in the range 1 .. 4095 is unrepresentable in the
1179          Linux-arm scheme.  Oh well. */
1180       gst->guest_R0 = - (Int)sr_Err(canonical->sres);
1181    } else {
1182       gst->guest_R0 = sr_Res(canonical->sres);
1183    }
1184    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1185              OFFSET_arm_R0, sizeof(UWord) );
1186 
1187 #  elif defined(VGP_arm64_linux)
1188    VexGuestARM64State* gst = (VexGuestARM64State*)gst_vanilla;
1189    vg_assert(canonical->what == SsComplete);
1190    if (sr_isError(canonical->sres)) {
1191       /* This isn't exactly right, in that really a Failure with res
1192          not in the range 1 .. 4095 is unrepresentable in the
1193          Linux-arm64 scheme.  Oh well. */
1194       gst->guest_X0 = - (Long)sr_Err(canonical->sres);
1195    } else {
1196       gst->guest_X0 = sr_Res(canonical->sres);
1197    }
1198    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1199              OFFSET_arm64_X0, sizeof(UWord) );
1200 
1201 #elif defined(VGP_x86_darwin)
1202    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
1203    SysRes sres = canonical->sres;
1204    vg_assert(canonical->what == SsComplete);
1205    /* Unfortunately here we have to break abstraction and look
1206       directly inside 'res', in order to decide what to do. */
1207    switch (sres._mode) {
1208       case SysRes_MACH: // int $0x81 = Mach, 32-bit result
1209       case SysRes_MDEP: // int $0x82 = mdep, 32-bit result
1210          gst->guest_EAX = sres._wLO;
1211          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1212                    OFFSET_x86_EAX, sizeof(UInt) );
1213          break;
1214       case SysRes_UNIX_OK:  // int $0x80 = Unix, 64-bit result
1215       case SysRes_UNIX_ERR: // int $0x80 = Unix, 64-bit error
1216          gst->guest_EAX = sres._wLO;
1217          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1218                    OFFSET_x86_EAX, sizeof(UInt) );
1219          gst->guest_EDX = sres._wHI;
1220          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1221                    OFFSET_x86_EDX, sizeof(UInt) );
1222          LibVEX_GuestX86_put_eflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
1223                                       gst );
1224          // GrP fixme sets defined for entire eflags, not just bit c
1225          // DDD: this breaks exp-ptrcheck.
1226          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1227                    offsetof(VexGuestX86State, guest_CC_DEP1), sizeof(UInt) );
1228          break;
1229       default:
1230          vg_assert(0);
1231          break;
1232    }
1233 
1234 #elif defined(VGP_amd64_darwin)
1235    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
1236    SysRes sres = canonical->sres;
1237    vg_assert(canonical->what == SsComplete);
1238    /* Unfortunately here we have to break abstraction and look
1239       directly inside 'res', in order to decide what to do. */
1240    switch (sres._mode) {
1241       case SysRes_MACH: // syscall = Mach, 64-bit result
1242       case SysRes_MDEP: // syscall = mdep, 64-bit result
1243          gst->guest_RAX = sres._wLO;
1244          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1245                    OFFSET_amd64_RAX, sizeof(ULong) );
1246          break;
1247       case SysRes_UNIX_OK:  // syscall = Unix, 128-bit result
1248       case SysRes_UNIX_ERR: // syscall = Unix, 128-bit error
1249          gst->guest_RAX = sres._wLO;
1250          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1251                    OFFSET_amd64_RAX, sizeof(ULong) );
1252          gst->guest_RDX = sres._wHI;
1253          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1254                    OFFSET_amd64_RDX, sizeof(ULong) );
1255          LibVEX_GuestAMD64_put_rflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
1256                                         gst );
1257          // GrP fixme sets defined for entire rflags, not just bit c
1258          // DDD: this breaks exp-ptrcheck.
1259          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1260                    offsetof(VexGuestAMD64State, guest_CC_DEP1), sizeof(ULong) );
1261          break;
1262       default:
1263          vg_assert(0);
1264          break;
1265    }
1266 
1267 #  elif defined(VGP_s390x_linux)
1268    VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
1269    vg_assert(canonical->what == SsComplete);
1270    if (sr_isError(canonical->sres)) {
1271       gst->guest_r2 = - (Long)sr_Err(canonical->sres);
1272    } else {
1273       gst->guest_r2 = sr_Res(canonical->sres);
1274    }
1275 
1276 #  elif defined(VGP_mips32_linux)
1277    VexGuestMIPS32State* gst = (VexGuestMIPS32State*)gst_vanilla;
1278    vg_assert(canonical->what == SsComplete);
1279    if (sr_isError(canonical->sres)) {
1280       gst->guest_r2 = (Int)sr_Err(canonical->sres);
1281       gst->guest_r7 = (Int)sr_Err(canonical->sres);
1282    } else {
1283       gst->guest_r2 = sr_Res(canonical->sres);
1284       gst->guest_r3 = sr_ResEx(canonical->sres);
1285       gst->guest_r7 = (Int)sr_Err(canonical->sres);
1286    }
1287    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1288              OFFSET_mips32_r2, sizeof(UWord) );
1289    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1290              OFFSET_mips32_r3, sizeof(UWord) );
1291    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1292              OFFSET_mips32_r7, sizeof(UWord) );
1293 
1294 #  elif defined(VGP_mips64_linux)
1295    VexGuestMIPS64State* gst = (VexGuestMIPS64State*)gst_vanilla;
1296    vg_assert(canonical->what == SsComplete);
1297    if (sr_isError(canonical->sres)) {
1298       gst->guest_r2 = (Int)sr_Err(canonical->sres);
1299       gst->guest_r7 = (Int)sr_Err(canonical->sres);
1300    } else {
1301       gst->guest_r2 = sr_Res(canonical->sres);
1302       gst->guest_r3 = sr_ResEx(canonical->sres);
1303       gst->guest_r7 = (Int)sr_Err(canonical->sres);
1304    }
1305    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1306              OFFSET_mips64_r2, sizeof(UWord) );
1307    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1308              OFFSET_mips64_r3, sizeof(UWord) );
1309    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
1310              OFFSET_mips64_r7, sizeof(UWord) );
1311 
1312 #  elif defined(VGP_x86_solaris)
1313    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
1314    SysRes sres = canonical->sres;
1315    vg_assert(canonical->what == SsComplete);
1316 
1317    if (sr_isError(sres)) {
1318       gst->guest_EAX = sr_Err(sres);
1319       VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, OFFSET_x86_EAX,
1320                sizeof(UInt));
1321       LibVEX_GuestX86_put_eflag_c(1, gst);
1322    }
1323    else {
1324       gst->guest_EAX = sr_Res(sres);
1325       VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, OFFSET_x86_EAX,
1326                sizeof(UInt));
1327       gst->guest_EDX = sr_ResHI(sres);
1328       VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, OFFSET_x86_EDX,
1329                sizeof(UInt));
1330       LibVEX_GuestX86_put_eflag_c(0, gst);
1331    }
1332    /* Make CC_DEP1 and CC_DEP2 defined.  This is inaccurate because it makes
1333       other eflags defined too (see README.solaris). */
1334    VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, offsetof(VexGuestX86State,
1335             guest_CC_DEP1), sizeof(UInt));
1336    VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, offsetof(VexGuestX86State,
1337             guest_CC_DEP2), sizeof(UInt));
1338 
1339 #  elif defined(VGP_amd64_solaris)
1340    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
1341    SysRes sres = canonical->sres;
1342    vg_assert(canonical->what == SsComplete);
1343 
1344    if (sr_isError(sres)) {
1345       gst->guest_RAX = sr_Err(sres);
1346       VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, OFFSET_amd64_RAX,
1347                sizeof(ULong));
1348       LibVEX_GuestAMD64_put_rflag_c(1, gst);
1349    }
1350    else {
1351       gst->guest_RAX = sr_Res(sres);
1352       VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, OFFSET_amd64_RAX,
1353                sizeof(ULong));
1354       gst->guest_RDX = sr_ResHI(sres);
1355       VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, OFFSET_amd64_RDX,
1356                sizeof(ULong));
1357       LibVEX_GuestAMD64_put_rflag_c(0, gst);
1358    }
1359    /* Make CC_DEP1 and CC_DEP2 defined.  This is inaccurate because it makes
1360       other eflags defined too (see README.solaris). */
1361    VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, offsetof(VexGuestAMD64State,
1362             guest_CC_DEP1), sizeof(ULong));
1363    VG_TRACK(post_reg_write, Vg_CoreSysCall, tid, offsetof(VexGuestAMD64State,
1364             guest_CC_DEP2), sizeof(ULong));
1365 
1366 #  else
1367 #    error "putSyscallStatusIntoGuestState: unknown arch"
1368 #  endif
1369 }
1370 
1371 
1372 /* Tell me the offsets in the guest state of the syscall params, so
1373    that the scalar argument checkers don't have to have this info
1374    hardwired. */
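/* Naming convention for the fields filled in below: o_XXX is a byte
   offset into the guest (VEX) register state, used when the argument
   is passed in a register; s_XXX is a byte offset from the guest
   stack pointer, used when the argument is passed on the stack; and
   uu_XXX marks an argument slot the platform does not have at all,
   and is set to -1 (an impossible offset). */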
1375 
1376 static
1377 void getSyscallArgLayout ( /*OUT*/SyscallArgLayout* layout )
1378 {
1379    VG_(bzero_inline)(layout, sizeof(*layout));
1380 
1381 #if defined(VGP_x86_linux)
1382    layout->o_sysno  = OFFSET_x86_EAX;
1383    layout->o_arg1   = OFFSET_x86_EBX;
1384    layout->o_arg2   = OFFSET_x86_ECX;
1385    layout->o_arg3   = OFFSET_x86_EDX;
1386    layout->o_arg4   = OFFSET_x86_ESI;
1387    layout->o_arg5   = OFFSET_x86_EDI;
1388    layout->o_arg6   = OFFSET_x86_EBP;
1389    layout->uu_arg7  = -1; /* impossible value */
1390    layout->uu_arg8  = -1; /* impossible value */
1391 
1392 #elif defined(VGP_amd64_linux)
1393    layout->o_sysno  = OFFSET_amd64_RAX;
1394    layout->o_arg1   = OFFSET_amd64_RDI;
1395    layout->o_arg2   = OFFSET_amd64_RSI;
1396    layout->o_arg3   = OFFSET_amd64_RDX;
1397    layout->o_arg4   = OFFSET_amd64_R10;
1398    layout->o_arg5   = OFFSET_amd64_R8;
1399    layout->o_arg6   = OFFSET_amd64_R9;
1400    layout->uu_arg7  = -1; /* impossible value */
1401    layout->uu_arg8  = -1; /* impossible value */
1402 
1403 #elif defined(VGP_ppc32_linux)
1404    layout->o_sysno  = OFFSET_ppc32_GPR0;
1405    layout->o_arg1   = OFFSET_ppc32_GPR3;
1406    layout->o_arg2   = OFFSET_ppc32_GPR4;
1407    layout->o_arg3   = OFFSET_ppc32_GPR5;
1408    layout->o_arg4   = OFFSET_ppc32_GPR6;
1409    layout->o_arg5   = OFFSET_ppc32_GPR7;
1410    layout->o_arg6   = OFFSET_ppc32_GPR8;
1411    layout->uu_arg7  = -1; /* impossible value */
1412    layout->uu_arg8  = -1; /* impossible value */
1413 
1414 #elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
1415    layout->o_sysno  = OFFSET_ppc64_GPR0;
1416    layout->o_arg1   = OFFSET_ppc64_GPR3;
1417    layout->o_arg2   = OFFSET_ppc64_GPR4;
1418    layout->o_arg3   = OFFSET_ppc64_GPR5;
1419    layout->o_arg4   = OFFSET_ppc64_GPR6;
1420    layout->o_arg5   = OFFSET_ppc64_GPR7;
1421    layout->o_arg6   = OFFSET_ppc64_GPR8;
1422    layout->uu_arg7  = -1; /* impossible value */
1423    layout->uu_arg8  = -1; /* impossible value */
1424 
1425 #elif defined(VGP_arm_linux)
1426    layout->o_sysno  = OFFSET_arm_R7;
1427    layout->o_arg1   = OFFSET_arm_R0;
1428    layout->o_arg2   = OFFSET_arm_R1;
1429    layout->o_arg3   = OFFSET_arm_R2;
1430    layout->o_arg4   = OFFSET_arm_R3;
1431    layout->o_arg5   = OFFSET_arm_R4;
1432    layout->o_arg6   = OFFSET_arm_R5;
1433    layout->uu_arg7  = -1; /* impossible value */
1434    layout->uu_arg8  = -1; /* impossible value */
1435 
1436 #elif defined(VGP_arm64_linux)
1437    layout->o_sysno  = OFFSET_arm64_X8;
1438    layout->o_arg1   = OFFSET_arm64_X0;
1439    layout->o_arg2   = OFFSET_arm64_X1;
1440    layout->o_arg3   = OFFSET_arm64_X2;
1441    layout->o_arg4   = OFFSET_arm64_X3;
1442    layout->o_arg5   = OFFSET_arm64_X4;
1443    layout->o_arg6   = OFFSET_arm64_X5;
1444    layout->uu_arg7  = -1; /* impossible value */
1445    layout->uu_arg8  = -1; /* impossible value */
1446 
1447 #elif defined(VGP_mips32_linux)
1448    layout->o_sysno  = OFFSET_mips32_r2;
1449    layout->o_arg1   = OFFSET_mips32_r4;
1450    layout->o_arg2   = OFFSET_mips32_r5;
1451    layout->o_arg3   = OFFSET_mips32_r6;
1452    layout->o_arg4   = OFFSET_mips32_r7;
1453    layout->s_arg5   = sizeof(UWord) * 4;
1454    layout->s_arg6   = sizeof(UWord) * 5;
1455    layout->s_arg7   = sizeof(UWord) * 6;
1456    layout->uu_arg8  = -1; /* impossible value */
1457 
1458 #elif defined(VGP_mips64_linux)
1459    layout->o_sysno  = OFFSET_mips64_r2;
1460    layout->o_arg1   = OFFSET_mips64_r4;
1461    layout->o_arg2   = OFFSET_mips64_r5;
1462    layout->o_arg3   = OFFSET_mips64_r6;
1463    layout->o_arg4   = OFFSET_mips64_r7;
1464    layout->o_arg5   = OFFSET_mips64_r8;
1465    layout->o_arg6   = OFFSET_mips64_r9;
1466    layout->uu_arg7  = -1; /* impossible value */
1467    layout->uu_arg8  = -1; /* impossible value */
1468 
1469 #elif defined(VGP_x86_darwin)
1470    layout->o_sysno  = OFFSET_x86_EAX;
1471    // syscall parameters are on stack in C convention
1472    layout->s_arg1   = sizeof(UWord) * 1;
1473    layout->s_arg2   = sizeof(UWord) * 2;
1474    layout->s_arg3   = sizeof(UWord) * 3;
1475    layout->s_arg4   = sizeof(UWord) * 4;
1476    layout->s_arg5   = sizeof(UWord) * 5;
1477    layout->s_arg6   = sizeof(UWord) * 6;
1478    layout->s_arg7   = sizeof(UWord) * 7;
1479    layout->s_arg8   = sizeof(UWord) * 8;
1480 
1481 #elif defined(VGP_amd64_darwin)
1482    layout->o_sysno  = OFFSET_amd64_RAX;
1483    layout->o_arg1   = OFFSET_amd64_RDI;
1484    layout->o_arg2   = OFFSET_amd64_RSI;
1485    layout->o_arg3   = OFFSET_amd64_RDX;
1486    layout->o_arg4   = OFFSET_amd64_RCX;
1487    layout->o_arg5   = OFFSET_amd64_R8;
1488    layout->o_arg6   = OFFSET_amd64_R9;
1489    layout->s_arg7   = sizeof(UWord) * 1;
1490    layout->s_arg8   = sizeof(UWord) * 2;
1491 
1492 #elif defined(VGP_s390x_linux)
1493    layout->o_sysno  = OFFSET_s390x_SYSNO;
1494    layout->o_arg1   = OFFSET_s390x_r2;
1495    layout->o_arg2   = OFFSET_s390x_r3;
1496    layout->o_arg3   = OFFSET_s390x_r4;
1497    layout->o_arg4   = OFFSET_s390x_r5;
1498    layout->o_arg5   = OFFSET_s390x_r6;
1499    layout->o_arg6   = OFFSET_s390x_r7;
1500    layout->uu_arg7  = -1; /* impossible value */
1501    layout->uu_arg8  = -1; /* impossible value */
1502 
1503 #elif defined(VGP_x86_solaris)
1504    layout->o_sysno  = OFFSET_x86_EAX;
1505    /* Syscall parameters are on the stack. */
1506    layout->s_arg1   = sizeof(UWord) * 1;
1507    layout->s_arg2   = sizeof(UWord) * 2;
1508    layout->s_arg3   = sizeof(UWord) * 3;
1509    layout->s_arg4   = sizeof(UWord) * 4;
1510    layout->s_arg5   = sizeof(UWord) * 5;
1511    layout->s_arg6   = sizeof(UWord) * 6;
1512    layout->s_arg7   = sizeof(UWord) * 7;
1513    layout->s_arg8   = sizeof(UWord) * 8;
1514 
1515 #elif defined(VGP_amd64_solaris)
1516    layout->o_sysno  = OFFSET_amd64_RAX;
1517    layout->o_arg1   = OFFSET_amd64_RDI;
1518    layout->o_arg2   = OFFSET_amd64_RSI;
1519    layout->o_arg3   = OFFSET_amd64_RDX;
1520    layout->o_arg4   = OFFSET_amd64_R10;
1521    layout->o_arg5   = OFFSET_amd64_R8;
1522    layout->o_arg6   = OFFSET_amd64_R9;
1523    layout->s_arg7   = sizeof(UWord) * 1;
1524    layout->s_arg8   = sizeof(UWord) * 2;
1525 
1526 #else
1527 #  error "getSyscallArgLayout: unknown arch"
1528 #endif
1529 }
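/* A note on the stack-passed cases above: the s_XXX offsets are
   relative to the guest stack pointer at the point of the syscall.
   On x86 Darwin and Solaris they start at sizeof(UWord)*1, which
   presumably skips the return-address word sitting at 0(%esp); on
   mips32, args 5..7 start at sizeof(UWord)*4, matching the o32
   convention of reserving home space for the four register args. */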
1530 
1531 
1532 /* ---------------------------------------------------------------------
1533    The main driver logic
1534    ------------------------------------------------------------------ */
1535 
1536 /* Finding the handlers for a given syscall, or faking up one
1537    when no handler is found. */
1538 
1539 static
1540 void bad_before ( ThreadId              tid,
1541                   SyscallArgLayout*     layout,
1542                   /*MOD*/SyscallArgs*   args,
1543                   /*OUT*/SyscallStatus* status,
1544                   /*OUT*/UWord*         flags )
1545 {
1546    VG_(dmsg)("WARNING: unhandled %s syscall: %s\n",
1547       VG_PLATFORM, VG_SYSNUM_STRING(args->sysno));
1548    if (VG_(clo_verbosity) > 1) {
1549       VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1550    }
1551    VG_(dmsg)("You may be able to write your own handler.\n");
1552    VG_(dmsg)("Read the file README_MISSING_SYSCALL_OR_IOCTL.\n");
1553    VG_(dmsg)("Nevertheless we consider this a bug.  Please report\n");
1554    VG_(dmsg)("it at http://valgrind.org/support/bug_reports.html.\n");
1555 
1556    SET_STATUS_Failure(VKI_ENOSYS);
1557 
1558 #  if defined(VGO_solaris)
1559    VG_(exit)(1);
1560 #  endif
1561 }
1562 
1563 static SyscallTableEntry bad_sys =
1564    { bad_before, NULL };
1565 
1566 static const SyscallTableEntry* get_syscall_entry ( Int syscallno )
1567 {
1568    const SyscallTableEntry* sys = NULL;
1569 
1570 #  if defined(VGO_linux)
1571    sys = ML_(get_linux_syscall_entry)( syscallno );
1572 
1573 #  elif defined(VGO_darwin)
1574    Int idx = VG_DARWIN_SYSNO_INDEX(syscallno);
1575 
1576    switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
1577    case VG_DARWIN_SYSCALL_CLASS_UNIX:
1578       if (idx >= 0 && idx < ML_(syscall_table_size) &&
1579           ML_(syscall_table)[idx].before != NULL)
1580          sys = &ML_(syscall_table)[idx];
1581       break;
1582    case VG_DARWIN_SYSCALL_CLASS_MACH:
1583       if (idx >= 0 && idx < ML_(mach_trap_table_size) &&
1584           ML_(mach_trap_table)[idx].before != NULL)
1585          sys = &ML_(mach_trap_table)[idx];
1586       break;
1587    case VG_DARWIN_SYSCALL_CLASS_MDEP:
1588       if (idx >= 0 && idx < ML_(mdep_trap_table_size) &&
1589           ML_(mdep_trap_table)[idx].before != NULL)
1590          sys = &ML_(mdep_trap_table)[idx];
1591       break;
1592    default:
1593       vg_assert(0);
1594       break;
1595    }
1596 
1597 #  elif defined(VGO_solaris)
1598    sys = ML_(get_solaris_syscall_entry)(syscallno);
1599 
1600 #  else
1601 #    error Unknown OS
1602 #  endif
1603 
1604    return sys == NULL  ? &bad_sys  : sys;
1605 }
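/* Note that this never returns NULL: unknown syscalls map to bad_sys,
   whose .before handler (bad_before) fails the call with ENOSYS.  The
   vg_assert(ent->before) in VG_(client_syscall) below relies on
   this. */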
1606 
1607 
1608 /* Add and remove signals from mask so that we end up telling the
1609    kernel the state we actually want rather than what the client
1610    wants. */
1611 void VG_(sanitize_client_sigmask)(vki_sigset_t *mask)
1612 {
1613    VG_(sigdelset)(mask, VKI_SIGKILL);
1614    VG_(sigdelset)(mask, VKI_SIGSTOP);
1615    VG_(sigdelset)(mask, VG_SIGVGKILL); /* never block */
1616 }
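/* The sanitized mask is what eventually gets handed to the kernel
   around a blocking syscall (see the SfMayBlock path in
   VG_(client_syscall) below), so the client can never actually block
   SIGKILL, SIGSTOP or Valgrind's private VG_SIGVGKILL while parked in
   the kernel. */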
1617 
1618 typedef
1619    struct {
1620       SyscallArgs   orig_args;
1621       SyscallArgs   args;
1622       SyscallStatus status;
1623       UWord         flags;
1624    }
1625    SyscallInfo;
1626 
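/* One SyscallInfo record per possible thread, indexed by ThreadId.
   The array is allocated lazily by ensure_initialised() below and
   individual entries are reset by VG_(clear_syscallInfo). */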
1627 SyscallInfo *syscallInfo;
1628 
1629 /* The scheduler needs to be able to zero out these records after a
1630    fork, hence this is exported from m_syswrap. */
1631 void VG_(clear_syscallInfo) ( Int tid )
1632 {
1633    vg_assert(syscallInfo);
1634    vg_assert(tid >= 0 && tid < VG_N_THREADS);
1635    VG_(memset)( & syscallInfo[tid], 0, sizeof( syscallInfo[tid] ));
1636    syscallInfo[tid].status.what = SsIdle;
1637 }
1638 
1639 Bool VG_(is_in_syscall) ( Int tid )
1640 {
1641    vg_assert(tid >= 0 && tid < VG_N_THREADS);
1642    return (syscallInfo[tid].status.what != SsIdle);
1643 }
1644 
1645 static void ensure_initialised ( void )
1646 {
1647    Int i;
1648    static Bool init_done = False;
1649    if (init_done)
1650       return;
1651    init_done = True;
1652 
1653    syscallInfo = VG_(malloc)("scinfo", VG_N_THREADS * sizeof syscallInfo[0]);
1654 
1655    for (i = 0; i < VG_N_THREADS; i++) {
1656       VG_(clear_syscallInfo)( i );
1657    }
1658 }
1659 
1660 /* --- This is the main function of this file. --- */
1661 
1662 void VG_(client_syscall) ( ThreadId tid, UInt trc )
1663 {
1664    Word                     sysno;
1665    ThreadState*             tst;
1666    const SyscallTableEntry* ent;
1667    SyscallArgLayout         layout;
1668    SyscallInfo*             sci;
1669 
1670    ensure_initialised();
1671 
1672    vg_assert(VG_(is_valid_tid)(tid));
1673    vg_assert(tid >= 1 && tid < VG_N_THREADS);
1674    vg_assert(VG_(is_running_thread)(tid));
1675 
1676 #  if !defined(VGO_darwin)
1677    // Resync filtering is meaningless on non-Darwin targets.
1678    vg_assert(VG_(clo_resync_filter) == 0);
1679 #  endif
1680 
1681    tst = VG_(get_ThreadState)(tid);
1682 
1683    /* BEGIN ensure root thread's stack is suitably mapped */
1684    /* In some rare circumstances, we may do the syscall without the
1685       bottom page of the stack being mapped, because the stack pointer
1686       was moved down just a few instructions before the syscall
1687       instruction, and there have been no memory references since
1688       then that would have caused a call to VG_(extend_stack) to
1689       happen.
1690 
1691       In native execution that's OK: the kernel automagically extends
1692       the stack's mapped area down to cover the stack pointer (or sp -
1693       redzone, really).  In simulated normal execution that's OK too,
1694       since any signals we get from accessing below the mapped area of
1695       the (guest's) stack lead us to VG_(extend_stack), where we
1696       simulate the kernel's stack extension logic.  But that leaves
1697       the problem of entering a syscall with the SP unmapped.  Because
1698       the kernel doesn't know that the segment immediately above SP is
1699       supposed to be a grow-down segment, it causes the syscall to
1700       fail, and thereby causes a divergence between native behaviour
1701       (syscall succeeds) and simulated behaviour (syscall fails).
1702 
1703       This is quite a rare failure mode.  It has only been seen
1704       affecting calls to sys_readlink on amd64-linux, and even then it
1705       requires a certain code sequence around the syscall to trigger
1706       it.  Here is one:
1707 
1708       extern int my_readlink ( const char* path );
1709       asm(
1710       ".text\n"
1711       ".globl my_readlink\n"
1712       "my_readlink:\n"
1713       "\tsubq    $0x1008,%rsp\n"
1714       "\tmovq    %rdi,%rdi\n"              // path is in rdi
1715       "\tmovq    %rsp,%rsi\n"              // &buf[0] -> rsi
1716       "\tmovl    $0x1000,%edx\n"           // sizeof(buf) in rdx
1717       "\tmovl    $"__NR_READLINK",%eax\n"  // syscall number
1718       "\tsyscall\n"
1719       "\taddq    $0x1008,%rsp\n"
1720       "\tret\n"
1721       ".previous\n"
1722       );
1723 
1724       For more details, see bug #156404
1725       (https://bugs.kde.org/show_bug.cgi?id=156404).
1726 
1727       The fix is actually very simple.  We simply need to call
1728       VG_(extend_stack) for this thread, handing it the lowest
1729       possible valid address for stack (sp - redzone), to ensure the
1730       pages all the way down to that address are mapped.  Because
1731       this is a potentially expensive and frequent operation, we
1732       do the following:
1733 
1734       Only the main thread (tid=1) has a growdown stack.  So
1735       ignore all others.  It is conceivable, although highly unlikely,
1736       that the main thread exits, and later another thread is
1737       allocated tid=1, but that's harmless, I believe;
1738       VG_(extend_stack) will do nothing when applied to a non-root
1739       thread.
1740 
1741       All this guff is of course Linux-specific.  Hence the ifdef.
1742    */
1743 #  if defined(VGO_linux)
1744    if (tid == 1/*ROOT THREAD*/) {
1745       Addr     stackMin   = VG_(get_SP)(tid) - VG_STACK_REDZONE_SZB;
1746 
1747       /* The precise thing to do here would be to extend the stack only
1748          if the system call can be proven to access unmapped user stack
1749          memory. That is an enormous amount of work even if a proper
1750          spec of system calls was available.
1751 
1752          In the case where the system call does not access user memory
1753          the stack pointer here can have any value. A legitimate testcase
1754          that exercises this is none/tests/s390x/stmg.c:
1755          The stack pointer happens to be in the reservation segment near
1756          the end of the addressable memory and there is no SkAnonC segment
1757          above.
1758 
1759          So the approximation we're taking here is to extend the stack only
1760          if the client stack pointer does not look bogus. */
1761       if (VG_(am_addr_is_in_extensible_client_stack)(stackMin))
1762          VG_(extend_stack)( tid, stackMin );
1763    }
1764 #  endif
1765    /* END ensure root thread's stack is suitably mapped */
1766 
1767    /* First off, get the syscall args and number.  This is a
1768       platform-dependent action. */
1769 
1770    sci = & syscallInfo[tid];
1771    vg_assert(sci->status.what == SsIdle);
1772 
1773    getSyscallArgsFromGuestState( &sci->orig_args, &tst->arch.vex, trc );
1774 
1775    /* Copy .orig_args to .args.  The pre-handler may modify .args, but
1776       we want to keep the originals too, just in case. */
1777    sci->args = sci->orig_args;
1778 
1779    /* Save the syscall number in the thread state in case the syscall
1780       is interrupted by a signal. */
1781    sysno = sci->orig_args.sysno;
1782 
1783    /* It's sometimes useful, as a crude debugging hack, to get a
1784       stack trace at each (or selected) syscalls. */
1785    if (0 && sysno == __NR_ioctl) {
1786       VG_(umsg)("\nioctl:\n");
1787       VG_(get_and_pp_StackTrace)(tid, 10);
1788       VG_(umsg)("\n");
1789    }
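   /* (To use the hack above, change the "0 &&" to "1 &&", and/or swap
      __NR_ioctl for whichever syscall is of interest.) */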
1790 
1791 #  if defined(VGO_darwin)
1792    /* Record syscall class.  But why?  Because the syscall might be
1793       interrupted by a signal, and in the signal handler (which will
1794       be m_signals.async_signalhandler) we will need to build a SysRes
1795       reflecting the syscall return result.  In order to do that we
1796       need to know the syscall class.  Hence stash it in the guest
1797       state of this thread.  This madness is not needed on Linux
1798       because it only has a single syscall return convention and so
1799       there is no ambiguity involved in converting the post-signal
1800       machine state into a SysRes. */
1801    tst->arch.vex.guest_SC_CLASS = VG_DARWIN_SYSNO_CLASS(sysno);
1802 #  endif
1803 
1804    /* The default what-to-do-next thing is to hand the syscall to the
1805       kernel, so we pre-set that here.  Set .sres to something
1806       harmless-looking (it is irrelevant because .what is not
1807       SsComplete). */
1808    sci->status.what = SsHandToKernel;
1809    sci->status.sres = VG_(mk_SysRes_Error)(0);
1810    sci->flags       = 0;
1811 
1812    /* Fetch the syscall's handlers.  If no handlers exist for this
1813       syscall, we are given dummy handlers which force an immediate
1814       return with ENOSYS. */
1815    ent = get_syscall_entry(sysno);
1816 
1817    /* Fetch the layout information, which tells us where in the guest
1818       state the syscall args reside.  This is a platform-dependent
1819       action.  This info is needed so that the scalar syscall argument
1820       checks (PRE_REG_READ calls) know which bits of the guest state
1821       they need to inspect. */
1822    getSyscallArgLayout( &layout );
1823 
1824    /* Make sure the tmp signal mask matches the real signal mask;
1825       sigsuspend may change this. */
1826    vg_assert(VG_(iseqsigset)(&tst->sig_mask, &tst->tmp_sig_mask));
1827 
1828    /* Right, we're finally ready to Party.  Call the pre-handler and
1829       see what we get back.  At this point:
1830 
1831         sci->status.what  is SsHandToKernel (the default; outcome not yet known).
1832         sci->orig_args    contains the original args.
1833         sci->args         is the same as sci->orig_args.
1834         sci->flags        is zero.
1835    */
1836 
1837    PRINT("SYSCALL[%d,%u](%s) ",
1838       VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno));
1839 
1840    /* Do any pre-syscall actions */
1841    if (VG_(needs).syscall_wrapper) {
1842       UWord tmpv[8];
1843       tmpv[0] = sci->orig_args.arg1;
1844       tmpv[1] = sci->orig_args.arg2;
1845       tmpv[2] = sci->orig_args.arg3;
1846       tmpv[3] = sci->orig_args.arg4;
1847       tmpv[4] = sci->orig_args.arg5;
1848       tmpv[5] = sci->orig_args.arg6;
1849       tmpv[6] = sci->orig_args.arg7;
1850       tmpv[7] = sci->orig_args.arg8;
1851       VG_TDICT_CALL(tool_pre_syscall, tid, sysno,
1852                     &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]));
1853    }
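   /* Note the tool's pre_syscall callback only sees a snapshot of the
      original args in tmpv; nothing is copied back afterwards, so a
      tool cannot alter what is actually passed to the kernel from
      here. */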
1854 
1855    vg_assert(ent);
1856    vg_assert(ent->before);
1857    (ent->before)( tid,
1858                   &layout,
1859                   &sci->args, &sci->status, &sci->flags );
1860 
1861    /* If needed, gdbserver will report syscall entry to GDB */
1862    VG_(gdbserver_report_syscall)(True, sysno, tid);
1863 
1864    /* The pre-handler may have modified:
1865          sci->args
1866          sci->status
1867          sci->flags
1868       All else remains unchanged.
1869       Although the args may be modified, pre-handlers are not allowed
1870       to change the syscall number.
1871    */
1872    /* Now we proceed according to what the pre-handler decided. */
1873    vg_assert(sci->status.what == SsHandToKernel
1874              || sci->status.what == SsComplete);
1875    vg_assert(sci->args.sysno == sci->orig_args.sysno);
1876 
1877    if (sci->status.what == SsComplete && !sr_isError(sci->status.sres)) {
1878       /* The pre-handler completed the syscall itself, declaring
1879          success. */
1880       if (sci->flags & SfNoWriteResult) {
1881          PRINT(" --> [pre-success] NoWriteResult");
1882       } else {
1883          PRINT(" --> [pre-success] %s", VG_(sr_as_string)(sci->status.sres));
1884       }
1885       /* In this case the allowable flags are to ask for a signal-poll
1886          and/or a yield after the call.  Changing the args isn't
1887          allowed. */
1888       vg_assert(0 == (sci->flags
1889                       & ~(SfPollAfter | SfYieldAfter | SfNoWriteResult)));
1890       vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
1891    }
1892 
1893    else
1894    if (sci->status.what == SsComplete && sr_isError(sci->status.sres)) {
1895       /* The pre-handler decided to fail the syscall itself. */
1896       PRINT(" --> [pre-fail] %s", VG_(sr_as_string)(sci->status.sres));
1897       /* In this case, the pre-handler is also allowed to ask for the
1898          post-handler to be run anyway.  Changing the args is not
1899          allowed. */
1900       vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
1901       vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
1902    }
1903 
1904    else
1905    if (sci->status.what != SsHandToKernel) {
1906       /* huh?! */
1907       vg_assert(0);
1908    }
1909 
1910    else /* (sci->status.what == SsHandToKernel) */ {
1911       /* Ok, this is the usual case -- and the complicated one.  There
1912          are two subcases: sync and async.  async is the general case
1913          and is to be used when there is any possibility that the
1914          syscall might block [a fact that the pre-handler must tell us
1915          via the sci->flags field.]  Because the tidying-away /
1916          context-switch overhead of the async case could be large, if
1917          we are sure that the syscall will not block, we fast-track it
1918          by doing it directly in this thread, which is a lot
1919          simpler. */
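      /* Concretely: SfMayBlock selects the async path below (release
         the big lock and run the syscall via do_syscall_for_client
         with the client's sanitized signal mask), while its absence
         selects the sync path (call VG_(do_syscall) directly, still
         holding the lock). */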
1920 
1921       /* Check that the given flags are allowable: MayBlock, PollAfter
1922          and PostOnFail are ok. */
1923       vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
1924 
1925       if (sci->flags & SfMayBlock) {
1926 
1927          /* Syscall may block, so run it asynchronously */
1928          vki_sigset_t mask;
1929 
1930          PRINT(" --> [async] ... \n");
1931 
1932          mask = tst->sig_mask;
1933          VG_(sanitize_client_sigmask)(&mask);
1934 
1935          /* Gack.  More impedance matching.  Copy the possibly
1936             modified syscall args back into the guest state. */
1937          /* JRS 2009-Mar-16: if the syscall args are possibly modified,
1938             then this assertion is senseless:
1939               vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
1940             The case that exposed it was sys_posix_spawn on Darwin,
1941             which heavily modifies its arguments but then lets the call
1942             go through anyway, with SfMayBlock set, hence we end up here. */
1943          putSyscallArgsIntoGuestState( &sci->args, &tst->arch.vex );
1944 
1945          /* SfNoWriteResult flag is invalid for blocking syscalls because
1946             do_syscall_for_client() directly modifies the guest state. */
1947          vg_assert(!(sci->flags & SfNoWriteResult));
1948 
1949          /* Drop the bigLock */
1950          VG_(release_BigLock)(tid, VgTs_WaitSys, "VG_(client_syscall)[async]");
1951          /* Urr.  We're now in a race against other threads trying to
1952             acquire the bigLock.  I guess that doesn't matter provided
1953             that do_syscall_for_client only touches thread-local
1954             state. */
1955 
1956          /* Do the call, which operates directly on the guest state,
1957             not on our abstracted copies of the args/result. */
1958          do_syscall_for_client(sysno, tst, &mask);
1959 
1960          /* do_syscall_for_client may not return if the syscall was
1961             interrupted by a signal.  In that case, flow of control is
1962             first to m_signals.async_sighandler, which calls
1963             VG_(fixup_guest_state_after_syscall_interrupted), which
1964             fixes up the guest state, and possibly calls
1965             VG_(post_syscall).  Once that's done, control drops back
1966             to the scheduler.  */
1967 
1968          /* Darwin: do_syscall_for_client may not return if the
1969             syscall was workq_ops(WQOPS_THREAD_RETURN) and the kernel
1970             responded by starting the thread at wqthread_hijack(reuse=1)
1971             (to run another workqueue item). In that case, wqthread_hijack
1972             calls ML_(wqthread_continue), which is similar to
1973             VG_(fixup_guest_state_after_syscall_interrupted). */
1974 
1975          /* Reacquire the lock */
1976          VG_(acquire_BigLock)(tid, "VG_(client_syscall)[async]");
1977 
1978          /* Even more impedance matching.  Extract the syscall status
1979             from the guest state. */
1980          getSyscallStatusFromGuestState( &sci->status, &tst->arch.vex );
1981          vg_assert(sci->status.what == SsComplete);
1982 
1983          /* Be decorative, if required. */
1984          if (VG_(clo_trace_syscalls)) {
1985             PRINT("SYSCALL[%d,%u](%s) ... [async] --> %s",
1986                   VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
1987                   VG_(sr_as_string)(sci->status.sres));
1988          }
1989 
1990       } else {
1991 
1992          /* run the syscall directly */
1993          /* The pre-handler may have modified the syscall args, but
1994             since we're passing values in ->args directly to the
1995             kernel, there's no point in flushing them back to the
1996             guest state.  Indeed doing so could be construed as
1997             incorrect. */
1998          SysRes sres
1999             = VG_(do_syscall)(sysno, sci->args.arg1, sci->args.arg2,
2000                                      sci->args.arg3, sci->args.arg4,
2001                                      sci->args.arg5, sci->args.arg6,
2002                                      sci->args.arg7, sci->args.arg8 );
2003          sci->status = convert_SysRes_to_SyscallStatus(sres);
2004 
2005          /* Be decorative, if required. */
2006          if (VG_(clo_trace_syscalls)) {
2007            PRINT("[sync] --> %s", VG_(sr_as_string)(sci->status.sres));
2008          }
2009       }
2010    }
2011 
2012    vg_assert(sci->status.what == SsComplete);
2013 
2014    vg_assert(VG_(is_running_thread)(tid));
2015 
2016    /* Dump the syscall result back in the guest state.  This is
2017       a platform-specific action. */
2018    if (!(sci->flags & SfNoWriteResult))
2019       putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );
2020 
2021    /* If needed, gdbserver will report syscall return to GDB */
2022    VG_(gdbserver_report_syscall)(False, sysno, tid);
2023 
2024    /* Situation now:
2025       - the guest state is now correctly modified following the syscall
2026       - modified args, original args and syscall status are still
2027         available in the syscallInfo[] entry for this syscall.
2028 
2029       Now go on to do the post-syscall actions (read on down ..)
2030    */
2031    PRINT(" ");
2032    VG_(post_syscall)(tid);
2033    PRINT("\n");
2034 }
2035 
2036 
2037 /* Perform post syscall actions.  The expected state on entry is
2038    precisely as at the end of VG_(client_syscall), that is:
2039 
2040    - guest state up to date following the syscall
2041    - modified args, original args and syscall status are still
2042      available in the syscallInfo[] entry for this syscall.
2043    - syscall status matches what's in the guest state.
2044 
2045    There are two ways to get here: the normal way -- being called by
2046    VG_(client_syscall), and the unusual way, from
2047    VG_(fixup_guest_state_after_syscall_interrupted).
2048    Darwin: there's a third way, ML_(wqthread_continue).
2049 */
2050 void VG_(post_syscall) (ThreadId tid)
2051 {
2052    SyscallInfo*             sci;
2053    const SyscallTableEntry* ent;
2054    SyscallStatus            test_status;
2055    ThreadState*             tst;
2056    Word sysno;
2057 
2058    /* Preliminaries */
2059    vg_assert(VG_(is_valid_tid)(tid));
2060    vg_assert(tid >= 1 && tid < VG_N_THREADS);
2061    vg_assert(VG_(is_running_thread)(tid));
2062 
2063    tst = VG_(get_ThreadState)(tid);
2064    sci = & syscallInfo[tid];
2065 
2066    /* m_signals.sigvgkill_handler might call here even when not in
2067       a syscall. */
2068    if (sci->status.what == SsIdle || sci->status.what == SsHandToKernel) {
2069       sci->status.what = SsIdle;
2070       return;
2071    }
2072 
2073    /* Validate current syscallInfo entry.  In particular we require
2074       that the current .status matches what's actually in the guest
2075       state.  At least in the normal case where we have actually
2076       previously written the result into the guest state. */
2077    vg_assert(sci->status.what == SsComplete);
2078 
2079    /* Get the system call number.  Because the pre-handler isn't
2080       allowed to mess with it, it should be the same for both the
2081       original and potentially-modified args. */
2082    vg_assert(sci->args.sysno == sci->orig_args.sysno);
2083    sysno = sci->args.sysno;
2084 
2085    getSyscallStatusFromGuestState( &test_status, &tst->arch.vex );
2086    if (!(sci->flags & SfNoWriteResult))
2087       vg_assert(eq_SyscallStatus( sysno, &sci->status, &test_status ));
2088    /* Failure of the above assertion on Darwin can indicate a problem
2089       in the syscall wrappers that pre-fail or pre-succeed the
2090       syscall, by calling SET_STATUS_Success or SET_STATUS_Failure,
2091       when they really should call SET_STATUS_from_SysRes.  The former
2092       create a UNIX-class syscall result on Darwin, which may not be
2093       correct for the syscall; if that's the case then this assertion
2094       fires.  See PRE(thread_fast_set_cthread_self) for an example.  On
2095       non-Darwin platforms this assertion should never fail, and this
2096       comment is completely irrelevant. */
2097    /* Ok, looks sane */
2098 
2099    /* pre: status == Complete (asserted above) */
2100    /* Consider either success or failure.  Now run the post handler if:
2101       - it exists, and
2102       - Success or (Failure and PostOnFail is set)
2103    */
2104    ent = get_syscall_entry(sysno);
2105    if (ent->after
2106        && ((!sr_isError(sci->status.sres))
2107            || (sr_isError(sci->status.sres)
2108                && (sci->flags & SfPostOnFail) ))) {
2109 
2110       (ent->after)( tid, &sci->args, &sci->status );
2111    }
2112 
2113    /* Because the post handler might have changed the status (eg, the
2114       post-handler for sys_open can change the result from success to
2115       failure if the kernel supplied a fd that it doesn't like), once
2116       again dump the syscall result back in the guest state.*/
2117    if (!(sci->flags & SfNoWriteResult))
2118       putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );
2119 
2120    /* Do any post-syscall actions required by the tool. */
2121    if (VG_(needs).syscall_wrapper) {
2122       UWord tmpv[8];
2123       tmpv[0] = sci->orig_args.arg1;
2124       tmpv[1] = sci->orig_args.arg2;
2125       tmpv[2] = sci->orig_args.arg3;
2126       tmpv[3] = sci->orig_args.arg4;
2127       tmpv[4] = sci->orig_args.arg5;
2128       tmpv[5] = sci->orig_args.arg6;
2129       tmpv[6] = sci->orig_args.arg7;
2130       tmpv[7] = sci->orig_args.arg8;
2131       VG_TDICT_CALL(tool_post_syscall, tid,
2132                     sysno,
2133                     &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]),
2134                     sci->status.sres);
2135    }
2136 
2137    /* The syscall is done. */
2138    vg_assert(sci->status.what == SsComplete);
2139    sci->status.what = SsIdle;
2140 
2141    /* The pre/post wrappers may have concluded that pending signals
2142       might have been created, and will have set SfPollAfter to
2143       request a poll for them once the syscall is done. */
2144    if (sci->flags & SfPollAfter)
2145       VG_(poll_signals)(tid);
2146 
2147    /* Similarly, the wrappers might have asked for a yield
2148       afterwards. */
2149    if (sci->flags & SfYieldAfter)
2150       VG_(vg_yield)();
2151 }
2152 
2153 
2154 /* ---------------------------------------------------------------------
2155    Dealing with syscalls which get interrupted by a signal:
2156    VG_(fixup_guest_state_after_syscall_interrupted)
2157    ------------------------------------------------------------------ */
2158 
2159 /* Syscalls done on behalf of the client are finally handed off to the
2160    kernel in VG_(client_syscall) above, either by calling
2161    do_syscall_for_client (the async case), or by calling
2162    VG_(do_syscall6) (the sync case).
2163 
2164    If the syscall is not interrupted by a signal (it may block and
2165    later unblock, but that's irrelevant here) then those functions
2166    eventually return and so control is passed to VG_(post_syscall).
2167    NB: not sure if the sync case can actually get interrupted, as it
2168    operates with all signals masked.
2169 
2170    However, the syscall may get interrupted by an async-signal.  In
2171    that case do_syscall_for_client/VG_(do_syscall6) do not
2172    return.  Instead we wind up in m_signals.async_sighandler.  We need
2173    to fix up the guest state to make it look like the syscall was
2174    interrupted for the guest.  So async_sighandler calls here, and this
2175    does the fixup.  Note that from here we wind up calling
2176    VG_(post_syscall) too.
2177 */
2178 
2179 
2180 /* These are addresses within ML_(do_syscall_for_client_WRK).  See
2181    syscall-$PLAT.S for details.
2182 */
2183 #if defined(VGO_linux)
2184   extern const Addr ML_(blksys_setup);
2185   extern const Addr ML_(blksys_restart);
2186   extern const Addr ML_(blksys_complete);
2187   extern const Addr ML_(blksys_committed);
2188   extern const Addr ML_(blksys_finished);
2189 #elif defined(VGO_darwin)
2190   /* Darwin requires extra ugliness */
2191   extern const Addr ML_(blksys_setup_MACH);
2192   extern const Addr ML_(blksys_restart_MACH);
2193   extern const Addr ML_(blksys_complete_MACH);
2194   extern const Addr ML_(blksys_committed_MACH);
2195   extern const Addr ML_(blksys_finished_MACH);
2196   extern const Addr ML_(blksys_setup_MDEP);
2197   extern const Addr ML_(blksys_restart_MDEP);
2198   extern const Addr ML_(blksys_complete_MDEP);
2199   extern const Addr ML_(blksys_committed_MDEP);
2200   extern const Addr ML_(blksys_finished_MDEP);
2201   extern const Addr ML_(blksys_setup_UNIX);
2202   extern const Addr ML_(blksys_restart_UNIX);
2203   extern const Addr ML_(blksys_complete_UNIX);
2204   extern const Addr ML_(blksys_committed_UNIX);
2205   extern const Addr ML_(blksys_finished_UNIX);
2206 #elif defined(VGO_solaris)
2207   extern const Addr ML_(blksys_setup);
2208   extern const Addr ML_(blksys_complete);
2209   extern const Addr ML_(blksys_committed);
2210   extern const Addr ML_(blksys_finished);
2211   extern const Addr ML_(blksys_setup_DRET);
2212   extern const Addr ML_(blksys_complete_DRET);
2213   extern const Addr ML_(blksys_committed_DRET);
2214   extern const Addr ML_(blksys_finished_DRET);
2215 #else
2216 # error "Unknown OS"
2217 #endif
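/* These labels bracket the phases of ML_(do_syscall_for_client_WRK):
   [setup,restart) unblocks signals, [restart] is the syscall
   instruction itself, [complete,committed) has the result in hand but
   not yet saved to the guest state, and [committed,finished) has the
   result saved but signals not yet re-blocked.
   VG_(fixup_guest_state_after_syscall_interrupted) below decides what
   to do purely from where the interrupted IP falls among them. */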
2218 
2219 
2220 /* Back up guest state to restart a system call. */
2221 
2222 void ML_(fixup_guest_state_to_restart_syscall) ( ThreadArchState* arch )
2223 {
2224 #if defined(VGP_x86_linux)
2225    arch->vex.guest_EIP -= 2;             // sizeof(int $0x80)
2226 
2227    /* Make sure our caller is actually sane, and we're really backing
2228       back over a syscall.
2229 
2230       int $0x80 == CD 80
2231    */
2232    {
2233       UChar *p = (UChar *)arch->vex.guest_EIP;
2234 
2235       if (p[0] != 0xcd || p[1] != 0x80)
2236          VG_(message)(Vg_DebugMsg,
2237                       "?! restarting over syscall at %#x %02x %02x\n",
2238                       arch->vex.guest_EIP, p[0], p[1]);
2239 
2240       vg_assert(p[0] == 0xcd && p[1] == 0x80);
2241    }
2242 
2243 #elif defined(VGP_amd64_linux)
2244    arch->vex.guest_RIP -= 2;             // sizeof(syscall)
2245 
2246    /* Make sure our caller is actually sane, and we're really backing
2247       back over a syscall.
2248 
2249       syscall == 0F 05
2250    */
2251    {
2252       UChar *p = (UChar *)arch->vex.guest_RIP;
2253 
2254       if (p[0] != 0x0F || p[1] != 0x05)
2255          VG_(message)(Vg_DebugMsg,
2256                       "?! restarting over syscall at %#llx %02x %02x\n",
2257                       arch->vex.guest_RIP, p[0], p[1]);
2258 
2259       vg_assert(p[0] == 0x0F && p[1] == 0x05);
2260    }
2261 
2262 #elif defined(VGP_ppc32_linux) || defined(VGP_ppc64be_linux)
2263    arch->vex.guest_CIA -= 4;             // sizeof(ppc32 instr)
2264 
2265    /* Make sure our caller is actually sane, and we're really backing
2266       back over a syscall.
2267 
2268       sc == 44 00 00 02
2269    */
2270    {
2271       UChar *p = (UChar *)arch->vex.guest_CIA;
2272 
2273       if (p[0] != 0x44 || p[1] != 0x0 || p[2] != 0x0 || p[3] != 0x02)
2274          VG_(message)(Vg_DebugMsg,
2275                       "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
2276                       (ULong)arch->vex.guest_CIA, p[0], p[1], p[2], p[3]);
2277 
2278       vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);
2279    }
2280 
2281 #elif defined(VGP_ppc64le_linux)
2282    arch->vex.guest_CIA -= 4;             // sizeof(ppc32 instr)
2283 
2284    /* Make sure our caller is actually sane, and we're really backing
2285       back over a syscall.
2286 
2287       sc == 44 00 00 02
2288    */
2289    {
2290       UChar *p = (UChar *)arch->vex.guest_CIA;
2291 
2292       if (p[3] != 0x44 || p[2] != 0x0 || p[1] != 0x0 || p[0] != 0x02)
2293          VG_(message)(Vg_DebugMsg,
2294                       "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
2295                       arch->vex.guest_CIA, p[3], p[2], p[1], p[0]);
2296 
2297       vg_assert(p[3] == 0x44 && p[2] == 0x0 && p[1] == 0x0 && p[0] == 0x2);
2298    }
2299 
2300 #elif defined(VGP_arm_linux)
2301    if (arch->vex.guest_R15T & 1) {
2302       // Thumb mode.  SVC is encoded as
2303       //   1101 1111 imm8
2304       // where imm8 is the SVC number, and we only accept 0.
2305       arch->vex.guest_R15T -= 2;   // sizeof(thumb 16 bit insn)
2306       UChar* p     = (UChar*)(arch->vex.guest_R15T - 1);
2307       Bool   valid = p[0] == 0 && p[1] == 0xDF;
2308       if (!valid) {
2309          VG_(message)(Vg_DebugMsg,
2310                       "?! restarting over (Thumb) syscall that is not syscall "
2311                       "at %#x %02x %02x\n",
2312                       arch->vex.guest_R15T - 1, p[0], p[1]);
2313       }
2314       vg_assert(valid);
2315       // FIXME: NOTE, this really isn't right.  We need to back up
2316       // ITSTATE to what it was before the SVC instruction, but we
2317       // don't know what it was.  At least assert that it is now
2318       // zero, because if it is nonzero then it must also have
2319       // been nonzero for the SVC itself, which means it was
2320       // conditional.  Urk.
2321       vg_assert(arch->vex.guest_ITSTATE == 0);
2322    } else {
2323       // ARM mode.  SVC is encoded as
2324       //   cond 1111 imm24
2325       // where imm24 is the SVC number, and we only accept 0.
2326       arch->vex.guest_R15T -= 4;   // sizeof(arm instr)
2327       UChar* p     = (UChar*)arch->vex.guest_R15T;
2328       Bool   valid = p[0] == 0 && p[1] == 0 && p[2] == 0
2329                      && (p[3] & 0xF) == 0xF;
2330       if (!valid) {
2331          VG_(message)(Vg_DebugMsg,
2332                       "?! restarting over (ARM) syscall that is not syscall "
2333                       "at %#x %02x %02x %02x %02x\n",
2334                       arch->vex.guest_R15T, p[0], p[1], p[2], p[3]);
2335       }
2336       vg_assert(valid);
2337    }
2338 
2339 #elif defined(VGP_arm64_linux)
2340    arch->vex.guest_PC -= 4;             // sizeof(arm64 instr)
2341 
2342    /* Make sure our caller is actually sane, and we're really backing
2343       back over a syscall.
2344 
2345       svc #0 == d4 00 00 01
2346    */
2347    {
2348       UChar *p = (UChar *)arch->vex.guest_PC;
2349 
2350       if (p[0] != 0x01 || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0xD4)
2351          VG_(message)(
2352             Vg_DebugMsg,
2353             "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
2354             arch->vex.guest_PC, p[0], p[1], p[2], p[3]
2355           );
2356 
2357       vg_assert(p[0] == 0x01 && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0xD4);
2358    }
2359 
2360 #elif defined(VGP_x86_darwin)
2361    arch->vex.guest_EIP = arch->vex.guest_IP_AT_SYSCALL;
2362 
2363    /* Make sure our caller is actually sane, and we're really backing
2364       back over a syscall.
2365 
2366       int $0x80 == CD 80  // Used to communicate with BSD syscalls
2367       int $0x81 == CD 81  // Used to communicate with Mach traps
2368       int $0x82 == CD 82  // Used to communicate with "thread" ?
2369       sysenter  == 0F 34  // Used to communicate with Unix syscalls
2370    */
2371    {
2372        UChar *p = (UChar *)arch->vex.guest_EIP;
2373        Bool  ok = (p[0] == 0xCD && p[1] == 0x80)
2374                   || (p[0] == 0xCD && p[1] == 0x81)
2375                   || (p[0] == 0xCD && p[1] == 0x82)
2376                   || (p[0] == 0x0F && p[1] == 0x34);
2377        if (!ok)
2378            VG_(message)(Vg_DebugMsg,
2379                         "?! restarting over syscall at %#x %02x %02x\n",
2380                         arch->vex.guest_EIP, p[0], p[1]);
2381        vg_assert(ok);
2382    }
2383 
2384 #elif defined(VGP_amd64_darwin)
2385    arch->vex.guest_RIP = arch->vex.guest_IP_AT_SYSCALL;
2386 
2387    /* Make sure our caller is actually sane, and we're really backing
2388       back over a syscall.
2389 
2390       syscall   == 0F 05
2391    */
2392    {
2393        UChar *p = (UChar *)arch->vex.guest_RIP;
2394 
2395        Bool  ok = (p[0] == 0x0F && p[1] == 0x05);
2396        if (!ok)
2397            VG_(message)(Vg_DebugMsg,
2398                         "?! restarting over syscall at %#llx %02x %02x\n",
2399                         arch->vex.guest_RIP, p[0], p[1]);
2400        vg_assert(ok);
2401    }
2402 
2403 #elif defined(VGP_s390x_linux)
2404    arch->vex.guest_IA -= 2;             // sizeof(syscall)
2405 
2406    /* Make sure our caller is actually sane, and we're really backing
2407       back over a syscall.
2408 
2409       syscall == 0A <num>
2410    */
2411    {
2412       UChar *p = (UChar *)arch->vex.guest_IA;
2413       if (p[0] != 0x0A)
2414          VG_(message)(Vg_DebugMsg,
2415                       "?! restarting over syscall at %#llx %02x %02x\n",
2416                       arch->vex.guest_IA, p[0], p[1]);
2417 
2418       vg_assert(p[0] == 0x0A);
2419    }
2420 
2421 #elif defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
2422 
2423    arch->vex.guest_PC -= 4;             // sizeof(mips instr)
2424 
2425    /* Make sure our caller is actually sane, and we're really backing
2426       back over a syscall.
2427 
2428       syscall == 0C 00 00 00   (little endian)
2429       or
2430       syscall == 00 00 00 0C   (big endian)
2431    */
2432    {
2433       UChar *p = (UChar *)(arch->vex.guest_PC);
2434 #     if defined (VG_LITTLEENDIAN)
2435       if (p[0] != 0x0c || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0x00)
2436          VG_(message)(Vg_DebugMsg,
2437                       "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
2438                       (ULong)arch->vex.guest_PC, p[0], p[1], p[2], p[3]);
2439 
2440       vg_assert(p[0] == 0x0c && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0x00);
2441 #     elif defined (VG_BIGENDIAN)
2442       if (p[0] != 0x00 || p[1] != 0x00 || p[2] != 0x00 || p[3] != 0x0c)
2443          VG_(message)(Vg_DebugMsg,
2444                       "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
2445                       (ULong)arch->vex.guest_PC, p[0], p[1], p[2], p[3]);
2446 
2447       vg_assert(p[0] == 0x00 && p[1] == 0x00 && p[2] == 0x00 && p[3] == 0x0c);
2448 #     else
2449 #        error "Unknown endianness"
2450 #     endif
2451    }
2452 
2453 #elif defined(VGP_x86_solaris)
2454    arch->vex.guest_EIP -= 2;   // sizeof(int $0x91) or sizeof(syscall)
2455 
2456    /* Make sure our caller is actually sane, and we're really backing
2457       back over a syscall.
2458 
2459       int $0x91 == CD 91
2460       syscall   == 0F 05
2461       sysenter  == 0F 34
2462 
2463       Also handle the other syscall instructions, because we handle them in
2464       the scheduler too.
2465       int $0x80 == CD 80
2466       int $0x81 == CD 81
2467       int $0x82 == CD 82
2468    */
2469    {
2470       UChar *p = (UChar *)arch->vex.guest_EIP;
2471 
2472       Bool  ok = (p[0] == 0xCD && p[1] == 0x91)
2473                   || (p[0] == 0x0F && p[1] == 0x05)
2474                   || (p[0] == 0x0F && p[1] == 0x34)
2475                   || (p[0] == 0xCD && p[1] == 0x80)
2476                   || (p[0] == 0xCD && p[1] == 0x81)
2477                   || (p[0] == 0xCD && p[1] == 0x82);
2478       if (!ok)
2479          VG_(message)(Vg_DebugMsg,
2480                       "?! restarting over syscall at %#x %02x %02x\n",
2481                       arch->vex.guest_EIP, p[0], p[1]);
2482       vg_assert(ok);
2483    }
2484 
2485 #elif defined(VGP_amd64_solaris)
2486    arch->vex.guest_RIP -= 2;   // sizeof(syscall)
2487 
2488    /* Make sure our caller is actually sane, and we're really backing
2489       back over a syscall.
2490 
2491       syscall   == 0F 05
2492    */
2493    {
2494       UChar *p = (UChar *)arch->vex.guest_RIP;
2495 
2496       Bool  ok = (p[0] == 0x0F && p[1] == 0x05);
2497       if (!ok)
2498          VG_(message)(Vg_DebugMsg,
2499                       "?! restarting over syscall at %#llx %02x %02x\n",
2500                       arch->vex.guest_RIP, p[0], p[1]);
2501       vg_assert(ok);
2502    }
2503 
2504 #else
2505 #  error "ML_(fixup_guest_state_to_restart_syscall): unknown plat"
2506 #endif
2507 }
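/* Summary of the above: every variant does the same two things --
   wind the guest IP back over the syscall instruction (or, on Darwin,
   restore it from guest_IP_AT_SYSCALL), and then assert that the
   bytes at the resulting address really are a syscall instruction, so
   that a confused caller is caught immediately rather than silently
   re-executing arbitrary code. */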
2508 
2509 
2510 /*
2511    Fix up the guest state when a syscall is interrupted by a signal
2512    and so has been forced to return 'sysret'.
2513 
2514    To do this, we determine the precise state of the syscall by
2515    looking at the (real) IP at the time the signal happened.  The
2516    syscall sequence looks like:
2517 
2518      1. unblock signals
2519      2. perform syscall
2520      3. save result to guest state (EAX, RAX, R3+CR0.SO, R0, V0)
2521      4. re-block signals
2522 
2523    If a signal
2524    happens at      Then     Why?
2525    [1-2)           restart  nothing has happened (restart syscall)
2526    [2]             restart  syscall hasn't started, or kernel wants to restart
2527    [2-3)           save     syscall complete, but results not saved
2528    [3-4)           nothing  syscall complete, results saved
2529 
2530    Sometimes we never want to restart an interrupted syscall (because
2531    sigaction says not to), so we only restart if "restart" is True.
2532 
2533    This will also call VG_(post_syscall) if the syscall has actually
2534    completed (either because it was interrupted, or because it
2535    actually finished).  It will not call VG_(post_syscall) if the
2536    syscall is set up for restart, which means that the pre-wrapper may
2537    get called multiple times.
2538 */
2539 
2540 void
2541 VG_(fixup_guest_state_after_syscall_interrupted)( ThreadId tid,
2542                                                   Addr     ip,
2543                                                   SysRes   sres,
2544                                                   Bool     restart,
2545                                                   struct vki_ucontext *uc)
2546 {
2547    /* Note that we don't know the syscall number here, since (1) in
2548       general there's no reliable way to get hold of it short of
2549       stashing it in the guest state before the syscall, and (2) in
2550       any case we don't need to know it for the actions done by this
2551       routine.
2552 
2553       Furthermore, 'sres' is only used in the case where the syscall
2554       is complete, but the result has not been committed to the guest
2555       state yet.  In any other situation it will be meaningless and
2556       therefore ignored. */
2557 
2558    ThreadState*     tst;
2559    SyscallStatus    canonical;
2560    ThreadArchState* th_regs;
2561    SyscallInfo*     sci;
2562 
2563    /* Compute some Booleans indicating which range we're in. */
2564    Bool outside_range,
2565         in_setup_to_restart,      // [1,2) in the .S files
2566         at_restart,               // [2]   in the .S files
2567         in_complete_to_committed, // [3,4) in the .S files
2568         in_committed_to_finished; // [4,5) in the .S files
2569 
2570    if (VG_(clo_trace_signals))
2571       VG_(message)( Vg_DebugMsg,
2572                     "interrupted_syscall: tid=%u, ip=%#lx, "
2573                     "restart=%s, sres.isErr=%s, sres.val=%lu\n",
2574                     tid,
2575                     ip,
2576                     restart ? "True" : "False",
2577                     sr_isError(sres) ? "True" : "False",
2578                     sr_isError(sres) ? sr_Err(sres) : sr_Res(sres));
2579 
2580    vg_assert(VG_(is_valid_tid)(tid));
2581    vg_assert(tid >= 1 && tid < VG_N_THREADS);
2582    vg_assert(VG_(is_running_thread)(tid));
2583 
2584    tst     = VG_(get_ThreadState)(tid);
2585    th_regs = &tst->arch;
2586    sci     = & syscallInfo[tid];
2587 
2588 #  if defined(VGO_linux)
2589    outside_range
2590       = ip < ML_(blksys_setup) || ip >= ML_(blksys_finished);
2591    in_setup_to_restart
2592       = ip >= ML_(blksys_setup) && ip < ML_(blksys_restart);
2593    at_restart
2594       = ip == ML_(blksys_restart);
2595    in_complete_to_committed
2596       = ip >= ML_(blksys_complete) && ip < ML_(blksys_committed);
2597    in_committed_to_finished
2598       = ip >= ML_(blksys_committed) && ip < ML_(blksys_finished);
2599 #  elif defined(VGO_darwin)
2600    outside_range
2601       =  (ip < ML_(blksys_setup_MACH) || ip >= ML_(blksys_finished_MACH))
2602       && (ip < ML_(blksys_setup_MDEP) || ip >= ML_(blksys_finished_MDEP))
2603       && (ip < ML_(blksys_setup_UNIX) || ip >= ML_(blksys_finished_UNIX));
2604    in_setup_to_restart
2605       =  (ip >= ML_(blksys_setup_MACH) && ip < ML_(blksys_restart_MACH))
2606       || (ip >= ML_(blksys_setup_MDEP) && ip < ML_(blksys_restart_MDEP))
2607       || (ip >= ML_(blksys_setup_UNIX) && ip < ML_(blksys_restart_UNIX));
2608    at_restart
2609       =  (ip == ML_(blksys_restart_MACH))
2610       || (ip == ML_(blksys_restart_MDEP))
2611       || (ip == ML_(blksys_restart_UNIX));
2612    in_complete_to_committed
2613       =  (ip >= ML_(blksys_complete_MACH) && ip < ML_(blksys_committed_MACH))
2614       || (ip >= ML_(blksys_complete_MDEP) && ip < ML_(blksys_committed_MDEP))
2615       || (ip >= ML_(blksys_complete_UNIX) && ip < ML_(blksys_committed_UNIX));
2616    in_committed_to_finished
2617       =  (ip >= ML_(blksys_committed_MACH) && ip < ML_(blksys_finished_MACH))
2618       || (ip >= ML_(blksys_committed_MDEP) && ip < ML_(blksys_finished_MDEP))
2619       || (ip >= ML_(blksys_committed_UNIX) && ip < ML_(blksys_finished_UNIX));
2620    /* Wasn't that just So Much Fun?  Does your head hurt yet?  Mine does. */
2621 #  elif defined(VGO_solaris)
2622    /* The solaris port is never outside the range. */
2623    outside_range = False;
2624    /* The Solaris kernel never restarts syscalls directly! */
2625    at_restart = False;
2626    if (tst->os_state.in_door_return) {
2627       vg_assert(ip >= ML_(blksys_setup_DRET)
2628                 && ip < ML_(blksys_finished_DRET));
2629 
2630       in_setup_to_restart
2631          = ip >= ML_(blksys_setup_DRET) && ip < ML_(blksys_complete_DRET);
2632       in_complete_to_committed
2633          = ip >= ML_(blksys_complete_DRET) && ip < ML_(blksys_committed_DRET);
2634       in_committed_to_finished
2635          = ip >= ML_(blksys_committed_DRET) && ip < ML_(blksys_finished_DRET);
2636    }
2637    else {
2638       vg_assert(ip >= ML_(blksys_setup) && ip < ML_(blksys_finished));
2639 
2640       in_setup_to_restart
2641          = ip >= ML_(blksys_setup) && ip < ML_(blksys_complete);
2642       in_complete_to_committed
2643          = ip >= ML_(blksys_complete) && ip < ML_(blksys_committed);
2644       in_committed_to_finished
2645          = ip >= ML_(blksys_committed) && ip < ML_(blksys_finished);
2646    }
2647 #  else
2648 #    error "Unknown OS"
2649 #  endif
2650 
2651    /* Figure out what the state of the syscall was by examining the
2652       (real) IP at the time of the signal, and act accordingly. */
2653    if (outside_range) {
2654       if (VG_(clo_trace_signals))
2655          VG_(message)( Vg_DebugMsg,
2656                        "  not in syscall at all: hmm, very suspicious\n" );
2657       /* Looks like we weren't in a syscall at all.  Hmm. */
2658       vg_assert(sci->status.what != SsIdle);
2659       return;
2660    }
2661 
2662    /* We should not be here unless this thread had first started up
2663       the machinery for a syscall by calling VG_(client_syscall).
2664       Hence: */
2665    vg_assert(sci->status.what != SsIdle);
2666 
2667    /* now, do one of four fixup actions, depending on where the IP has
2668       got to. */
2669 
2670    if (in_setup_to_restart) {
2671       /* syscall hasn't even started; go around again */
2672       if (VG_(clo_trace_signals))
2673          VG_(message)( Vg_DebugMsg, "  not started: restarting\n");
2674       vg_assert(sci->status.what == SsHandToKernel);
2675       ML_(fixup_guest_state_to_restart_syscall)(th_regs);
2676    }
2677 
2678    else
2679    if (at_restart) {
2680 #     if defined(VGO_solaris)
2681       /* We should never hit this branch on Solaris, see the comment above. */
2682       vg_assert(0);
2683 #     endif
2684 
2685       /* We're either about to run the syscall, or it was interrupted
2686          and the kernel restarted it.  Restart if asked, otherwise
2687          EINTR it. */
2688       if (restart) {
2689          if (VG_(clo_trace_signals))
2690             VG_(message)( Vg_DebugMsg, "  at syscall instr: restarting\n");
2691          ML_(fixup_guest_state_to_restart_syscall)(th_regs);
2692       } else {
2693          if (VG_(clo_trace_signals))
2694             VG_(message)( Vg_DebugMsg, "  at syscall instr: returning EINTR\n");
2695          canonical = convert_SysRes_to_SyscallStatus(
2696                         VG_(mk_SysRes_Error)( VKI_EINTR )
2697                      );
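         /* Write the EINTR result into the guest state unless the
            syscall wrapper asked us not to (SfNoWriteResult). */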
2698          if (!(sci->flags & SfNoWriteResult))
2699             putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
2700          sci->status = canonical;
2701          VG_(post_syscall)(tid);
2702       }
2703    }
2704 
2705    else
2706    if (in_complete_to_committed) {
2707       /* Syscall complete, but result hasn't been written back yet.
2708          Write the SysRes we were supplied with back to the guest
2709          state. */
2710       if (VG_(clo_trace_signals))
2711          VG_(message)( Vg_DebugMsg,
2712                        "  completed, but uncommitted: committing\n");
2713       canonical = convert_SysRes_to_SyscallStatus( sres );
2714       vg_assert(!(sci->flags & SfNoWriteResult));
2715       putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
2716 #     if defined(VGO_solaris)
2717       if (tst->os_state.in_door_return) {
2718 #        if defined(VGP_x86_solaris)
2719          /* Registers %esp and %ebp were also modified by the syscall. */
2720          tst->arch.vex.guest_ESP = uc->uc_mcontext.gregs[VKI_UESP];
2721          tst->arch.vex.guest_EBP = uc->uc_mcontext.gregs[VKI_EBP];
2722 #        elif defined(VGP_amd64_solaris)
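         /* Likewise %rsp and %rbp on amd64. */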
2723          tst->arch.vex.guest_RSP = uc->uc_mcontext.gregs[VKI_REG_RSP];
2724          tst->arch.vex.guest_RBP = uc->uc_mcontext.gregs[VKI_REG_RBP];
2725 #        endif
2726       }
2727 #     endif
2728       sci->status = canonical;
2729       VG_(post_syscall)(tid);
2730    }
2731 
2732    else
2733    if (in_committed_to_finished) {
2734       /* Result committed, but the signal mask has not been restored;
2735          we expect our caller (the signal handler) will have fixed
2736          this up. */
2737       if (VG_(clo_trace_signals))
2738          VG_(message)( Vg_DebugMsg,
2739                        "  completed and committed: nothing to do\n");
2740 #     if defined(VGP_x86_solaris)
2741       /* The %eax and %edx values are committed but the carry flag is still
2742          uncommitted.  Save it now. */
2743       LibVEX_GuestX86_put_eflag_c(sr_isError(sres), &th_regs->vex);
2744 #     elif defined(VGP_amd64_solaris)
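      /* Likewise on amd64: save the still-uncommitted carry bit of
         %rflags. */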
2745       LibVEX_GuestAMD64_put_rflag_c(sr_isError(sres), &th_regs->vex);
2746 #     endif
2747       getSyscallStatusFromGuestState( &sci->status, &th_regs->vex );
2748       vg_assert(sci->status.what == SsComplete);
2749       VG_(post_syscall)(tid);
2750    }
2751 
2752    else
2753       VG_(core_panic)("?? strange syscall interrupt state?");
2754 
2755    /* In all cases, the syscall is now finished (even if we called
2756       ML_(fixup_guest_state_to_restart_syscall), since that just
2757       re-positions the guest's IP for another go at it).  So we need
2758       to record that fact. */
2759    sci->status.what = SsIdle;
2760 }
2761 
2762 
2763 #if defined(VGO_solaris)
2764 /* Returns True if ip is inside a fixable syscall code in syscall-*-*.S.  This
2765    function can be called by a 'non-running' thread! */
2766 Bool VG_(is_ip_in_blocking_syscall)(ThreadId tid, Addr ip)
2767 {
2768    ThreadState *tst = VG_(get_ThreadState)(tid);
2769 
2770    if (tst->os_state.in_door_return)
2771       return ip >= ML_(blksys_setup_DRET) && ip < ML_(blksys_finished_DRET);
2772    else
2773       return ip >= ML_(blksys_setup) && ip < ML_(blksys_finished);
2774 }
2775 #endif
2776 
2777 
2778 #if defined(VGO_darwin)
2779 // Clean up after workq_ops(WQOPS_THREAD_RETURN) jumped to wqthread_hijack.
2780 // This is similar to VG_(fixup_guest_state_after_syscall_interrupted).
2781 // This longjmps back to the scheduler.
2782 void ML_(wqthread_continue_NORETURN)(ThreadId tid)
2783 {
2784    ThreadState*     tst;
2785    SyscallInfo*     sci;
2786 
2787    VG_(acquire_BigLock)(tid, "wqthread_continue_NORETURN");
2788 
2789    PRINT("SYSCALL[%d,%u](%s) workq_ops() starting new workqueue item\n",
2790          VG_(getpid)(), tid, VG_SYSNUM_STRING(__NR_workq_ops));
2791 
2792    vg_assert(VG_(is_valid_tid)(tid));
2793    vg_assert(tid >= 1 && tid < VG_N_THREADS);
2794    vg_assert(VG_(is_running_thread)(tid));
2795 
2796    tst     = VG_(get_ThreadState)(tid);
2797    sci     = & syscallInfo[tid];
2798    vg_assert(sci->status.what != SsIdle);
2799    vg_assert(tst->os_state.wq_jmpbuf_valid);  // check this BEFORE post_syscall
2800 
2801    // Pretend the syscall completed normally, but don't touch the thread state.
2802    sci->status = convert_SysRes_to_SyscallStatus( VG_(mk_SysRes_Success)(0) );
2803    sci->flags |= SfNoWriteResult;
2804    VG_(post_syscall)(tid);
2805 
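   // Resync our view of the client's memory mappings, which the kernel
   // may have changed behind our back, before running the new work item.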
2806    ML_(sync_mappings)("in", "ML_(wqthread_continue_NORETURN)", 0);
2807 
2808    sci->status.what = SsIdle;
2809 
2810    vg_assert(tst->sched_jmpbuf_valid);
2811    VG_MINIMAL_LONGJMP(tst->sched_jmpbuf);
2812 
2813    /* NOTREACHED */
2814    vg_assert(0);
2815 }
2816 #endif
2817 
2818 
2819 /* ---------------------------------------------------------------------
2820    A place to store the where-to-call-when-really-done pointer
2821    ------------------------------------------------------------------ */
2822 
2823 // When the final thread is done, where shall I call to shutdown the
2824 // system cleanly?  Is set once at startup (in m_main) and never
2825 // changes after that.  Is basically a pointer to the exit
2826 // continuation.  This is all just a nasty hack to avoid calling
2827 // directly from m_syswrap to m_main at exit, since that would cause
2828 // m_main to become part of a module cycle, which is silly.
2829 void (* VG_(address_of_m_main_shutdown_actions_NORETURN) )
2830        (ThreadId,VgSchedReturnCode)
2831    = NULL;
2832 
2833 /*--------------------------------------------------------------------*/
2834 /*--- end                                                          ---*/
2835 /*--------------------------------------------------------------------*/
2836