• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- Handle system calls.                          syswrap-main.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2000-2011 Julian Seward
11       jseward@acm.org
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 #include "libvex_guest_offsets.h"
32 #include "libvex_trc_values.h"
33 #include "pub_core_basics.h"
34 #include "pub_core_aspacemgr.h"
35 #include "pub_core_vki.h"
36 #include "pub_core_vkiscnums.h"
37 #include "pub_core_libcsetjmp.h"    // to keep _threadstate.h happy
38 #include "pub_core_threadstate.h"
39 #include "pub_core_libcbase.h"
40 #include "pub_core_libcassert.h"
41 #include "pub_core_libcprint.h"
42 #include "pub_core_libcproc.h"      // For VG_(getpid)()
43 #include "pub_core_libcsignal.h"
44 #include "pub_core_scheduler.h"     // For VG_({acquire,release}_BigLock),
45                                     //   and VG_(vg_yield)
46 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
47 #include "pub_core_tooliface.h"
48 #include "pub_core_options.h"
49 #include "pub_core_signals.h"       // For VG_SIGVGKILL, VG_(poll_signals)
50 #include "pub_core_syscall.h"
51 #include "pub_core_machine.h"
52 #include "pub_core_syswrap.h"
53 
54 #include "priv_types_n_macros.h"
55 #include "priv_syswrap-main.h"
56 
57 #if defined(VGO_darwin)
58 #include "priv_syswrap-darwin.h"
59 #endif
60 
61 /* Useful info which needs to be recorded somewhere:
62    Use of registers in syscalls is:
63 
64           NUM   ARG1 ARG2 ARG3 ARG4 ARG5 ARG6 ARG7 ARG8 RESULT
65    LINUX:
66    x86    eax   ebx  ecx  edx  esi  edi  ebp  n/a  n/a  eax       (== NUM)
67    amd64  rax   rdi  rsi  rdx  r10  r8   r9   n/a  n/a  rax       (== NUM)
68    ppc32  r0    r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
69    ppc64  r0    r3   r4   r5   r6   r7   r8   n/a  n/a  r3+CR0.SO (== ARG1)
70    arm    r7    r0   r1   r2   r3   r4   r5   n/a  n/a  r0        (== ARG1)
71 
72    On s390x the svc instruction is used for system calls. The system call
73    number is encoded in the instruction (8 bit immediate field). Since Linux
74    2.6 it is also allowed to use svc 0 with the system call number in r1.
75    This was introduced for system calls >255, but works for all. It is
76    also possible to see the svc 0 together with an EXecute instruction, that
77    fills in the immediate field.
78    s390x r1/SVC r2   r3   r4   r5   r6   r7   n/a  n/a  r2        (== ARG1)
79 
80    DARWIN:
81    x86    eax +4   +8   +12  +16  +20  +24  +28  +32  edx:eax, eflags.c
82    amd64  rax rdi  rsi  rdx  rcx  r8   r9   +8   +16  rdx:rax, rflags.c
83 
84    For x86-darwin, "+N" denotes "in memory at N(%esp)"; ditto
85    amd64-darwin.  Apparently 0(%esp) is some kind of return address
86    (perhaps for syscalls done with "sysenter"?)  I don't think it is
87    relevant for syscalls done with "int $0x80/1/2".
88 */
89 
90 /* This is the top level of the system-call handler module.  All
91    system calls are channelled through here, doing two things:
92 
93    * notify the tool of the events (mem/reg reads, writes) happening
94 
95    * perform the syscall, usually by passing it along to the kernel
96      unmodified.
97 
98    A magical piece of assembly code, do_syscall_for_client_WRK, in
99    syscall-$PLATFORM.S does the tricky bit of passing a syscall to the
100    kernel, whilst having the simulator retain control.
101 */
102 
103 /* The main function is VG_(client_syscall).  The simulation calls it
104    whenever a client thread wants to do a syscall.  The following is a
105    sketch of what it does.
106 
107    * Ensures the root thread's stack is suitably mapped.  Tedious and
108      arcane.  See big big comment in VG_(client_syscall).
109 
110    * First, it rounds up the syscall number and args (which is a
111      platform dependent activity) and puts them in a struct ("args")
112      and also a copy in "orig_args".
113 
114      The pre/post wrappers refer to these structs and so no longer
115      need magic macros to access any specific registers.  This struct
116      is stored in thread-specific storage.
117 
118 
119    * The pre-wrapper is called, passing it a pointer to struct
120      "args".
121 
122 
123    * The pre-wrapper examines the args and pokes the tool
124      appropriately.  It may modify the args; this is why "orig_args"
125      is also stored.
126 
127      The pre-wrapper may choose to 'do' the syscall itself, and
128      concludes one of three outcomes:
129 
130        Success(N)    -- syscall is already complete, with success;
131                         result is N
132 
133        Fail(N)       -- syscall is already complete, with failure;
134                         error code is N
135 
136        HandToKernel  -- (the usual case): this needs to be given to
137                         the kernel to be done, using the values in
138                         the possibly-modified "args" struct.
139 
140      In addition, the pre-wrapper may set some flags:
141 
142        MayBlock   -- only applicable when outcome==HandToKernel
143 
144        PostOnFail -- only applicable when outcome==HandToKernel or Fail
145 
146 
147    * If the pre-outcome is HandToKernel, the syscall is duly handed
148      off to the kernel (perhaps involving some thread switchery, but
149      that's not important).  This reduces the possible set of outcomes
150      to either Success(N) or Fail(N).
151 
152 
153    * The outcome (Success(N) or Fail(N)) is written back to the guest
154      register(s).  This is platform specific:
155 
156      x86:    Success(N) ==>  eax = N
157              Fail(N)    ==>  eax = -N
158 
159      ditto amd64
160 
161      ppc32:  Success(N) ==>  r3 = N, CR0.SO = 0
162              Fail(N) ==>     r3 = N, CR0.SO = 1
163 
164      Darwin:
165      x86:    Success(N) ==>  edx:eax = N, cc = 0
166              Fail(N)    ==>  edx:eax = N, cc = 1
167 
168      s390x:  Success(N) ==>  r2 = N
169              Fail(N)    ==>  r2 = -N
170 
171    * The post wrapper is called if:
172 
173      - it exists, and
174      - outcome==Success or (outcome==Fail and PostOnFail is set)
175 
176      The post wrapper is passed the adulterated syscall args (struct
177      "args"), and the syscall outcome (viz, Success(N) or Fail(N)).
178 
179    There are several other complications, primarily to do with
180    syscalls getting interrupted, explained in comments in the code.
181 */
182 
183 /* CAVEATS for writing wrappers.  It is important to follow these!
184 
185    The macros defined in priv_types_n_macros.h are designed to help
186    decouple the wrapper logic from the actual representation of
187    syscall args/results, since these wrappers are designed to work on
188    multiple platforms.
189 
190    Sometimes a PRE wrapper will complete the syscall itself, without
191    handing it to the kernel.  It will use one of SET_STATUS_Success,
192    SET_STATUS_Failure or SET_STATUS_from_SysRes to set the return
193    value.  It is critical to appreciate that use of the macro does not
194    immediately cause the underlying guest state to be updated -- that
195    is done by the driver logic in this file, when the wrapper returns.
196 
197    As a result, PRE wrappers of the following form will malfunction:
198 
199    PRE(fooble)
200    {
201       ... do stuff ...
202       SET_STATUS_Somehow(...)
203 
204       // do something that assumes guest state is up to date
205    }
206 
207    In particular, direct or indirect calls to VG_(poll_signals) after
208    setting STATUS can cause the guest state to be read (in order to
209    build signal frames).  Do not do this.  If you want a signal poll
210    after the syscall goes through, do "*flags |= SfPollAfter" and the
211    driver logic will do it for you.
212 
213    -----------
214 
215    Another critical requirement following introduction of new address
216    space manager (JRS, 20050923):
217 
218    In a situation where the mappedness of memory has changed, aspacem
219    should be notified BEFORE the tool.  Hence the following is
220    correct:
221 
222       Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
223       VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
224       if (d)
225          VG_(discard_translations)(s->start, s->end+1 - s->start);
226 
227    whilst this is wrong:
228 
229       VG_TRACK( die_mem_munmap, s->start, s->end+1 - s->start );
230       Bool d = VG_(am_notify_munmap)(s->start, s->end+1 - s->start);
231       if (d)
232          VG_(discard_translations)(s->start, s->end+1 - s->start);
233 
234    The reason is that the tool may itself ask aspacem for more shadow
235    memory as a result of the VG_TRACK call.  In such a situation it is
236    critical that aspacem's segment array is up to date -- hence the
237    need to notify aspacem first.
238 
239    -----------
240 
241    Also .. take care to call VG_(discard_translations) whenever
242    memory with execute permissions is unmapped.
243 */
244 
245 
246 /* ---------------------------------------------------------------------
247    Do potentially blocking syscall for the client, and mess with
248    signal masks at the same time.
249    ------------------------------------------------------------------ */
250 
251 /* Perform a syscall on behalf of a client thread, using a specific
252    signal mask.  On completion, the signal mask is set to restore_mask
253    (which presumably blocks almost everything).  If a signal happens
254    during the syscall, the handler should call
255    VG_(fixup_guest_state_after_syscall_interrupted) to adjust the
256    thread's context to do the right thing.
257 
258    The _WRK function is handwritten assembly, implemented per-platform
259    in coregrind/m_syswrap/syscall-$PLAT.S.  It has some very magic
260    properties.  See comments at the top of
261    VG_(fixup_guest_state_after_syscall_interrupted) below for details.
262 
263    This function (these functions) are required to return zero in case
264    of success (even if the syscall itself failed), and nonzero if the
265    sigprocmask-swizzling calls failed.  We don't actually care about
266    the failure values from sigprocmask, although most of the assembly
267    implementations do attempt to return that, using the convention
268    0 for success, or 0x8000 | error-code for failure.
269 */
270 #if defined(VGO_linux)
271 extern
272 UWord ML_(do_syscall_for_client_WRK)( Word syscallno,
273                                       void* guest_state,
274                                       const vki_sigset_t *syscall_mask,
275                                       const vki_sigset_t *restore_mask,
276                                       Word sigsetSzB );
277 #elif defined(VGO_darwin)
278 extern
279 UWord ML_(do_syscall_for_client_unix_WRK)( Word syscallno,
280                                            void* guest_state,
281                                            const vki_sigset_t *syscall_mask,
282                                            const vki_sigset_t *restore_mask,
283                                            Word sigsetSzB ); /* unused */
284 extern
285 UWord ML_(do_syscall_for_client_mach_WRK)( Word syscallno,
286                                            void* guest_state,
287                                            const vki_sigset_t *syscall_mask,
288                                            const vki_sigset_t *restore_mask,
289                                            Word sigsetSzB ); /* unused */
290 extern
291 UWord ML_(do_syscall_for_client_mdep_WRK)( Word syscallno,
292                                            void* guest_state,
293                                            const vki_sigset_t *syscall_mask,
294                                            const vki_sigset_t *restore_mask,
295                                            Word sigsetSzB ); /* unused */
296 #else
297 #  error "Unknown OS"
298 #endif
299 
300 
301 static
do_syscall_for_client(Int syscallno,ThreadState * tst,const vki_sigset_t * syscall_mask)302 void do_syscall_for_client ( Int syscallno,
303                              ThreadState* tst,
304                              const vki_sigset_t* syscall_mask )
305 {
306    vki_sigset_t saved;
307    UWord err;
308 #  if defined(VGO_linux)
309    err = ML_(do_syscall_for_client_WRK)(
310             syscallno, &tst->arch.vex,
311             syscall_mask, &saved, sizeof(vki_sigset_t)
312          );
313 #  elif defined(VGO_darwin)
314    switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
315       case VG_DARWIN_SYSCALL_CLASS_UNIX:
316          err = ML_(do_syscall_for_client_unix_WRK)(
317                   VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
318                   syscall_mask, &saved, 0/*unused:sigsetSzB*/
319                );
320          break;
321       case VG_DARWIN_SYSCALL_CLASS_MACH:
322          err = ML_(do_syscall_for_client_mach_WRK)(
323                   VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
324                   syscall_mask, &saved, 0/*unused:sigsetSzB*/
325                );
326          break;
327       case VG_DARWIN_SYSCALL_CLASS_MDEP:
328          err = ML_(do_syscall_for_client_mdep_WRK)(
329                   VG_DARWIN_SYSNO_FOR_KERNEL(syscallno), &tst->arch.vex,
330                   syscall_mask, &saved, 0/*unused:sigsetSzB*/
331                );
332          break;
333       default:
334          vg_assert(0);
335          /*NOTREACHED*/
336          break;
337    }
338 #  else
339 #    error "Unknown OS"
340 #  endif
341    vg_assert2(
342       err == 0,
343       "ML_(do_syscall_for_client_WRK): sigprocmask error %d",
344       (Int)(err & 0xFFF)
345    );
346 }
347 
348 
349 /* ---------------------------------------------------------------------
350    Impedance matchers and misc helpers
351    ------------------------------------------------------------------ */
352 
353 static
eq_SyscallArgs(SyscallArgs * a1,SyscallArgs * a2)354 Bool eq_SyscallArgs ( SyscallArgs* a1, SyscallArgs* a2 )
355 {
356    return a1->sysno == a2->sysno
357           && a1->arg1 == a2->arg1
358           && a1->arg2 == a2->arg2
359           && a1->arg3 == a2->arg3
360           && a1->arg4 == a2->arg4
361           && a1->arg5 == a2->arg5
362           && a1->arg6 == a2->arg6
363           && a1->arg7 == a2->arg7
364           && a1->arg8 == a2->arg8;
365 }
366 
367 static
eq_SyscallStatus(SyscallStatus * s1,SyscallStatus * s2)368 Bool eq_SyscallStatus ( SyscallStatus* s1, SyscallStatus* s2 )
369 {
370    /* was: return s1->what == s2->what && sr_EQ( s1->sres, s2->sres ); */
371    if (s1->what == s2->what && sr_EQ( s1->sres, s2->sres ))
372       return True;
373 #  if defined(VGO_darwin)
374    /* Darwin-specific debugging guff */
375    vg_assert(s1->what == s2->what);
376    VG_(printf)("eq_SyscallStatus:\n");
377    VG_(printf)("  {%lu %lu %u}\n", s1->sres._wLO, s1->sres._wHI, s1->sres._mode);
378    VG_(printf)("  {%lu %lu %u}\n", s2->sres._wLO, s2->sres._wHI, s2->sres._mode);
379    vg_assert(0);
380 #  endif
381    return False;
382 }
383 
384 /* Convert between SysRes and SyscallStatus, to the extent possible. */
385 
386 static
convert_SysRes_to_SyscallStatus(SysRes res)387 SyscallStatus convert_SysRes_to_SyscallStatus ( SysRes res )
388 {
389    SyscallStatus status;
390    status.what = SsComplete;
391    status.sres = res;
392    return status;
393 }
394 
395 
396 /* Impedance matchers.  These convert syscall arg or result data from
397    the platform-specific in-guest-state format to the canonical
398    formats, and back. */
399 
400 static
getSyscallArgsFromGuestState(SyscallArgs * canonical,VexGuestArchState * gst_vanilla,UInt trc)401 void getSyscallArgsFromGuestState ( /*OUT*/SyscallArgs*       canonical,
402                                     /*IN*/ VexGuestArchState* gst_vanilla,
403                                     /*IN*/ UInt trc )
404 {
405 #if defined(VGP_x86_linux)
406    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
407    canonical->sysno = gst->guest_EAX;
408    canonical->arg1  = gst->guest_EBX;
409    canonical->arg2  = gst->guest_ECX;
410    canonical->arg3  = gst->guest_EDX;
411    canonical->arg4  = gst->guest_ESI;
412    canonical->arg5  = gst->guest_EDI;
413    canonical->arg6  = gst->guest_EBP;
414    canonical->arg7  = 0;
415    canonical->arg8  = 0;
416 
417 #elif defined(VGP_amd64_linux)
418    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
419    canonical->sysno = gst->guest_RAX;
420    canonical->arg1  = gst->guest_RDI;
421    canonical->arg2  = gst->guest_RSI;
422    canonical->arg3  = gst->guest_RDX;
423    canonical->arg4  = gst->guest_R10;
424    canonical->arg5  = gst->guest_R8;
425    canonical->arg6  = gst->guest_R9;
426    canonical->arg7  = 0;
427    canonical->arg8  = 0;
428 
429 #elif defined(VGP_ppc32_linux)
430    VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
431    canonical->sysno = gst->guest_GPR0;
432    canonical->arg1  = gst->guest_GPR3;
433    canonical->arg2  = gst->guest_GPR4;
434    canonical->arg3  = gst->guest_GPR5;
435    canonical->arg4  = gst->guest_GPR6;
436    canonical->arg5  = gst->guest_GPR7;
437    canonical->arg6  = gst->guest_GPR8;
438    canonical->arg7  = 0;
439    canonical->arg8  = 0;
440 
441 #elif defined(VGP_ppc64_linux)
442    VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
443    canonical->sysno = gst->guest_GPR0;
444    canonical->arg1  = gst->guest_GPR3;
445    canonical->arg2  = gst->guest_GPR4;
446    canonical->arg3  = gst->guest_GPR5;
447    canonical->arg4  = gst->guest_GPR6;
448    canonical->arg5  = gst->guest_GPR7;
449    canonical->arg6  = gst->guest_GPR8;
450    canonical->arg7  = 0;
451    canonical->arg8  = 0;
452 
453 #elif defined(VGP_arm_linux)
454    VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
455    canonical->sysno = gst->guest_R7;
456    canonical->arg1  = gst->guest_R0;
457    canonical->arg2  = gst->guest_R1;
458    canonical->arg3  = gst->guest_R2;
459    canonical->arg4  = gst->guest_R3;
460    canonical->arg5  = gst->guest_R4;
461    canonical->arg6  = gst->guest_R5;
462    canonical->arg7  = 0;
463    canonical->arg8  = 0;
464 
465 #elif defined(VGP_x86_darwin)
466    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
467    UWord *stack = (UWord *)gst->guest_ESP;
468    // GrP fixme hope syscalls aren't called with really shallow stacks...
469    canonical->sysno = gst->guest_EAX;
470    if (canonical->sysno != 0) {
471       // stack[0] is return address
472       canonical->arg1  = stack[1];
473       canonical->arg2  = stack[2];
474       canonical->arg3  = stack[3];
475       canonical->arg4  = stack[4];
476       canonical->arg5  = stack[5];
477       canonical->arg6  = stack[6];
478       canonical->arg7  = stack[7];
479       canonical->arg8  = stack[8];
480    } else {
481       // GrP fixme hack handle syscall()
482       // GrP fixme what about __syscall() ?
483       // stack[0] is return address
484       // DDD: the tool can't see that the params have been shifted!  Can
485       //      lead to incorrect checking, I think, because the PRRAn/PSARn
486       //      macros will mention the pre-shifted args.
487       canonical->sysno = stack[1];
488       vg_assert(canonical->sysno != 0);
489       canonical->arg1  = stack[2];
490       canonical->arg2  = stack[3];
491       canonical->arg3  = stack[4];
492       canonical->arg4  = stack[5];
493       canonical->arg5  = stack[6];
494       canonical->arg6  = stack[7];
495       canonical->arg7  = stack[8];
496       canonical->arg8  = stack[9];
497 
498       PRINT("SYSCALL[%d,?](%s) syscall(%s, ...); please stand by...\n",
499             VG_(getpid)(), /*tid,*/
500             VG_SYSNUM_STRING(0), VG_SYSNUM_STRING(canonical->sysno));
501    }
502 
503    // Here we determine what kind of syscall it was by looking at the
504    // interrupt kind, and then encode the syscall number using the 64-bit
505    // encoding for Valgrind's internal use.
506    //
507    // DDD: Would it be better to stash the JMP kind into the Darwin
508    // thread state rather than passing in the trc?
509    switch (trc) {
510    case VEX_TRC_JMP_SYS_INT128:
511       // int $0x80 = Unix, 64-bit result
512       vg_assert(canonical->sysno >= 0);
513       canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno);
514       break;
515    case VEX_TRC_JMP_SYS_SYSENTER:
516       // syscall = Unix, 32-bit result
517       // OR        Mach, 32-bit result
518       if (canonical->sysno >= 0) {
519          // GrP fixme hack:  0xffff == I386_SYSCALL_NUMBER_MASK
520          canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(canonical->sysno
521                                                              & 0xffff);
522       } else {
523          canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
524       }
525       break;
526    case VEX_TRC_JMP_SYS_INT129:
527       // int $0x81 = Mach, 32-bit result
528       vg_assert(canonical->sysno < 0);
529       canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MACH(-canonical->sysno);
530       break;
531    case VEX_TRC_JMP_SYS_INT130:
532       // int $0x82 = mdep, 32-bit result
533       vg_assert(canonical->sysno >= 0);
534       canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_MDEP(canonical->sysno);
535       break;
536    default:
537       vg_assert(0);
538       break;
539    }
540 
541 #elif defined(VGP_amd64_darwin)
542    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
543    UWord *stack = (UWord *)gst->guest_RSP;
544 
545    vg_assert(trc == VEX_TRC_JMP_SYS_SYSCALL);
546 
547    // GrP fixme hope syscalls aren't called with really shallow stacks...
548    canonical->sysno = gst->guest_RAX;
549    if (canonical->sysno != __NR_syscall) {
550       // stack[0] is return address
551       canonical->arg1  = gst->guest_RDI;
552       canonical->arg2  = gst->guest_RSI;
553       canonical->arg3  = gst->guest_RDX;
554       canonical->arg4  = gst->guest_R10;  // not rcx with syscall insn
555       canonical->arg5  = gst->guest_R8;
556       canonical->arg6  = gst->guest_R9;
557       canonical->arg7  = stack[1];
558       canonical->arg8  = stack[2];
559    } else {
560       // GrP fixme hack handle syscall()
561       // GrP fixme what about __syscall() ?
562       // stack[0] is return address
563       // DDD: the tool can't see that the params have been shifted!  Can
564       //      lead to incorrect checking, I think, because the PRRAn/PSARn
565       //      macros will mention the pre-shifted args.
566       canonical->sysno = VG_DARWIN_SYSCALL_CONSTRUCT_UNIX(gst->guest_RDI);
567       vg_assert(canonical->sysno != __NR_syscall);
568       canonical->arg1  = gst->guest_RSI;
569       canonical->arg2  = gst->guest_RDX;
570       canonical->arg3  = gst->guest_R10;  // not rcx with syscall insn
571       canonical->arg4  = gst->guest_R8;
572       canonical->arg5  = gst->guest_R9;
573       canonical->arg6  = stack[1];
574       canonical->arg7  = stack[2];
575       canonical->arg8  = stack[3];
576 
577       PRINT("SYSCALL[%d,?](%s) syscall(%s, ...); please stand by...\n",
578             VG_(getpid)(), /*tid,*/
579             VG_SYSNUM_STRING(0), VG_SYSNUM_STRING(canonical->sysno));
580    }
581 
582    // no canonical->sysno adjustment needed
583 
584 #elif defined(VGP_s390x_linux)
585    VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
586    canonical->sysno = gst->guest_SYSNO;
587    canonical->arg1  = gst->guest_r2;
588    canonical->arg2  = gst->guest_r3;
589    canonical->arg3  = gst->guest_r4;
590    canonical->arg4  = gst->guest_r5;
591    canonical->arg5  = gst->guest_r6;
592    canonical->arg6  = gst->guest_r7;
593    canonical->arg7  = 0;
594    canonical->arg8  = 0;
595 #else
596 #  error "getSyscallArgsFromGuestState: unknown arch"
597 #endif
598 }
599 
600 static
putSyscallArgsIntoGuestState(SyscallArgs * canonical,VexGuestArchState * gst_vanilla)601 void putSyscallArgsIntoGuestState ( /*IN*/ SyscallArgs*       canonical,
602                                     /*OUT*/VexGuestArchState* gst_vanilla )
603 {
604 #if defined(VGP_x86_linux)
605    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
606    gst->guest_EAX = canonical->sysno;
607    gst->guest_EBX = canonical->arg1;
608    gst->guest_ECX = canonical->arg2;
609    gst->guest_EDX = canonical->arg3;
610    gst->guest_ESI = canonical->arg4;
611    gst->guest_EDI = canonical->arg5;
612    gst->guest_EBP = canonical->arg6;
613 
614 #elif defined(VGP_amd64_linux)
615    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
616    gst->guest_RAX = canonical->sysno;
617    gst->guest_RDI = canonical->arg1;
618    gst->guest_RSI = canonical->arg2;
619    gst->guest_RDX = canonical->arg3;
620    gst->guest_R10 = canonical->arg4;
621    gst->guest_R8  = canonical->arg5;
622    gst->guest_R9  = canonical->arg6;
623 
624 #elif defined(VGP_ppc32_linux)
625    VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
626    gst->guest_GPR0 = canonical->sysno;
627    gst->guest_GPR3 = canonical->arg1;
628    gst->guest_GPR4 = canonical->arg2;
629    gst->guest_GPR5 = canonical->arg3;
630    gst->guest_GPR6 = canonical->arg4;
631    gst->guest_GPR7 = canonical->arg5;
632    gst->guest_GPR8 = canonical->arg6;
633 
634 #elif defined(VGP_ppc64_linux)
635    VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
636    gst->guest_GPR0 = canonical->sysno;
637    gst->guest_GPR3 = canonical->arg1;
638    gst->guest_GPR4 = canonical->arg2;
639    gst->guest_GPR5 = canonical->arg3;
640    gst->guest_GPR6 = canonical->arg4;
641    gst->guest_GPR7 = canonical->arg5;
642    gst->guest_GPR8 = canonical->arg6;
643 
644 #elif defined(VGP_arm_linux)
645    VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
646    gst->guest_R7 = canonical->sysno;
647    gst->guest_R0 = canonical->arg1;
648    gst->guest_R1 = canonical->arg2;
649    gst->guest_R2 = canonical->arg3;
650    gst->guest_R3 = canonical->arg4;
651    gst->guest_R4 = canonical->arg5;
652    gst->guest_R5 = canonical->arg6;
653 
654 #elif defined(VGP_x86_darwin)
655    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
656    UWord *stack = (UWord *)gst->guest_ESP;
657 
658    gst->guest_EAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);
659 
660    // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;
661    // stack[0] is return address
662    stack[1] = canonical->arg1;
663    stack[2] = canonical->arg2;
664    stack[3] = canonical->arg3;
665    stack[4] = canonical->arg4;
666    stack[5] = canonical->arg5;
667    stack[6] = canonical->arg6;
668    stack[7] = canonical->arg7;
669    stack[8] = canonical->arg8;
670 
671 #elif defined(VGP_amd64_darwin)
672    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
673    UWord *stack = (UWord *)gst->guest_RSP;
674 
675    gst->guest_RAX = VG_DARWIN_SYSNO_FOR_KERNEL(canonical->sysno);
676    // GrP fixme? gst->guest_TEMP_EFLAG_C = 0;
677 
678    // stack[0] is return address
679    gst->guest_RDI = canonical->arg1;
680    gst->guest_RSI = canonical->arg2;
681    gst->guest_RDX = canonical->arg3;
682    gst->guest_RCX = canonical->arg4;
683    gst->guest_R8  = canonical->arg5;
684    gst->guest_R9  = canonical->arg6;
685    stack[1]       = canonical->arg7;
686    stack[2]       = canonical->arg8;
687 
688 #elif defined(VGP_s390x_linux)
689    VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
690    gst->guest_SYSNO  = canonical->sysno;
691    gst->guest_r2     = canonical->arg1;
692    gst->guest_r3     = canonical->arg2;
693    gst->guest_r4     = canonical->arg3;
694    gst->guest_r5     = canonical->arg4;
695    gst->guest_r6     = canonical->arg5;
696    gst->guest_r7     = canonical->arg6;
697 
698 #else
699 #  error "putSyscallArgsIntoGuestState: unknown arch"
700 #endif
701 }
702 
703 static
getSyscallStatusFromGuestState(SyscallStatus * canonical,VexGuestArchState * gst_vanilla)704 void getSyscallStatusFromGuestState ( /*OUT*/SyscallStatus*     canonical,
705                                       /*IN*/ VexGuestArchState* gst_vanilla )
706 {
707 #  if defined(VGP_x86_linux)
708    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
709    canonical->sres = VG_(mk_SysRes_x86_linux)( gst->guest_EAX );
710    canonical->what = SsComplete;
711 
712 #  elif defined(VGP_amd64_linux)
713    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
714    canonical->sres = VG_(mk_SysRes_amd64_linux)( gst->guest_RAX );
715    canonical->what = SsComplete;
716 
717 #  elif defined(VGP_ppc32_linux)
718    VexGuestPPC32State* gst   = (VexGuestPPC32State*)gst_vanilla;
719    UInt                cr    = LibVEX_GuestPPC32_get_CR( gst );
720    UInt                cr0so = (cr >> 28) & 1;
721    canonical->sres = VG_(mk_SysRes_ppc32_linux)( gst->guest_GPR3, cr0so );
722    canonical->what = SsComplete;
723 
724 #  elif defined(VGP_ppc64_linux)
725    VexGuestPPC64State* gst   = (VexGuestPPC64State*)gst_vanilla;
726    UInt                cr    = LibVEX_GuestPPC64_get_CR( gst );
727    UInt                cr0so = (cr >> 28) & 1;
728    canonical->sres = VG_(mk_SysRes_ppc64_linux)( gst->guest_GPR3, cr0so );
729    canonical->what = SsComplete;
730 
731 #  elif defined(VGP_arm_linux)
732    VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
733    canonical->sres = VG_(mk_SysRes_arm_linux)( gst->guest_R0 );
734    canonical->what = SsComplete;
735 
736 #  elif defined(VGP_x86_darwin)
737    /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
738    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
739    UInt carry = 1 & LibVEX_GuestX86_get_eflags(gst);
740    UInt err = 0;
741    UInt wLO = 0;
742    UInt wHI = 0;
743    switch (gst->guest_SC_CLASS) {
744       case VG_DARWIN_SYSCALL_CLASS_UNIX:
745          // int $0x80 = Unix, 64-bit result
746          err = carry;
747          wLO = gst->guest_EAX;
748          wHI = gst->guest_EDX;
749          break;
750       case VG_DARWIN_SYSCALL_CLASS_MACH:
751          // int $0x81 = Mach, 32-bit result
752          wLO = gst->guest_EAX;
753          break;
754       case VG_DARWIN_SYSCALL_CLASS_MDEP:
755          // int $0x82 = mdep, 32-bit result
756          wLO = gst->guest_EAX;
757          break;
758       default:
759          vg_assert(0);
760          break;
761    }
762    canonical->sres = VG_(mk_SysRes_x86_darwin)(
763                         gst->guest_SC_CLASS, err ? True : False,
764                         wHI, wLO
765                      );
766    canonical->what = SsComplete;
767 
768 #  elif defined(VGP_amd64_darwin)
769    /* duplicates logic in m_signals.VG_UCONTEXT_SYSCALL_SYSRES */
770    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
771    ULong carry = 1 & LibVEX_GuestAMD64_get_rflags(gst);
772    ULong err = 0;
773    ULong wLO = 0;
774    ULong wHI = 0;
775    switch (gst->guest_SC_CLASS) {
776       case VG_DARWIN_SYSCALL_CLASS_UNIX:
777          // syscall = Unix, 128-bit result
778          err = carry;
779          wLO = gst->guest_RAX;
780          wHI = gst->guest_RDX;
781          break;
782       case VG_DARWIN_SYSCALL_CLASS_MACH:
783          // syscall = Mach, 64-bit result
784          wLO = gst->guest_RAX;
785          break;
786       case VG_DARWIN_SYSCALL_CLASS_MDEP:
787          // syscall = mdep, 64-bit result
788          wLO = gst->guest_RAX;
789          break;
790       default:
791          vg_assert(0);
792          break;
793    }
794    canonical->sres = VG_(mk_SysRes_amd64_darwin)(
795                         gst->guest_SC_CLASS, err ? True : False,
796                         wHI, wLO
797                      );
798    canonical->what = SsComplete;
799 
800 #  elif defined(VGP_s390x_linux)
801    VexGuestS390XState* gst   = (VexGuestS390XState*)gst_vanilla;
802    canonical->sres = VG_(mk_SysRes_s390x_linux)( gst->guest_r2 );
803    canonical->what = SsComplete;
804 
805 #  else
806 #    error "getSyscallStatusFromGuestState: unknown arch"
807 #  endif
808 }
809 
810 static
putSyscallStatusIntoGuestState(ThreadId tid,SyscallStatus * canonical,VexGuestArchState * gst_vanilla)811 void putSyscallStatusIntoGuestState ( /*IN*/ ThreadId tid,
812                                       /*IN*/ SyscallStatus*     canonical,
813                                       /*OUT*/VexGuestArchState* gst_vanilla )
814 {
815 #  if defined(VGP_x86_linux)
816    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
817    vg_assert(canonical->what == SsComplete);
818    if (sr_isError(canonical->sres)) {
819       /* This isn't exactly right, in that really a Failure with res
820          not in the range 1 .. 4095 is unrepresentable in the
821          Linux-x86 scheme.  Oh well. */
822       gst->guest_EAX = - (Int)sr_Err(canonical->sres);
823    } else {
824       gst->guest_EAX = sr_Res(canonical->sres);
825    }
826    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
827              OFFSET_x86_EAX, sizeof(UWord) );
828 
829 #  elif defined(VGP_amd64_linux)
830    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
831    vg_assert(canonical->what == SsComplete);
832    if (sr_isError(canonical->sres)) {
833       /* This isn't exactly right, in that really a Failure with res
834          not in the range 1 .. 4095 is unrepresentable in the
835          Linux-amd64 scheme.  Oh well. */
836       gst->guest_RAX = - (Long)sr_Err(canonical->sres);
837    } else {
838       gst->guest_RAX = sr_Res(canonical->sres);
839    }
840    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
841              OFFSET_amd64_RAX, sizeof(UWord) );
842 
843 #  elif defined(VGP_ppc32_linux)
844    VexGuestPPC32State* gst = (VexGuestPPC32State*)gst_vanilla;
845    UInt old_cr = LibVEX_GuestPPC32_get_CR(gst);
846    vg_assert(canonical->what == SsComplete);
847    if (sr_isError(canonical->sres)) {
848       /* set CR0.SO */
849       LibVEX_GuestPPC32_put_CR( old_cr | (1<<28), gst );
850       gst->guest_GPR3 = sr_Err(canonical->sres);
851    } else {
852       /* clear CR0.SO */
853       LibVEX_GuestPPC32_put_CR( old_cr & ~(1<<28), gst );
854       gst->guest_GPR3 = sr_Res(canonical->sres);
855    }
856    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
857              OFFSET_ppc32_GPR3, sizeof(UWord) );
858    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
859              OFFSET_ppc32_CR0_0, sizeof(UChar) );
860 
861 #  elif defined(VGP_ppc64_linux)
862    VexGuestPPC64State* gst = (VexGuestPPC64State*)gst_vanilla;
863    UInt old_cr = LibVEX_GuestPPC64_get_CR(gst);
864    vg_assert(canonical->what == SsComplete);
865    if (sr_isError(canonical->sres)) {
866       /* set CR0.SO */
867       LibVEX_GuestPPC64_put_CR( old_cr | (1<<28), gst );
868       gst->guest_GPR3 = sr_Err(canonical->sres);
869    } else {
870       /* clear CR0.SO */
871       LibVEX_GuestPPC64_put_CR( old_cr & ~(1<<28), gst );
872       gst->guest_GPR3 = sr_Res(canonical->sres);
873    }
874    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
875              OFFSET_ppc64_GPR3, sizeof(UWord) );
876    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
877              OFFSET_ppc64_CR0_0, sizeof(UChar) );
878 
879 #  elif defined(VGP_arm_linux)
880    VexGuestARMState* gst = (VexGuestARMState*)gst_vanilla;
881    vg_assert(canonical->what == SsComplete);
882    if (sr_isError(canonical->sres)) {
883       /* This isn't exactly right, in that really a Failure with res
884          not in the range 1 .. 4095 is unrepresentable in the
885          Linux-arm scheme.  Oh well. */
886       gst->guest_R0 = - (Int)sr_Err(canonical->sres);
887    } else {
888       gst->guest_R0 = sr_Res(canonical->sres);
889    }
890    VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
891              OFFSET_arm_R0, sizeof(UWord) );
892 
893 #elif defined(VGP_x86_darwin)
894    VexGuestX86State* gst = (VexGuestX86State*)gst_vanilla;
895    SysRes sres = canonical->sres;
896    vg_assert(canonical->what == SsComplete);
897    /* Unfortunately here we have to break abstraction and look
898       directly inside 'res', in order to decide what to do. */
899    switch (sres._mode) {
900       case SysRes_MACH: // int $0x81 = Mach, 32-bit result
901       case SysRes_MDEP: // int $0x82 = mdep, 32-bit result
902          gst->guest_EAX = sres._wLO;
903          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
904                    OFFSET_x86_EAX, sizeof(UInt) );
905          break;
906       case SysRes_UNIX_OK:  // int $0x80 = Unix, 64-bit result
907       case SysRes_UNIX_ERR: // int $0x80 = Unix, 64-bit error
908          gst->guest_EAX = sres._wLO;
909          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
910                    OFFSET_x86_EAX, sizeof(UInt) );
911          gst->guest_EDX = sres._wHI;
912          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
913                    OFFSET_x86_EDX, sizeof(UInt) );
914          LibVEX_GuestX86_put_eflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
915                                       gst );
916          // GrP fixme sets defined for entire eflags, not just bit c
917          // DDD: this breaks exp-ptrcheck.
918          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
919                    offsetof(VexGuestX86State, guest_CC_DEP1), sizeof(UInt) );
920          break;
921       default:
922          vg_assert(0);
923          break;
924    }
925 
926 #elif defined(VGP_amd64_darwin)
927    VexGuestAMD64State* gst = (VexGuestAMD64State*)gst_vanilla;
928    SysRes sres = canonical->sres;
929    vg_assert(canonical->what == SsComplete);
930    /* Unfortunately here we have to break abstraction and look
931       directly inside 'res', in order to decide what to do. */
932    switch (sres._mode) {
933       case SysRes_MACH: // syscall = Mach, 64-bit result
934       case SysRes_MDEP: // syscall = mdep, 64-bit result
935          gst->guest_RAX = sres._wLO;
936          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
937                    OFFSET_amd64_RAX, sizeof(ULong) );
938          break;
939       case SysRes_UNIX_OK:  // syscall = Unix, 128-bit result
940       case SysRes_UNIX_ERR: // syscall = Unix, 128-bit error
941          gst->guest_RAX = sres._wLO;
942          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
943                    OFFSET_amd64_RAX, sizeof(ULong) );
944          gst->guest_RDX = sres._wHI;
945          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
946                    OFFSET_amd64_RDX, sizeof(ULong) );
947          LibVEX_GuestAMD64_put_rflag_c( sres._mode==SysRes_UNIX_ERR ? 1 : 0,
948                                         gst );
949          // GrP fixme sets defined for entire rflags, not just bit c
950          // DDD: this breaks exp-ptrcheck.
951          VG_TRACK( post_reg_write, Vg_CoreSysCall, tid,
952                    offsetof(VexGuestAMD64State, guest_CC_DEP1), sizeof(ULong) );
953          break;
954       default:
955          vg_assert(0);
956          break;
957    }
958 
959 #  elif defined(VGP_s390x_linux)
960    VexGuestS390XState* gst = (VexGuestS390XState*)gst_vanilla;
961    vg_assert(canonical->what == SsComplete);
962    if (sr_isError(canonical->sres)) {
963       gst->guest_r2 = - (Long)sr_Err(canonical->sres);
964    } else {
965       gst->guest_r2 = sr_Res(canonical->sres);
966    }
967 
968 #  else
969 #    error "putSyscallStatusIntoGuestState: unknown arch"
970 #  endif
971 }
972 
973 
974 /* Tell me the offsets in the guest state of the syscall params, so
975    that the scalar argument checkers don't have to have this info
976    hardwired. */
977 
978 static
getSyscallArgLayout(SyscallArgLayout * layout)979 void getSyscallArgLayout ( /*OUT*/SyscallArgLayout* layout )
980 {
981 #if defined(VGP_x86_linux)
982    layout->o_sysno  = OFFSET_x86_EAX;
983    layout->o_arg1   = OFFSET_x86_EBX;
984    layout->o_arg2   = OFFSET_x86_ECX;
985    layout->o_arg3   = OFFSET_x86_EDX;
986    layout->o_arg4   = OFFSET_x86_ESI;
987    layout->o_arg5   = OFFSET_x86_EDI;
988    layout->o_arg6   = OFFSET_x86_EBP;
989    layout->uu_arg7  = -1; /* impossible value */
990    layout->uu_arg8  = -1; /* impossible value */
991 
992 #elif defined(VGP_amd64_linux)
993    layout->o_sysno  = OFFSET_amd64_RAX;
994    layout->o_arg1   = OFFSET_amd64_RDI;
995    layout->o_arg2   = OFFSET_amd64_RSI;
996    layout->o_arg3   = OFFSET_amd64_RDX;
997    layout->o_arg4   = OFFSET_amd64_R10;
998    layout->o_arg5   = OFFSET_amd64_R8;
999    layout->o_arg6   = OFFSET_amd64_R9;
1000    layout->uu_arg7  = -1; /* impossible value */
1001    layout->uu_arg8  = -1; /* impossible value */
1002 
1003 #elif defined(VGP_ppc32_linux)
1004    layout->o_sysno  = OFFSET_ppc32_GPR0;
1005    layout->o_arg1   = OFFSET_ppc32_GPR3;
1006    layout->o_arg2   = OFFSET_ppc32_GPR4;
1007    layout->o_arg3   = OFFSET_ppc32_GPR5;
1008    layout->o_arg4   = OFFSET_ppc32_GPR6;
1009    layout->o_arg5   = OFFSET_ppc32_GPR7;
1010    layout->o_arg6   = OFFSET_ppc32_GPR8;
1011    layout->uu_arg7  = -1; /* impossible value */
1012    layout->uu_arg8  = -1; /* impossible value */
1013 
1014 #elif defined(VGP_ppc64_linux)
1015    layout->o_sysno  = OFFSET_ppc64_GPR0;
1016    layout->o_arg1   = OFFSET_ppc64_GPR3;
1017    layout->o_arg2   = OFFSET_ppc64_GPR4;
1018    layout->o_arg3   = OFFSET_ppc64_GPR5;
1019    layout->o_arg4   = OFFSET_ppc64_GPR6;
1020    layout->o_arg5   = OFFSET_ppc64_GPR7;
1021    layout->o_arg6   = OFFSET_ppc64_GPR8;
1022    layout->uu_arg7  = -1; /* impossible value */
1023    layout->uu_arg8  = -1; /* impossible value */
1024 
1025 #elif defined(VGP_arm_linux)
1026    layout->o_sysno  = OFFSET_arm_R7;
1027    layout->o_arg1   = OFFSET_arm_R0;
1028    layout->o_arg2   = OFFSET_arm_R1;
1029    layout->o_arg3   = OFFSET_arm_R2;
1030    layout->o_arg4   = OFFSET_arm_R3;
1031    layout->o_arg5   = OFFSET_arm_R4;
1032    layout->o_arg6   = OFFSET_arm_R5;
1033    layout->uu_arg7  = -1; /* impossible value */
1034    layout->uu_arg8  = -1; /* impossible value */
1035 
1036 #elif defined(VGP_x86_darwin)
1037    layout->o_sysno  = OFFSET_x86_EAX;
1038    // syscall parameters are on stack in C convention
1039    layout->s_arg1   = sizeof(UWord) * 1;
1040    layout->s_arg2   = sizeof(UWord) * 2;
1041    layout->s_arg3   = sizeof(UWord) * 3;
1042    layout->s_arg4   = sizeof(UWord) * 4;
1043    layout->s_arg5   = sizeof(UWord) * 5;
1044    layout->s_arg6   = sizeof(UWord) * 6;
1045    layout->s_arg7   = sizeof(UWord) * 7;
1046    layout->s_arg8   = sizeof(UWord) * 8;
1047 
1048 #elif defined(VGP_amd64_darwin)
1049    layout->o_sysno  = OFFSET_amd64_RAX;
1050    layout->o_arg1   = OFFSET_amd64_RDI;
1051    layout->o_arg2   = OFFSET_amd64_RSI;
1052    layout->o_arg3   = OFFSET_amd64_RDX;
1053    layout->o_arg4   = OFFSET_amd64_RCX;
1054    layout->o_arg5   = OFFSET_amd64_R8;
1055    layout->o_arg6   = OFFSET_amd64_R9;
1056    layout->s_arg7   = sizeof(UWord) * 1;
1057    layout->s_arg8   = sizeof(UWord) * 2;
1058 
1059 #elif defined(VGP_s390x_linux)
1060    layout->o_sysno  = OFFSET_s390x_SYSNO;
1061    layout->o_arg1   = OFFSET_s390x_r2;
1062    layout->o_arg2   = OFFSET_s390x_r3;
1063    layout->o_arg3   = OFFSET_s390x_r4;
1064    layout->o_arg4   = OFFSET_s390x_r5;
1065    layout->o_arg5   = OFFSET_s390x_r6;
1066    layout->o_arg6   = OFFSET_s390x_r7;
1067    layout->uu_arg7  = -1; /* impossible value */
1068    layout->uu_arg8  = -1; /* impossible value */
1069 #else
1070 #  error "getSyscallLayout: unknown arch"
1071 #endif
1072 }
1073 
1074 
1075 /* ---------------------------------------------------------------------
1076    The main driver logic
1077    ------------------------------------------------------------------ */
1078 
1079 /* Finding the handlers for a given syscall, or faking up one
1080    when no handler is found. */
1081 
1082 static
bad_before(ThreadId tid,SyscallArgLayout * layout,SyscallArgs * args,SyscallStatus * status,UWord * flags)1083 void bad_before ( ThreadId              tid,
1084                   SyscallArgLayout*     layout,
1085                   /*MOD*/SyscallArgs*   args,
1086                   /*OUT*/SyscallStatus* status,
1087                   /*OUT*/UWord*         flags )
1088 {
1089    VG_(dmsg)("WARNING: unhandled syscall: %s\n",
1090       VG_SYSNUM_STRING_EXTRA(args->sysno));
1091    if (VG_(clo_verbosity) > 1) {
1092       VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1093    }
1094    VG_(dmsg)("You may be able to write your own handler.\n");
1095    VG_(dmsg)("Read the file README_MISSING_SYSCALL_OR_IOCTL.\n");
1096    VG_(dmsg)("Nevertheless we consider this a bug.  Please report\n");
1097    VG_(dmsg)("it at http://valgrind.org/support/bug_reports.html.\n");
1098 
1099    SET_STATUS_Failure(VKI_ENOSYS);
1100 }
1101 
1102 static SyscallTableEntry bad_sys =
1103    { bad_before, NULL };
1104 
get_syscall_entry(Int syscallno)1105 static const SyscallTableEntry* get_syscall_entry ( Int syscallno )
1106 {
1107    const SyscallTableEntry* sys = NULL;
1108 
1109 #  if defined(VGO_linux)
1110    sys = ML_(get_linux_syscall_entry)( syscallno );
1111 
1112 #  elif defined(VGO_darwin)
1113    Int idx = VG_DARWIN_SYSNO_INDEX(syscallno);
1114 
1115    switch (VG_DARWIN_SYSNO_CLASS(syscallno)) {
1116    case VG_DARWIN_SYSCALL_CLASS_UNIX:
1117       if (idx >= 0 && idx < ML_(syscall_table_size) &&
1118           ML_(syscall_table)[idx].before != NULL)
1119          sys = &ML_(syscall_table)[idx];
1120          break;
1121    case VG_DARWIN_SYSCALL_CLASS_MACH:
1122       if (idx >= 0 && idx < ML_(mach_trap_table_size) &&
1123           ML_(mach_trap_table)[idx].before != NULL)
1124          sys = &ML_(mach_trap_table)[idx];
1125          break;
1126    case VG_DARWIN_SYSCALL_CLASS_MDEP:
1127       if (idx >= 0 && idx < ML_(mdep_trap_table_size) &&
1128           ML_(mdep_trap_table)[idx].before != NULL)
1129          sys = &ML_(mdep_trap_table)[idx];
1130          break;
1131    default:
1132       vg_assert(0);
1133       break;
1134    }
1135 
1136 #  else
1137 #    error Unknown OS
1138 #  endif
1139 
1140    return sys == NULL  ? &bad_sys  : sys;
1141 }
1142 
1143 
1144 /* Add and remove signals from mask so that we end up telling the
1145    kernel the state we actually want rather than what the client
1146    wants. */
sanitize_client_sigmask(vki_sigset_t * mask)1147 static void sanitize_client_sigmask(vki_sigset_t *mask)
1148 {
1149    VG_(sigdelset)(mask, VKI_SIGKILL);
1150    VG_(sigdelset)(mask, VKI_SIGSTOP);
1151    VG_(sigdelset)(mask, VG_SIGVGKILL); /* never block */
1152 }
1153 
1154 typedef
1155    struct {
1156       SyscallArgs   orig_args;
1157       SyscallArgs   args;
1158       SyscallStatus status;
1159       UWord         flags;
1160    }
1161    SyscallInfo;
1162 
1163 SyscallInfo syscallInfo[VG_N_THREADS];
1164 
1165 
1166 /* The scheduler needs to be able to zero out these records after a
1167    fork, hence this is exported from m_syswrap. */
VG_(clear_syscallInfo)1168 void VG_(clear_syscallInfo) ( Int tid )
1169 {
1170    vg_assert(tid >= 0 && tid < VG_N_THREADS);
1171    VG_(memset)( & syscallInfo[tid], 0, sizeof( syscallInfo[tid] ));
1172    syscallInfo[tid].status.what = SsIdle;
1173 }
1174 
ensure_initialised(void)1175 static void ensure_initialised ( void )
1176 {
1177    Int i;
1178    static Bool init_done = False;
1179    if (init_done)
1180       return;
1181    init_done = True;
1182    for (i = 0; i < VG_N_THREADS; i++) {
1183       VG_(clear_syscallInfo)( i );
1184    }
1185 }
1186 
1187 /* --- This is the main function of this file. --- */
1188 
VG_(client_syscall)1189 void VG_(client_syscall) ( ThreadId tid, UInt trc )
1190 {
1191    Word                     sysno;
1192    ThreadState*             tst;
1193    const SyscallTableEntry* ent;
1194    SyscallArgLayout         layout;
1195    SyscallInfo*             sci;
1196 
1197    ensure_initialised();
1198 
1199    vg_assert(VG_(is_valid_tid)(tid));
1200    vg_assert(tid >= 1 && tid < VG_N_THREADS);
1201    vg_assert(VG_(is_running_thread)(tid));
1202 
1203    tst = VG_(get_ThreadState)(tid);
1204 
1205    /* BEGIN ensure root thread's stack is suitably mapped */
1206    /* In some rare circumstances, we may do the syscall without the
1207       bottom page of the stack being mapped, because the stack pointer
1208       was moved down just a few instructions before the syscall
1209       instruction, and there have been no memory references since
1210       then, that would cause a call to VG_(extend_stack) to have
1211       happened.
1212 
1213       In native execution that's OK: the kernel automagically extends
1214       the stack's mapped area down to cover the stack pointer (or sp -
1215       redzone, really).  In simulated normal execution that's OK too,
1216       since any signals we get from accessing below the mapped area of
1217       the (guest's) stack lead us to VG_(extend_stack), where we
1218       simulate the kernel's stack extension logic.  But that leaves
1219       the problem of entering a syscall with the SP unmapped.  Because
1220       the kernel doesn't know that the segment immediately above SP is
1221       supposed to be a grow-down segment, it causes the syscall to
1222       fail, and thereby causes a divergence between native behaviour
1223       (syscall succeeds) and simulated behaviour (syscall fails).
1224 
1225       This is quite a rare failure mode.  It has only been seen
1226       affecting calls to sys_readlink on amd64-linux, and even then it
1227       requires a certain code sequence around the syscall to trigger
1228       it.  Here is one:
1229 
1230       extern int my_readlink ( const char* path );
1231       asm(
1232       ".text\n"
1233       ".globl my_readlink\n"
1234       "my_readlink:\n"
1235       "\tsubq    $0x1008,%rsp\n"
1236       "\tmovq    %rdi,%rdi\n"              // path is in rdi
1237       "\tmovq    %rsp,%rsi\n"              // &buf[0] -> rsi
1238       "\tmovl    $0x1000,%edx\n"           // sizeof(buf) in rdx
1239       "\tmovl    $"__NR_READLINK",%eax\n"  // syscall number
1240       "\tsyscall\n"
1241       "\taddq    $0x1008,%rsp\n"
1242       "\tret\n"
1243       ".previous\n"
1244       );
1245 
1246       For more details, see bug #156404
1247       (https://bugs.kde.org/show_bug.cgi?id=156404).
1248 
1249       The fix is actually very simple.  We simply need to call
1250       VG_(extend_stack) for this thread, handing it the lowest
1251       possible valid address for stack (sp - redzone), to ensure the
1252       pages all the way down to that address, are mapped.  Because
1253       this is a potentially expensive and frequent operation, we
1254       filter in two ways:
1255 
1256       First, only the main thread (tid=1) has a growdown stack.  So
1257       ignore all others.  It is conceivable, although highly unlikely,
1258       that the main thread exits, and later another thread is
1259       allocated tid=1, but that's harmless, I believe;
1260       VG_(extend_stack) will do nothing when applied to a non-root
1261       thread.
1262 
1263       Secondly, first call VG_(am_find_nsegment) directly, to see if
1264       the page holding (sp - redzone) is mapped correctly.  If so, do
1265       nothing.  This is almost always the case.  VG_(extend_stack)
1266       calls VG_(am_find_nsegment) twice, so this optimisation -- and
1267       that's all it is -- more or less halves the number of calls to
1268       VG_(am_find_nsegment) required.
1269 
1270       TODO: the test "seg->kind == SkAnonC" is really inadequate,
1271       because although it tests whether the segment is mapped
1272       _somehow_, it doesn't check that it has the right permissions
1273       (r,w, maybe x) ?  We could test that here, but it will also be
1274       necessary to fix the corresponding test in VG_(extend_stack).
1275 
1276       All this guff is of course Linux-specific.  Hence the ifdef.
1277    */
1278 #  if defined(VGO_linux)
1279    if (tid == 1/*ROOT THREAD*/) {
1280       Addr     stackMin   = VG_(get_SP)(tid) - VG_STACK_REDZONE_SZB;
1281       NSegment const* seg = VG_(am_find_nsegment)(stackMin);
1282       if (seg && seg->kind == SkAnonC) {
1283          /* stackMin is already mapped.  Nothing to do. */
1284       } else {
1285          (void)VG_(extend_stack)( stackMin,
1286                                   tst->client_stack_szB );
1287       }
1288    }
1289 #  endif
1290    /* END ensure root thread's stack is suitably mapped */
1291 
1292    /* First off, get the syscall args and number.  This is a
1293       platform-dependent action. */
1294 
1295    sci = & syscallInfo[tid];
1296    vg_assert(sci->status.what == SsIdle);
1297 
1298    getSyscallArgsFromGuestState( &sci->orig_args, &tst->arch.vex, trc );
1299 
1300    /* Copy .orig_args to .args.  The pre-handler may modify .args, but
1301       we want to keep the originals too, just in case. */
1302    sci->args = sci->orig_args;
1303 
1304    /* Save the syscall number in the thread state in case the syscall
1305       is interrupted by a signal. */
1306    sysno = sci->orig_args.sysno;
1307 
1308    /* It's sometimes useful, as a crude debugging hack, to get a
1309       stack trace at each (or selected) syscalls. */
1310    if (0 && sysno == __NR_ioctl) {
1311       VG_(umsg)("\nioctl:\n");
1312       VG_(get_and_pp_StackTrace)(tid, 10);
1313       VG_(umsg)("\n");
1314    }
1315 
1316 #  if defined(VGO_darwin)
1317    /* Record syscall class.  But why?  Because the syscall might be
1318       interrupted by a signal, and in the signal handler (which will
1319       be m_signals.async_signalhandler) we will need to build a SysRes
1320       reflecting the syscall return result.  In order to do that we
1321       need to know the syscall class.  Hence stash it in the guest
1322       state of this thread.  This madness is not needed on Linux
1323       because it only has a single syscall return convention and so
1324       there is no ambiguity involved in converting the post-signal
1325       machine state into a SysRes. */
1326    tst->arch.vex.guest_SC_CLASS = VG_DARWIN_SYSNO_CLASS(sysno);
1327 #  endif
1328 
1329    /* The default what-to-do-next thing is hand the syscall to the
1330       kernel, so we pre-set that here.  Set .sres to something
1331       harmless looking (is irrelevant because .what is not
1332       SsComplete.) */
1333    sci->status.what = SsHandToKernel;
1334    sci->status.sres = VG_(mk_SysRes_Error)(0);
1335    sci->flags       = 0;
1336 
1337    /* Fetch the syscall's handlers.  If no handlers exist for this
1338       syscall, we are given dummy handlers which force an immediate
1339       return with ENOSYS. */
1340    ent = get_syscall_entry(sysno);
1341 
1342    /* Fetch the layout information, which tells us where in the guest
1343       state the syscall args reside.  This is a platform-dependent
1344       action.  This info is needed so that the scalar syscall argument
1345       checks (PRE_REG_READ calls) know which bits of the guest state
1346       they need to inspect. */
1347    getSyscallArgLayout( &layout );
1348 
1349    /* Make sure the tmp signal mask matches the real signal mask;
1350       sigsuspend may change this. */
1351    vg_assert(VG_(iseqsigset)(&tst->sig_mask, &tst->tmp_sig_mask));
1352 
1353    /* Right, we're finally ready to Party.  Call the pre-handler and
1354       see what we get back.  At this point:
1355 
1356         sci->status.what  is Unset (we don't know yet).
1357         sci->orig_args    contains the original args.
1358         sci->args         is the same as sci->orig_args.
1359         sci->flags        is zero.
1360    */
1361 
1362    PRINT("SYSCALL[%d,%d](%s) ",
1363       VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno));
1364 
1365    /* Do any pre-syscall actions */
1366    if (VG_(needs).syscall_wrapper) {
1367       UWord tmpv[8];
1368       tmpv[0] = sci->orig_args.arg1;
1369       tmpv[1] = sci->orig_args.arg2;
1370       tmpv[2] = sci->orig_args.arg3;
1371       tmpv[3] = sci->orig_args.arg4;
1372       tmpv[4] = sci->orig_args.arg5;
1373       tmpv[5] = sci->orig_args.arg6;
1374       tmpv[6] = sci->orig_args.arg7;
1375       tmpv[7] = sci->orig_args.arg8;
1376       VG_TDICT_CALL(tool_pre_syscall, tid, sysno,
1377                     &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]));
1378    }
1379 
1380    vg_assert(ent);
1381    vg_assert(ent->before);
1382    (ent->before)( tid,
1383                   &layout,
1384                   &sci->args, &sci->status, &sci->flags );
1385 
1386    /* The pre-handler may have modified:
1387          sci->args
1388          sci->status
1389          sci->flags
1390       All else remains unchanged.
1391       Although the args may be modified, pre handlers are not allowed
1392       to change the syscall number.
1393    */
1394    /* Now we proceed according to what the pre-handler decided. */
1395    vg_assert(sci->status.what == SsHandToKernel
1396              || sci->status.what == SsComplete);
1397    vg_assert(sci->args.sysno == sci->orig_args.sysno);
1398 
1399    if (sci->status.what == SsComplete && !sr_isError(sci->status.sres)) {
1400       /* The pre-handler completed the syscall itself, declaring
1401          success. */
1402       if (sci->flags & SfNoWriteResult) {
1403          PRINT(" --> [pre-success] NoWriteResult");
1404       } else {
1405          PRINT(" --> [pre-success] Success(0x%llx:0x%llx)",
1406                (ULong)sr_ResHI(sci->status.sres),
1407                (ULong)sr_Res(sci->status.sres));
1408       }
1409       /* In this case the allowable flags are to ask for a signal-poll
1410          and/or a yield after the call.  Changing the args isn't
1411          allowed. */
1412       vg_assert(0 == (sci->flags
1413                       & ~(SfPollAfter | SfYieldAfter | SfNoWriteResult)));
1414       vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
1415    }
1416 
1417    else
1418    if (sci->status.what == SsComplete && sr_isError(sci->status.sres)) {
1419       /* The pre-handler decided to fail syscall itself. */
1420       PRINT(" --> [pre-fail] Failure(0x%llx)", (ULong)sr_Err(sci->status.sres));
1421       /* In this case, the pre-handler is also allowed to ask for the
1422          post-handler to be run anyway.  Changing the args is not
1423          allowed. */
1424       vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
1425       vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
1426    }
1427 
1428    else
1429    if (sci->status.what != SsHandToKernel) {
1430       /* huh?! */
1431       vg_assert(0);
1432    }
1433 
1434    else /* (sci->status.what == HandToKernel) */ {
1435       /* Ok, this is the usual case -- and the complicated one.  There
1436          are two subcases: sync and async.  async is the general case
1437          and is to be used when there is any possibility that the
1438          syscall might block [a fact that the pre-handler must tell us
1439          via the sci->flags field.]  Because the tidying-away /
1440          context-switch overhead of the async case could be large, if
1441          we are sure that the syscall will not block, we fast-track it
1442          by doing it directly in this thread, which is a lot
1443          simpler. */
1444 
1445       /* Check that the given flags are allowable: MayBlock, PollAfter
1446          and PostOnFail are ok. */
1447       vg_assert(0 == (sci->flags & ~(SfMayBlock | SfPostOnFail | SfPollAfter)));
1448 
1449       if (sci->flags & SfMayBlock) {
1450 
1451          /* Syscall may block, so run it asynchronously */
1452          vki_sigset_t mask;
1453 
1454          PRINT(" --> [async] ... \n");
1455 
1456          mask = tst->sig_mask;
1457          sanitize_client_sigmask(&mask);
1458 
1459          /* Gack.  More impedance matching.  Copy the possibly
1460             modified syscall args back into the guest state. */
1461          /* JRS 2009-Mar-16: if the syscall args are possibly modified,
1462             then this assertion is senseless:
1463               vg_assert(eq_SyscallArgs(&sci->args, &sci->orig_args));
1464             The case that exposed it was sys_posix_spawn on Darwin,
1465             which heavily modifies its arguments but then lets the call
1466             go through anyway, with SfToBlock set, hence we end up here. */
1467          putSyscallArgsIntoGuestState( &sci->args, &tst->arch.vex );
1468 
1469          /* Drop the bigLock */
1470          VG_(release_BigLock)(tid, VgTs_WaitSys, "VG_(client_syscall)[async]");
1471          /* Urr.  We're now in a race against other threads trying to
1472             acquire the bigLock.  I guess that doesn't matter provided
1473             that do_syscall_for_client only touches thread-local
1474             state. */
1475 
1476          /* Do the call, which operates directly on the guest state,
1477             not on our abstracted copies of the args/result. */
1478          do_syscall_for_client(sysno, tst, &mask);
1479 
1480          /* do_syscall_for_client may not return if the syscall was
1481             interrupted by a signal.  In that case, flow of control is
1482             first to m_signals.async_sighandler, which calls
1483             VG_(fixup_guest_state_after_syscall_interrupted), which
1484             fixes up the guest state, and possibly calls
1485             VG_(post_syscall).  Once that's done, control drops back
1486             to the scheduler.  */
1487 
1488          /* Darwin: do_syscall_for_client may not return if the
1489             syscall was workq_ops(WQOPS_THREAD_RETURN) and the kernel
1490             responded by starting the thread at wqthread_hijack(reuse=1)
1491             (to run another workqueue item). In that case, wqthread_hijack
1492             calls ML_(wqthread_continue), which is similar to
1493             VG_(fixup_guest_state_after_syscall_interrupted). */
1494 
1495          /* Reacquire the lock */
1496          VG_(acquire_BigLock)(tid, "VG_(client_syscall)[async]");
1497 
1498          /* Even more impedance matching.  Extract the syscall status
1499             from the guest state. */
1500          getSyscallStatusFromGuestState( &sci->status, &tst->arch.vex );
1501          vg_assert(sci->status.what == SsComplete);
1502 
1503          /* Be decorative, if required. */
1504          if (VG_(clo_trace_syscalls)) {
1505             Bool failed = sr_isError(sci->status.sres);
1506             if (failed) {
1507                PRINT("SYSCALL[%d,%d](%s) ... [async] --> Failure(0x%llx)",
1508                      VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
1509                      (ULong)sr_Err(sci->status.sres));
1510             } else {
1511                PRINT("SYSCALL[%d,%d](%s) ... [async] --> "
1512                      "Success(0x%llx:0x%llx)",
1513                      VG_(getpid)(), tid, VG_SYSNUM_STRING(sysno),
1514                      (ULong)sr_ResHI(sci->status.sres),
1515                      (ULong)sr_Res(sci->status.sres) );
1516             }
1517          }
1518 
1519       } else {
1520 
1521          /* run the syscall directly */
1522          /* The pre-handler may have modified the syscall args, but
1523             since we're passing values in ->args directly to the
1524             kernel, there's no point in flushing them back to the
1525             guest state.  Indeed doing so could be construed as
1526             incorrect. */
1527          SysRes sres
1528             = VG_(do_syscall)(sysno, sci->args.arg1, sci->args.arg2,
1529                                      sci->args.arg3, sci->args.arg4,
1530                                      sci->args.arg5, sci->args.arg6,
1531                                      sci->args.arg7, sci->args.arg8 );
1532          sci->status = convert_SysRes_to_SyscallStatus(sres);
1533 
1534          /* Be decorative, if required. */
1535          if (VG_(clo_trace_syscalls)) {
1536             Bool failed = sr_isError(sci->status.sres);
1537             if (failed) {
1538                PRINT("[sync] --> Failure(0x%llx)",
1539                      (ULong)sr_Err(sci->status.sres) );
1540             } else {
1541                PRINT("[sync] --> Success(0x%llx:0x%llx)",
1542                      (ULong)sr_ResHI(sci->status.sres),
1543                      (ULong)sr_Res(sci->status.sres) );
1544             }
1545          }
1546       }
1547    }
1548 
1549    vg_assert(sci->status.what == SsComplete);
1550 
1551    vg_assert(VG_(is_running_thread)(tid));
1552 
1553    /* Dump the syscall result back in the guest state.  This is
1554       a platform-specific action. */
1555    if (!(sci->flags & SfNoWriteResult))
1556       putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );
1557 
1558    /* Situation now:
1559       - the guest state is now correctly modified following the syscall
1560       - modified args, original args and syscall status are still
1561         available in the syscallInfo[] entry for this syscall.
1562 
1563       Now go on to do the post-syscall actions (read on down ..)
1564    */
1565    PRINT(" ");
1566    VG_(post_syscall)(tid);
1567    PRINT("\n");
1568 }
1569 
1570 
1571 /* Perform post syscall actions.  The expected state on entry is
1572    precisely as at the end of VG_(client_syscall), that is:
1573 
1574    - guest state up to date following the syscall
1575    - modified args, original args and syscall status are still
1576      available in the syscallInfo[] entry for this syscall.
1577    - syscall status matches what's in the guest state.
1578 
1579    There are two ways to get here: the normal way -- being called by
1580    VG_(client_syscall), and the unusual way, from
1581    VG_(fixup_guest_state_after_syscall_interrupted).
1582    Darwin: there's a third way, ML_(wqthread_continue).
1583 */
VG_(post_syscall)1584 void VG_(post_syscall) (ThreadId tid)
1585 {
1586    SyscallInfo*             sci;
1587    const SyscallTableEntry* ent;
1588    SyscallStatus            test_status;
1589    ThreadState*             tst;
1590    Word sysno;
1591 
1592    /* Preliminaries */
1593    vg_assert(VG_(is_valid_tid)(tid));
1594    vg_assert(tid >= 1 && tid < VG_N_THREADS);
1595    vg_assert(VG_(is_running_thread)(tid));
1596 
1597    tst = VG_(get_ThreadState)(tid);
1598    sci = & syscallInfo[tid];
1599 
1600    /* m_signals.sigvgkill_handler might call here even when not in
1601       a syscall. */
1602    if (sci->status.what == SsIdle || sci->status.what == SsHandToKernel) {
1603       sci->status.what = SsIdle;
1604       return;
1605    }
1606 
1607    /* Validate current syscallInfo entry.  In particular we require
1608       that the current .status matches what's actually in the guest
1609       state.  At least in the normal case where we have actually
1610       previously written the result into the guest state. */
1611    vg_assert(sci->status.what == SsComplete);
1612 
1613    getSyscallStatusFromGuestState( &test_status, &tst->arch.vex );
1614    if (!(sci->flags & SfNoWriteResult))
1615       vg_assert(eq_SyscallStatus( &sci->status, &test_status ));
1616    /* Failure of the above assertion on Darwin can indicate a problem
1617       in the syscall wrappers that pre-fail or pre-succeed the
1618       syscall, by calling SET_STATUS_Success or SET_STATUS_Failure,
1619       when they really should call SET_STATUS_from_SysRes.  The former
1620       create a UNIX-class syscall result on Darwin, which may not be
1621       correct for the syscall; if that's the case then this assertion
1622       fires.  See PRE(thread_fast_set_cthread_self) for an example.  On
1623       non-Darwin platforms this assertion is should never fail, and this
1624       comment is completely irrelevant. */
1625    /* Ok, looks sane */
1626 
1627    /* Get the system call number.  Because the pre-handler isn't
1628       allowed to mess with it, it should be the same for both the
1629       original and potentially-modified args. */
1630    vg_assert(sci->args.sysno == sci->orig_args.sysno);
1631    sysno = sci->args.sysno;
1632    ent = get_syscall_entry(sysno);
1633 
1634    /* pre: status == Complete (asserted above) */
1635    /* Consider either success or failure.  Now run the post handler if:
1636       - it exists, and
1637       - Success or (Failure and PostOnFail is set)
1638    */
1639    if (ent->after
1640        && ((!sr_isError(sci->status.sres))
1641            || (sr_isError(sci->status.sres)
1642                && (sci->flags & SfPostOnFail) ))) {
1643 
1644       (ent->after)( tid, &sci->args, &sci->status );
1645    }
1646 
1647    /* Because the post handler might have changed the status (eg, the
1648       post-handler for sys_open can change the result from success to
1649       failure if the kernel supplied a fd that it doesn't like), once
1650       again dump the syscall result back in the guest state.*/
1651    if (!(sci->flags & SfNoWriteResult))
1652       putSyscallStatusIntoGuestState( tid, &sci->status, &tst->arch.vex );
1653 
1654    /* Do any post-syscall actions required by the tool. */
1655    if (VG_(needs).syscall_wrapper) {
1656       UWord tmpv[8];
1657       tmpv[0] = sci->orig_args.arg1;
1658       tmpv[1] = sci->orig_args.arg2;
1659       tmpv[2] = sci->orig_args.arg3;
1660       tmpv[3] = sci->orig_args.arg4;
1661       tmpv[4] = sci->orig_args.arg5;
1662       tmpv[5] = sci->orig_args.arg6;
1663       tmpv[6] = sci->orig_args.arg7;
1664       tmpv[7] = sci->orig_args.arg8;
1665       VG_TDICT_CALL(tool_post_syscall, tid,
1666                     sysno,
1667                     &tmpv[0], sizeof(tmpv)/sizeof(tmpv[0]),
1668                     sci->status.sres);
1669    }
1670 
1671    /* The syscall is done. */
1672    vg_assert(sci->status.what == SsComplete);
1673    sci->status.what = SsIdle;
1674 
1675    /* The pre/post wrappers may have concluded that pending signals
1676       might have been created, and will have set SfPollAfter to
1677       request a poll for them once the syscall is done. */
1678    if (sci->flags & SfPollAfter)
1679       VG_(poll_signals)(tid);
1680 
1681    /* Similarly, the wrappers might have asked for a yield
1682       afterwards. */
1683    if (sci->flags & SfYieldAfter)
1684       VG_(vg_yield)();
1685 }
1686 
1687 
1688 /* ---------------------------------------------------------------------
1689    Dealing with syscalls which get interrupted by a signal:
1690    VG_(fixup_guest_state_after_syscall_interrupted)
1691    ------------------------------------------------------------------ */
1692 
/* Syscalls done on behalf of the client are finally handed off to the
   kernel in VG_(client_syscall) above, either by calling
   do_syscall_for_client (the async case), or by calling
   VG_(do_syscall) (the sync case).

   If the syscall is not interrupted by a signal (it may block and
   later unblock, but that's irrelevant here) then those functions
   eventually return and so control is passed to VG_(post_syscall).
   NB: not sure if the sync case can actually get interrupted, as it
   operates with all signals masked.

   However, the syscall may get interrupted by an async-signal.  In
   that case do_syscall_for_client/VG_(do_syscall) do not
   return.  Instead we wind up in m_signals.async_sighandler.  We need
   to fix up the guest state so that, from the guest's point of view,
   the syscall looks like it was interrupted.  So async_sighandler
   calls here, and this does the fixup.  Note that from here we wind
   up calling VG_(post_syscall) too.
*/
1712 
1713 
1714 /* These are addresses within ML_(do_syscall_for_client_WRK).  See
1715    syscall-$PLAT.S for details.
1716 */
1717 #if defined(VGO_linux)
1718   extern const Addr ML_(blksys_setup);
1719   extern const Addr ML_(blksys_restart);
1720   extern const Addr ML_(blksys_complete);
1721   extern const Addr ML_(blksys_committed);
1722   extern const Addr ML_(blksys_finished);
1723 #elif defined(VGO_darwin)
1724   /* Darwin requires extra uglyness */
1725   extern const Addr ML_(blksys_setup_MACH);
1726   extern const Addr ML_(blksys_restart_MACH);
1727   extern const Addr ML_(blksys_complete_MACH);
1728   extern const Addr ML_(blksys_committed_MACH);
1729   extern const Addr ML_(blksys_finished_MACH);
1730   extern const Addr ML_(blksys_setup_MDEP);
1731   extern const Addr ML_(blksys_restart_MDEP);
1732   extern const Addr ML_(blksys_complete_MDEP);
1733   extern const Addr ML_(blksys_committed_MDEP);
1734   extern const Addr ML_(blksys_finished_MDEP);
1735   extern const Addr ML_(blksys_setup_UNIX);
1736   extern const Addr ML_(blksys_restart_UNIX);
1737   extern const Addr ML_(blksys_complete_UNIX);
1738   extern const Addr ML_(blksys_committed_UNIX);
1739   extern const Addr ML_(blksys_finished_UNIX);
1740 #else
1741 # error "Unknown OS"
1742 #endif
1743 
1744 
1745 /* Back up guest state to restart a system call. */
1746 
ML_(fixup_guest_state_to_restart_syscall)1747 void ML_(fixup_guest_state_to_restart_syscall) ( ThreadArchState* arch )
1748 {
1749 #if defined(VGP_x86_linux)
1750    arch->vex.guest_EIP -= 2;             // sizeof(int $0x80)
1751 
1752    /* Make sure our caller is actually sane, and we're really backing
1753       back over a syscall.
1754 
1755       int $0x80 == CD 80
1756    */
1757    {
1758       UChar *p = (UChar *)arch->vex.guest_EIP;
1759 
1760       if (p[0] != 0xcd || p[1] != 0x80)
1761          VG_(message)(Vg_DebugMsg,
1762                       "?! restarting over syscall at %#x %02x %02x\n",
1763                       arch->vex.guest_EIP, p[0], p[1]);
1764 
1765       vg_assert(p[0] == 0xcd && p[1] == 0x80);
1766    }
1767 
1768 #elif defined(VGP_amd64_linux)
1769    arch->vex.guest_RIP -= 2;             // sizeof(syscall)
1770 
1771    /* Make sure our caller is actually sane, and we're really backing
1772       back over a syscall.
1773 
1774       syscall == 0F 05
1775    */
1776    {
1777       UChar *p = (UChar *)arch->vex.guest_RIP;
1778 
1779       if (p[0] != 0x0F || p[1] != 0x05)
1780          VG_(message)(Vg_DebugMsg,
1781                       "?! restarting over syscall at %#llx %02x %02x\n",
1782                       arch->vex.guest_RIP, p[0], p[1]);
1783 
1784       vg_assert(p[0] == 0x0F && p[1] == 0x05);
1785    }
1786 
1787 #elif defined(VGP_ppc32_linux) || defined(VGP_ppc64_linux)
1788    arch->vex.guest_CIA -= 4;             // sizeof(ppc32 instr)
1789 
1790    /* Make sure our caller is actually sane, and we're really backing
1791       back over a syscall.
1792 
1793       sc == 44 00 00 02
1794    */
1795    {
1796       UChar *p = (UChar *)arch->vex.guest_CIA;
1797 
1798       if (p[0] != 0x44 || p[1] != 0x0 || p[2] != 0x0 || p[3] != 0x02)
1799          VG_(message)(Vg_DebugMsg,
1800                       "?! restarting over syscall at %#llx %02x %02x %02x %02x\n",
1801                       arch->vex.guest_CIA + 0ULL, p[0], p[1], p[2], p[3]);
1802 
1803       vg_assert(p[0] == 0x44 && p[1] == 0x0 && p[2] == 0x0 && p[3] == 0x2);
1804    }
1805 
1806 #elif defined(VGP_arm_linux)
1807    if (arch->vex.guest_R15T & 1) {
1808       // Thumb mode.  SVC is a encoded as
1809       //   1101 1111 imm8
1810       // where imm8 is the SVC number, and we only accept 0.
1811       arch->vex.guest_R15T -= 2;   // sizeof(thumb 16 bit insn)
1812       UChar* p     = (UChar*)(arch->vex.guest_R15T - 1);
1813       Bool   valid = p[0] == 0 && p[1] == 0xDF;
1814       if (!valid) {
1815          VG_(message)(Vg_DebugMsg,
1816                       "?! restarting over (Thumb) syscall that is not syscall "
1817                       "at %#llx %02x %02x\n",
1818                       arch->vex.guest_R15T - 1ULL, p[0], p[1]);
1819       }
1820       vg_assert(valid);
1821       // FIXME: NOTE, this really isn't right.  We need to back up
1822       // ITSTATE to what it was before the SVC instruction, but we
1823       // don't know what it was.  At least assert that it is now
1824       // zero, because if it is nonzero then it must also have
1825       // been nonzero for the SVC itself, which means it was
1826       // conditional.  Urk.
1827       vg_assert(arch->vex.guest_ITSTATE == 0);
1828    } else {
1829       // ARM mode.  SVC is encoded as
1830       //   cond 1111 imm24
1831       // where imm24 is the SVC number, and we only accept 0.
1832       arch->vex.guest_R15T -= 4;   // sizeof(arm instr)
1833       UChar* p     = (UChar*)arch->vex.guest_R15T;
1834       Bool   valid = p[0] == 0 && p[1] == 0 && p[2] == 0
1835                      && (p[3] & 0xF) == 0xF;
1836       if (!valid) {
1837          VG_(message)(Vg_DebugMsg,
1838                       "?! restarting over (ARM) syscall that is not syscall "
1839                       "at %#llx %02x %02x %02x %02x\n",
1840                       arch->vex.guest_R15T + 0ULL, p[0], p[1], p[2], p[3]);
1841       }
1842       vg_assert(valid);
1843    }
1844 
1845 #elif defined(VGP_x86_darwin)
1846    arch->vex.guest_EIP = arch->vex.guest_IP_AT_SYSCALL;
1847 
1848    /* Make sure our caller is actually sane, and we're really backing
1849       back over a syscall.
1850 
1851       int $0x80 == CD 80
1852       int $0x81 == CD 81
1853       int $0x82 == CD 82
1854       sysenter  == 0F 34
1855    */
1856    {
1857        UChar *p = (UChar *)arch->vex.guest_EIP;
1858        Bool  ok = (p[0] == 0xCD && p[1] == 0x80)
1859                   || (p[0] == 0xCD && p[1] == 0x81)
1860                   || (p[0] == 0xCD && p[1] == 0x82)
1861                   || (p[0] == 0x0F && p[1] == 0x34);
1862        if (!ok)
1863            VG_(message)(Vg_DebugMsg,
1864                         "?! restarting over syscall at %#x %02x %02x\n",
1865                         arch->vex.guest_EIP, p[0], p[1]);
1866        vg_assert(ok);
1867    }
1868 
1869 #elif defined(VGP_amd64_darwin)
1870    // DDD: #warning GrP fixme amd64 restart unimplemented
1871    vg_assert(0);
1872 
1873 #elif defined(VGP_s390x_linux)
1874    arch->vex.guest_IA -= 2;             // sizeof(syscall)
1875 
1876    /* Make sure our caller is actually sane, and we're really backing
1877       back over a syscall.
1878 
1879       syscall == 0A <num>
1880    */
1881    {
1882       UChar *p = (UChar *)arch->vex.guest_IA;
1883       if (p[0] != 0x0A)
1884          VG_(message)(Vg_DebugMsg,
1885                       "?! restarting over syscall at %#llx %02x %02x\n",
1886                       arch->vex.guest_IA, p[0], p[1]);
1887 
1888       vg_assert(p[0] == 0x0A);
1889    }
1890 #else
1891 #  error "ML_(fixup_guest_state_to_restart_syscall): unknown plat"
1892 #endif
1893 }
1894 
1895 /*
1896    Fix up the guest state when a syscall is interrupted by a signal
1897    and so has been forced to return 'sysret'.
1898 
1899    To do this, we determine the precise state of the syscall by
1900    looking at the (real) IP at the time the signal happened.  The
1901    syscall sequence looks like:
1902 
1903      1. unblock signals
1904      2. perform syscall
1905      3. save result to guest state (EAX, RAX, R3+CR0.SO)
1906      4. re-block signals
1907 
1908    If a signal
1909    happens at      Then     Why?
1910    [1-2)           restart  nothing has happened (restart syscall)
1911    [2]             restart  syscall hasn't started, or kernel wants to restart
1912    [2-3)           save     syscall complete, but results not saved
1913    [3-4)           syscall complete, results saved
1914 
1915    Sometimes we never want to restart an interrupted syscall (because
1916    sigaction says not to), so we only restart if "restart" is True.
1917 
1918    This will also call VG_(post_syscall) if the syscall has actually
1919    completed (either because it was interrupted, or because it
1920    actually finished).  It will not call VG_(post_syscall) if the
1921    syscall is set up for restart, which means that the pre-wrapper may
1922    get called multiple times.
1923 */
1924 
1925 void
VG_(fixup_guest_state_after_syscall_interrupted)1926 VG_(fixup_guest_state_after_syscall_interrupted)( ThreadId tid,
1927                                                   Addr     ip,
1928                                                   SysRes   sres,
1929                                                   Bool     restart)
1930 {
1931    /* Note that we don't know the syscall number here, since (1) in
1932       general there's no reliable way to get hold of it short of
1933       stashing it in the guest state before the syscall, and (2) in
1934       any case we don't need to know it for the actions done by this
1935       routine.
1936 
1937       Furthermore, 'sres' is only used in the case where the syscall
1938       is complete, but the result has not been committed to the guest
1939       state yet.  In any other situation it will be meaningless and
1940       therefore ignored. */
1941 
1942    ThreadState*     tst;
1943    SyscallStatus    canonical;
1944    ThreadArchState* th_regs;
1945    SyscallInfo*     sci;
1946 
1947    /* Compute some Booleans indicating which range we're in. */
1948    Bool outside_range,
1949         in_setup_to_restart,      // [1,2) in the .S files
1950         at_restart,               // [2]   in the .S files
1951         in_complete_to_committed, // [3,4) in the .S files
1952         in_committed_to_finished; // [4,5) in the .S files
1953 
1954 #  if defined(VGO_linux)
1955    outside_range
1956       = ip < ML_(blksys_setup) || ip >= ML_(blksys_finished);
1957    in_setup_to_restart
1958       = ip >= ML_(blksys_setup) && ip < ML_(blksys_restart);
1959    at_restart
1960       = ip == ML_(blksys_restart);
1961    in_complete_to_committed
1962       = ip >= ML_(blksys_complete) && ip < ML_(blksys_committed);
1963    in_committed_to_finished
1964       = ip >= ML_(blksys_committed) && ip < ML_(blksys_finished);
1965 #  elif defined(VGO_darwin)
1966    outside_range
1967       =  (ip < ML_(blksys_setup_MACH) || ip >= ML_(blksys_finished_MACH))
1968       && (ip < ML_(blksys_setup_MDEP) || ip >= ML_(blksys_finished_MDEP))
1969       && (ip < ML_(blksys_setup_UNIX) || ip >= ML_(blksys_finished_UNIX));
1970    in_setup_to_restart
1971       =  (ip >= ML_(blksys_setup_MACH) && ip < ML_(blksys_restart_MACH))
1972       || (ip >= ML_(blksys_setup_MDEP) && ip < ML_(blksys_restart_MDEP))
1973       || (ip >= ML_(blksys_setup_UNIX) && ip < ML_(blksys_restart_UNIX));
1974    at_restart
1975       =  (ip == ML_(blksys_restart_MACH))
1976       || (ip == ML_(blksys_restart_MDEP))
1977       || (ip == ML_(blksys_restart_UNIX));
1978    in_complete_to_committed
1979       =  (ip >= ML_(blksys_complete_MACH) && ip < ML_(blksys_committed_MACH))
1980       || (ip >= ML_(blksys_complete_MDEP) && ip < ML_(blksys_committed_MDEP))
1981       || (ip >= ML_(blksys_complete_UNIX) && ip < ML_(blksys_committed_UNIX));
1982    in_committed_to_finished
1983       =  (ip >= ML_(blksys_committed_MACH) && ip < ML_(blksys_finished_MACH))
1984       || (ip >= ML_(blksys_committed_MDEP) && ip < ML_(blksys_finished_MDEP))
1985       || (ip >= ML_(blksys_committed_UNIX) && ip < ML_(blksys_finished_UNIX));
1986    /* Wasn't that just So Much Fun?  Does your head hurt yet?  Mine does. */
1987 #  else
1988 #    error "Unknown OS"
1989 #  endif
1990 
1991    if (VG_(clo_trace_signals))
1992       VG_(message)( Vg_DebugMsg,
1993                     "interrupted_syscall: tid=%d, ip=0x%llx, "
1994                     "restart=%s, sres.isErr=%s, sres.val=%lld\n",
1995                     (Int)tid,
1996                     (ULong)ip,
1997                     restart ? "True" : "False",
1998                     sr_isError(sres) ? "True" : "False",
1999                     (Long)(sr_isError(sres) ? sr_Err(sres) : sr_Res(sres)) );
2000 
2001    vg_assert(VG_(is_valid_tid)(tid));
2002    vg_assert(tid >= 1 && tid < VG_N_THREADS);
2003    vg_assert(VG_(is_running_thread)(tid));
2004 
2005    tst     = VG_(get_ThreadState)(tid);
2006    th_regs = &tst->arch;
2007    sci     = & syscallInfo[tid];
2008 
2009    /* Figure out what the state of the syscall was by examining the
2010       (real) IP at the time of the signal, and act accordingly. */
2011    if (outside_range) {
2012       if (VG_(clo_trace_signals))
2013          VG_(message)( Vg_DebugMsg,
2014                        "  not in syscall at all: hmm, very suspicious\n" );
2015       /* Looks like we weren't in a syscall at all.  Hmm. */
2016       vg_assert(sci->status.what != SsIdle);
2017       return;
2018    }
2019 
2020    /* We should not be here unless this thread had first started up
2021       the machinery for a syscall by calling VG_(client_syscall).
2022       Hence: */
2023    vg_assert(sci->status.what != SsIdle);
2024 
2025    /* now, do one of four fixup actions, depending on where the IP has
2026       got to. */
2027 
2028    if (in_setup_to_restart) {
2029       /* syscall hasn't even started; go around again */
2030       if (VG_(clo_trace_signals))
2031          VG_(message)( Vg_DebugMsg, "  not started: restarting\n");
2032       vg_assert(sci->status.what == SsHandToKernel);
2033       ML_(fixup_guest_state_to_restart_syscall)(th_regs);
2034    }
2035 
2036    else
2037    if (at_restart) {
2038       /* We're either about to run the syscall, or it was interrupted
2039          and the kernel restarted it.  Restart if asked, otherwise
2040          EINTR it. */
2041       if (restart) {
2042          if (VG_(clo_trace_signals))
2043             VG_(message)( Vg_DebugMsg, "  at syscall instr: restarting\n");
2044          ML_(fixup_guest_state_to_restart_syscall)(th_regs);
2045       } else {
2046          if (VG_(clo_trace_signals))
2047             VG_(message)( Vg_DebugMsg, "  at syscall instr: returning EINTR\n");
2048          canonical = convert_SysRes_to_SyscallStatus(
2049                         VG_(mk_SysRes_Error)( VKI_EINTR )
2050                      );
2051          if (!(sci->flags & SfNoWriteResult))
2052             putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
2053          sci->status = canonical;
2054          VG_(post_syscall)(tid);
2055       }
2056    }
2057 
2058    else
2059    if (in_complete_to_committed) {
2060       /* Syscall complete, but result hasn't been written back yet.
2061          Write the SysRes we were supplied with back to the guest
2062          state. */
2063       if (VG_(clo_trace_signals))
2064          VG_(message)( Vg_DebugMsg,
2065                        "  completed, but uncommitted: committing\n");
2066       canonical = convert_SysRes_to_SyscallStatus( sres );
2067       if (!(sci->flags & SfNoWriteResult))
2068          putSyscallStatusIntoGuestState( tid, &canonical, &th_regs->vex );
2069       sci->status = canonical;
2070       VG_(post_syscall)(tid);
2071    }
2072 
2073    else
2074    if (in_committed_to_finished) {
2075       /* Result committed, but the signal mask has not been restored;
2076          we expect our caller (the signal handler) will have fixed
2077          this up. */
2078       if (VG_(clo_trace_signals))
2079          VG_(message)( Vg_DebugMsg,
2080                        "  completed and committed: nothing to do\n");
2081       getSyscallStatusFromGuestState( &sci->status, &th_regs->vex );
2082       vg_assert(sci->status.what == SsComplete);
2083       VG_(post_syscall)(tid);
2084    }
2085 
2086    else
2087       VG_(core_panic)("?? strange syscall interrupt state?");
2088 
2089    /* In all cases, the syscall is now finished (even if we called
2090       ML_(fixup_guest_state_to_restart_syscall), since that just
2091       re-positions the guest's IP for another go at it).  So we need
2092       to record that fact. */
2093    sci->status.what = SsIdle;
2094 }
2095 
2096 
#if defined(VGO_darwin)
// Tidy up after workq_ops(WQOPS_THREAD_RETURN) ended up in
// wqthread_hijack instead of returning.  Plays much the same role as
// VG_(fixup_guest_state_after_syscall_interrupted): it finishes off
// the pending syscall for 'tid'.  Takes the big lock, and does not
// return: it longjmps to the thread's sched_jmpbuf.
void ML_(wqthread_continue_NORETURN)(ThreadId tid)
{
   ThreadState* tst;
   SyscallInfo* sci;
   SysRes       pretend_ok;

   VG_(acquire_BigLock)(tid, "wqthread_continue_NORETURN");

   PRINT("SYSCALL[%d,%d](%s) workq_ops() starting new workqueue item\n",
         VG_(getpid)(), tid, VG_SYSNUM_STRING(__NR_workq_ops));

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   sci = & syscallInfo[tid];
   tst = VG_(get_ThreadState)(tid);

   // A syscall must be in flight for this thread.
   vg_assert(sci->status.what != SsIdle);
   // This has to be checked BEFORE VG_(post_syscall) runs.
   vg_assert(tst->os_state.wq_jmpbuf_valid);

   // Record a fake "succeeded with 0" result.  SfNoWriteResult stops
   // VG_(post_syscall) from writing it into the guest state, so the
   // thread state is left untouched.
   pretend_ok  = VG_(mk_SysRes_Success)(0);
   sci->status = convert_SysRes_to_SyscallStatus( pretend_ok );
   sci->flags |= SfNoWriteResult;
   VG_(post_syscall)(tid);

   sci->status.what = SsIdle;

   // Hand control back via the scheduler's jump buffer.
   vg_assert(tst->sched_jmpbuf_valid);
   VG_MINIMAL_LONGJMP(tst->sched_jmpbuf);

   /* NOTREACHED */
   vg_assert(0);
}
#endif
2134 
2135 
2136 /* ---------------------------------------------------------------------
2137    A place to store the where-to-call-when-really-done pointer
2138    ------------------------------------------------------------------ */
2139 
2140 // When the final thread is done, where shall I call to shutdown the
2141 // system cleanly?  Is set once at startup (in m_main) and never
2142 // changes after that.  Is basically a pointer to the exit
2143 // continuation.  This is all just a nasty hack to avoid calling
2144 // directly from m_syswrap to m_main at exit, since that would cause
2145 // m_main to become part of a module cycle, which is silly.
2146 void (* VG_(address_of_m_main_shutdown_actions_NORETURN) )
2147        (ThreadId,VgSchedReturnCode)
2148    = NULL;
2149 
2150 /*--------------------------------------------------------------------*/
2151 /*--- end                                                          ---*/
2152 /*--------------------------------------------------------------------*/
2153