• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- Platform-specific syscalls stuff.        syswrap-x86-linux.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2000-2015 Nicholas Nethercote
11       njn@valgrind.org
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 #if defined(VGP_x86_linux)
32 
33 /* TODO/FIXME jrs 20050207: assignments to the syscall return result
34    in interrupted_syscall() need to be reviewed.  They don't seem
35    to assign the shadow state.
36 */
37 
38 #include "pub_core_basics.h"
39 #include "pub_core_vki.h"
40 #include "pub_core_vkiscnums.h"
41 #include "pub_core_threadstate.h"
42 #include "pub_core_aspacemgr.h"
43 #include "pub_core_debuglog.h"
44 #include "pub_core_libcbase.h"
45 #include "pub_core_libcassert.h"
46 #include "pub_core_libcprint.h"
47 #include "pub_core_libcproc.h"
48 #include "pub_core_libcsignal.h"
49 #include "pub_core_mallocfree.h"
50 #include "pub_core_options.h"
51 #include "pub_core_scheduler.h"
52 #include "pub_core_sigframe.h"      // For VG_(sigframe_destroy)()
53 #include "pub_core_signals.h"
54 #include "pub_core_syscall.h"
55 #include "pub_core_syswrap.h"
56 #include "pub_core_tooliface.h"
57 
58 #include "priv_types_n_macros.h"
59 #include "priv_syswrap-generic.h"    /* for decls of generic wrappers */
60 #include "priv_syswrap-linux.h"      /* for decls of linux-ish wrappers */
61 #include "priv_syswrap-linux-variants.h" /* decls of linux variant wrappers */
62 #include "priv_syswrap-main.h"
63 
64 
65 /* ---------------------------------------------------------------------
66    clone() handling
67    ------------------------------------------------------------------ */
68 
69 /* Call f(arg1), but first switch stacks, using 'stack' as the new
70    stack, and use 'retaddr' as f's return-to address.  Also, clear all
71    the integer registers before entering f.*/
/* Mechanism, in outline: the incoming %esp is parked in %esi so the
   caller's four arguments stay addressable after %esp has been
   switched to 'stack'.  arg1, retaddr and f are then pushed onto the
   new stack in that order, every general-purpose register is zeroed,
   and the final 'ret' pops f and jumps to it.  When f starts running,
   retaddr is therefore the word on top of the new stack and arg1 is
   the word just above it.  Declared noreturn: control is never handed
   back to the caller of this routine. */
72 __attribute__((noreturn))
73 void ML_(call_on_new_stack_0_1) ( Addr stack,
74 			          Addr retaddr,
75 			          void (*f)(Word),
76                                   Word arg1 );
/* Argument offsets relative to %esp on entry; 0(%esp) holds this
   routine's own return address, which is never used. */
77 //  4(%esp) == stack
78 //  8(%esp) == retaddr
79 // 12(%esp) == f
80 // 16(%esp) == arg1
81 asm(
82 ".text\n"
83 ".globl vgModuleLocal_call_on_new_stack_0_1\n"
84 "vgModuleLocal_call_on_new_stack_0_1:\n"
85 "   movl %esp, %esi\n"     // remember old stack pointer
86 "   movl 4(%esi), %esp\n"  // set stack, assume %esp is now 16-byte aligned
87 "   subl $12, %esp\n"      // skip 12 bytes
88 "   pushl 16(%esi)\n"      // arg1 to stack, %esp is 16-byte aligned
89 "   pushl  8(%esi)\n"      // retaddr to stack
90 "   pushl 12(%esi)\n"      // f to stack
/* %esi (the old stack pointer) is no longer needed from here on, so it
   is cleared along with the other registers. */
91 "   movl $0, %eax\n"       // zero all GP regs
92 "   movl $0, %ebx\n"
93 "   movl $0, %ecx\n"
94 "   movl $0, %edx\n"
95 "   movl $0, %esi\n"
96 "   movl $0, %edi\n"
97 "   movl $0, %ebp\n"
/* 'ret' pops the most recently pushed word, which is f, and jumps to
   it. */
98 "   ret\n"                 // jump to f
99 "   ud2\n"                 // should never get here
100 ".previous\n"
101 );
102 
103 
104 /*
105         Perform a clone system call.  clone is strange because it has
106         fork()-like return-twice semantics, so it needs special
107         handling here.
108 
109         Upon entry, we have:
110 
111             int (fn)(void*)     in  0+FSZ(%esp)
112             void* child_stack   in  4+FSZ(%esp)
113             int flags           in  8+FSZ(%esp)
114             void* arg           in 12+FSZ(%esp)
115             pid_t* child_tid    in 16+FSZ(%esp)
116             pid_t* parent_tid   in 20+FSZ(%esp)
117             void* tls_ptr       in 24+FSZ(%esp)
118 
119         System call requires:
120 
121             int    $__NR_clone  in %eax
122             int    flags        in %ebx
123             void*  child_stack  in %ecx
124             pid_t* parent_tid   in %edx
125             pid_t* child_tid    in %edi
126             void*  tls_ptr      in %esi
127 
128 	Returns an Int encoded in the linux-x86 way, not a SysRes.
129  */
/* FSZ is the distance in bytes from %esp to the first C argument once
   the three register pushes at the top of the routine have been done:
   the return address plus saved %ebx, %edi and %esi, 4 bytes each.  It
   is written as a string so it can be pasted straight into the operand
   offsets of the asm text below.  __NR_CLONE and __NR_EXIT are the
   stringified syscall numbers, for the same reason. */
130 #define FSZ               "4+4+4+4" /* frame size = retaddr+ebx+edi+esi */
131 #define __NR_CLONE        VG_STRINGIFY(__NR_clone)
132 #define __NR_EXIT         VG_STRINGIFY(__NR_exit)
133 
/* C-level view of the assembly routine defined below.  The argument
   order here fixes the stack offsets used in the asm: fn, stack,
   flags, arg, child_tid, parent_tid, then the TLS descriptor. */
134 extern
135 Int do_syscall_clone_x86_linux ( Word (*fn)(void *),
136                                  void* stack,
137                                  Int   flags,
138                                  void* arg,
139                                  Int*  child_tid,
140                                  Int*  parent_tid,
141                                  vki_modify_ldt_t * );
142 asm(
143 ".text\n"
144 ".globl do_syscall_clone_x86_linux\n"
145 "do_syscall_clone_x86_linux:\n"
/* Save the registers that are about to be overwritten with syscall
   arguments; they are restored at label 1 on the parent/error path. */
146 "        push    %ebx\n"
147 "        push    %edi\n"
148 "        push    %esi\n"
149 
150          /* set up child stack with function and arg */
151 "        movl     4+"FSZ"(%esp), %ecx\n"    /* syscall arg2: child stack */
152 "        movl    12+"FSZ"(%esp), %ebx\n"    /* fn arg */
153 "        movl     0+"FSZ"(%esp), %eax\n"    /* fn */
154 "        andl    $-16, %ecx\n"              /* align to 16-byte */
155 "        lea     -20(%ecx), %ecx\n"         /* allocate 16*n+4 bytes on stack */
/* fn and its argument are planted on the child's stack because the
   child cannot reach this frame's arguments after the clone: the child
   path below only uses what it finds at its own %esp. */
156 "        movl    %ebx, 4(%ecx)\n"           /*   fn arg */
157 "        movl    %eax, 0(%ecx)\n"           /*   fn */
158 
159          /* get other args to clone */
160 "        movl     8+"FSZ"(%esp), %ebx\n"    /* syscall arg1: flags */
161 "        movl    20+"FSZ"(%esp), %edx\n"    /* syscall arg3: parent tid * */
162 "        movl    16+"FSZ"(%esp), %edi\n"    /* syscall arg5: child tid * */
163 "        movl    24+"FSZ"(%esp), %esi\n"    /* syscall arg4: tls_ptr * */
164 "        movl    $"__NR_CLONE", %eax\n"
165 "        int     $0x80\n"                   /* clone() */
166 "        testl   %eax, %eax\n"              /* child if retval == 0 */
167 "        jnz     1f\n"
168 
169          /* CHILD - call thread function */
/* The child path relies on %esp being the value placed in %ecx above,
   so that fn is at 0(%esp) and its argument at 4(%esp).  Popping fn
   leaves the argument on top of the stack for the call. */
170 "        popl    %eax\n"                    /* child %esp is 16-byte aligned */
171 "        call    *%eax\n"                   /* call fn */
172 
173          /* exit with result */
174 "        movl    %eax, %ebx\n"              /* arg1: return value from fn */
175 "        movl    $"__NR_EXIT", %eax\n"
176 "        int     $0x80\n"
177 
178          /* Hm, exit returned */
179 "        ud2\n"
180 
/* Any non-zero result from the syscall lands here.  %eax is not touched
   again, so the C caller receives exactly what the kernel returned. */
181 "1:\n"   /* PARENT or ERROR */
182 "        pop     %esi\n"
183 "        pop     %edi\n"
184 "        pop     %ebx\n"
185 "        ret\n"
186 ".previous\n"
187 );
188 
/* The helper macros are only meaningful for the asm text above. */
189 #undef FSZ
190 #undef __NR_CLONE
191 #undef __NR_EXIT
192 
193 
194 // forward declarations
195 static void setup_child ( ThreadArchState*, ThreadArchState*, Bool );
196 static SysRes sys_set_thread_area ( ThreadId, vki_modify_ldt_t* );
197 
198 /*
199    When a client clones, we need to keep track of the new thread.  This means:
200    1. allocate a ThreadId+ThreadState+stack for the thread
201 
202    2. initialize the thread's new VCPU state
203 
204    3. create the thread using the same args as the client requested,
205    but using the scheduler entrypoint for EIP, and a separate stack
206    for ESP.
207  */
do_clone(ThreadId ptid,UInt flags,Addr esp,Int * parent_tidptr,Int * child_tidptr,vki_modify_ldt_t * tlsinfo)208 static SysRes do_clone ( ThreadId ptid,
209                          UInt flags, Addr esp,
210                          Int* parent_tidptr,
211                          Int* child_tidptr,
212                          vki_modify_ldt_t *tlsinfo)
213 {
214    static const Bool debug = False;
215 
216    ThreadId     ctid = VG_(alloc_ThreadState)();
217    ThreadState* ptst = VG_(get_ThreadState)(ptid);
218    ThreadState* ctst = VG_(get_ThreadState)(ctid);
219    UWord*       stack;
220    SysRes       res;
221    Int          eax;
222    vki_sigset_t blockall, savedmask;
223 
224    VG_(sigfillset)(&blockall);
225 
226    vg_assert(VG_(is_running_thread)(ptid));
227    vg_assert(VG_(is_valid_tid)(ctid));
228 
229    stack = (UWord*)ML_(allocstack)(ctid);
230    if (stack == NULL) {
231       res = VG_(mk_SysRes_Error)( VKI_ENOMEM );
232       goto out;
233    }
234 
235    /* Copy register state
236 
237       Both parent and child return to the same place, and the code
238       following the clone syscall works out which is which, so we
239       don't need to worry about it.
240 
241       The parent gets the child's new tid returned from clone, but the
242       child gets 0.
243 
244       If the clone call specifies a NULL esp for the new thread, then
245       it actually gets a copy of the parent's esp.
246    */
247    /* Note: the clone call done by the Quadrics Elan3 driver specifies
248       clone flags of 0xF00, and it seems to rely on the assumption
249       that the child inherits a copy of the parent's GDT.
250       setup_child takes care of setting that up. */
251    setup_child( &ctst->arch, &ptst->arch, True );
252 
253    /* Make sys_clone appear to have returned Success(0) in the
254       child. */
255    ctst->arch.vex.guest_EAX = 0;
256 
257    if (esp != 0)
258       ctst->arch.vex.guest_ESP = esp;
259 
260    ctst->os_state.parent = ptid;
261 
262    /* inherit signal mask */
263    ctst->sig_mask     = ptst->sig_mask;
264    ctst->tmp_sig_mask = ptst->sig_mask;
265 
266    /* Start the child with its threadgroup being the same as the
267       parent's.  This is so that any exit_group calls that happen
268       after the child is created but before it sets its
269       os_state.threadgroup field for real (in thread_wrapper in
270       syswrap-linux.c), really kill the new thread.  a.k.a this avoids
271       a race condition in which the thread is unkillable (via
272       exit_group) because its threadgroup is not set.  The race window
273       is probably only a few hundred or a few thousand cycles long.
274       See #226116. */
275    ctst->os_state.threadgroup = ptst->os_state.threadgroup;
276 
277    ML_(guess_and_register_stack) (esp, ctst);
278 
279    /* Assume the clone will succeed, and tell any tool that wants to
280       know that this thread has come into existence.  We cannot defer
281       it beyond this point because sys_set_thread_area, just below,
282       causes tCheck to assert by making references to the new ThreadId
283       if we don't state the new thread exists prior to that point.
284       If the clone fails, we'll send out a ll_exit notification for it
285       at the out: label below, to clean up. */
286    vg_assert(VG_(owns_BigLock_LL)(ptid));
287    VG_TRACK ( pre_thread_ll_create, ptid, ctid );
288 
289    if (flags & VKI_CLONE_SETTLS) {
290       if (debug)
291 	 VG_(printf)("clone child has SETTLS: tls info at %p: idx=%u "
292                      "base=%#lx limit=%x; esp=%#x fs=%x gs=%x\n",
293 		     tlsinfo, tlsinfo->entry_number,
294                      tlsinfo->base_addr, tlsinfo->limit,
295 		     ptst->arch.vex.guest_ESP,
296 		     ctst->arch.vex.guest_FS, ctst->arch.vex.guest_GS);
297       res = sys_set_thread_area(ctid, tlsinfo);
298       if (sr_isError(res))
299 	 goto out;
300    }
301 
302    flags &= ~VKI_CLONE_SETTLS;
303 
304    /* start the thread with everything blocked */
305    VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, &savedmask);
306 
307    /* Create the new thread */
308    eax = do_syscall_clone_x86_linux(
309             ML_(start_thread_NORETURN), stack, flags, &VG_(threads)[ctid],
310             child_tidptr, parent_tidptr, NULL
311          );
312    res = VG_(mk_SysRes_x86_linux)( eax );
313 
314    VG_(sigprocmask)(VKI_SIG_SETMASK, &savedmask, NULL);
315 
316   out:
317    if (sr_isError(res)) {
318       /* clone failed */
319       VG_(cleanup_thread)(&ctst->arch);
320       ctst->status = VgTs_Empty;
321       /* oops.  Better tell the tool the thread exited in a hurry :-) */
322       VG_TRACK( pre_thread_ll_exit, ctid );
323    }
324 
325    return res;
326 }
327 
328 
329 /* ---------------------------------------------------------------------
330    LDT/GDT simulation
331    ------------------------------------------------------------------ */
332 
333 /* Details of the LDT simulation
334    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
335 
336    When a program runs natively, the linux kernel allows each *thread*
337    in it to have its own LDT.  Almost all programs never do this --
338    it's wildly unportable, after all -- and so the kernel never
339    allocates the structure, which is just as well as an LDT occupies
340    64k of memory (8192 entries of size 8 bytes).
341 
342    A thread may choose to modify its LDT entries, by doing the
343    __NR_modify_ldt syscall.  In such a situation the kernel will then
344    allocate an LDT structure for it.  Each LDT entry is basically a
345    (base, limit) pair.  A virtual address in a specific segment is
346    translated to a linear address by adding the segment's base value.
347    In addition, the virtual address must not exceed the limit value.
348 
349    To use an LDT entry, a thread loads one of the segment registers
350    (%cs, %ss, %ds, %es, %fs, %gs) with the index of the LDT entry (0
351    .. 8191) it wants to use.  In fact, the required value is (index <<
352    3) + 7, but that's not important right now.  Any normal instruction
353    which includes an addressing mode can then be made relative to that
354    LDT entry by prefixing the insn with a so-called segment-override
355    prefix, a byte which indicates which of the 6 segment registers
356    holds the LDT index.
357 
358    Now, a key constraint is that valgrind's address checks operate in
359    terms of linear addresses.  So we have to explicitly translate
360    virtual addrs into linear addrs, and that means doing a complete
361    LDT simulation.
362 
363    Calls to modify_ldt are intercepted.  For each thread, we maintain
364    an LDT (with the same normally-never-allocated optimisation that
365    the kernel does).  This is updated as expected via calls to
366    modify_ldt.
367 
368    When a thread does an amode calculation involving a segment
369    override prefix, the relevant LDT entry for the thread is
370    consulted.  It all works.
371 
372    There is a conceptual problem, which appears when switching back to
373    native execution, either temporarily to pass syscalls to the
374    kernel, or permanently, when debugging V.  Problem at such points
375    is that it's pretty pointless to copy the simulated machine's
376    segment registers to the real machine, because we'd also need to
377    copy the simulated LDT into the real one, and that's prohibitively
378    expensive.
379 
380    Fortunately it looks like no syscalls rely on the segment regs or
381    LDT being correct, so we can get away with it.  Apart from that the
382    simulation is pretty straightforward.  All 6 segment registers are
383    tracked, although only %ds, %es, %fs and %gs are allowed as
384    prefixes.  Perhaps it could be restricted even more than that -- I
385    am not sure what is and isn't allowed in user-mode.
386 */
387 
388 /* Translate a struct modify_ldt_ldt_s to a VexGuestX86SegDescr, using
389    the Linux kernel's logic (cut-n-paste of code in
390    linux/kernel/ldt.c).  */
391 
392 static
translate_to_hw_format(vki_modify_ldt_t * inn,VexGuestX86SegDescr * out,Int oldmode)393 void translate_to_hw_format ( /* IN  */ vki_modify_ldt_t* inn,
394                               /* OUT */ VexGuestX86SegDescr* out,
395                                         Int oldmode )
396 {
397    UInt entry_1, entry_2;
398    vg_assert(8 == sizeof(VexGuestX86SegDescr));
399 
400    if (0)
401       VG_(printf)("translate_to_hw_format: base %#lx, limit %u\n",
402                   inn->base_addr, inn->limit );
403 
404    /* Allow LDTs to be cleared by the user. */
405    if (inn->base_addr == 0 && inn->limit == 0) {
406       if (oldmode ||
407           (inn->contents == 0      &&
408            inn->read_exec_only == 1   &&
409            inn->seg_32bit == 0      &&
410            inn->limit_in_pages == 0   &&
411            inn->seg_not_present == 1   &&
412            inn->useable == 0 )) {
413          entry_1 = 0;
414          entry_2 = 0;
415          goto install;
416       }
417    }
418 
419    entry_1 = ((inn->base_addr & 0x0000ffff) << 16) |
420              (inn->limit & 0x0ffff);
421    entry_2 = (inn->base_addr & 0xff000000) |
422              ((inn->base_addr & 0x00ff0000) >> 16) |
423              (inn->limit & 0xf0000) |
424              ((inn->read_exec_only ^ 1) << 9) |
425              (inn->contents << 10) |
426              ((inn->seg_not_present ^ 1) << 15) |
427              (inn->seg_32bit << 22) |
428              (inn->limit_in_pages << 23) |
429              0x7000;
430    if (!oldmode)
431       entry_2 |= (inn->useable << 20);
432 
433    /* Install the new entry ...  */
434   install:
435    out->LdtEnt.Words.word1 = entry_1;
436    out->LdtEnt.Words.word2 = entry_2;
437 }
438 
439 /* Create a zeroed-out GDT. */
alloc_zeroed_x86_GDT(void)440 static VexGuestX86SegDescr* alloc_zeroed_x86_GDT ( void )
441 {
442    Int nbytes = VEX_GUEST_X86_GDT_NENT * sizeof(VexGuestX86SegDescr);
443    return VG_(calloc)("di.syswrap-x86.azxG.1", nbytes, 1);
444 }
445 
446 /* Create a zeroed-out LDT. */
alloc_zeroed_x86_LDT(void)447 static VexGuestX86SegDescr* alloc_zeroed_x86_LDT ( void )
448 {
449    Int nbytes = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
450    return VG_(calloc)("di.syswrap-x86.azxL.1", nbytes, 1);
451 }
452 
453 /* Free up an LDT or GDT allocated by the above fns. */
free_LDT_or_GDT(VexGuestX86SegDescr * dt)454 static void free_LDT_or_GDT ( VexGuestX86SegDescr* dt )
455 {
456    vg_assert(dt);
457    VG_(free)(dt);
458 }
459 
460 /* Copy contents between two existing LDTs. */
copy_LDT_from_to(VexGuestX86SegDescr * src,VexGuestX86SegDescr * dst)461 static void copy_LDT_from_to ( VexGuestX86SegDescr* src,
462                                VexGuestX86SegDescr* dst )
463 {
464    Int i;
465    vg_assert(src);
466    vg_assert(dst);
467    for (i = 0; i < VEX_GUEST_X86_LDT_NENT; i++)
468       dst[i] = src[i];
469 }
470 
471 /* Copy contents between two existing GDTs. */
copy_GDT_from_to(VexGuestX86SegDescr * src,VexGuestX86SegDescr * dst)472 static void copy_GDT_from_to ( VexGuestX86SegDescr* src,
473                                VexGuestX86SegDescr* dst )
474 {
475    Int i;
476    vg_assert(src);
477    vg_assert(dst);
478    for (i = 0; i < VEX_GUEST_X86_GDT_NENT; i++)
479       dst[i] = src[i];
480 }
481 
482 /* Free this thread's DTs, if it has any. */
deallocate_LGDTs_for_thread(VexGuestX86State * vex)483 static void deallocate_LGDTs_for_thread ( VexGuestX86State* vex )
484 {
485    vg_assert(sizeof(HWord) == sizeof(void*));
486 
487    if (0)
488       VG_(printf)("deallocate_LGDTs_for_thread: "
489                   "ldt = 0x%lx, gdt = 0x%lx\n",
490                   vex->guest_LDT, vex->guest_GDT );
491 
492    if (vex->guest_LDT != (HWord)NULL) {
493       free_LDT_or_GDT( (VexGuestX86SegDescr*)vex->guest_LDT );
494       vex->guest_LDT = (HWord)NULL;
495    }
496 
497    if (vex->guest_GDT != (HWord)NULL) {
498       free_LDT_or_GDT( (VexGuestX86SegDescr*)vex->guest_GDT );
499       vex->guest_GDT = (HWord)NULL;
500    }
501 }
502 
503 
504 /*
505  * linux/kernel/ldt.c
506  *
507  * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
508  * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
509  */
510 
511 /*
512  * read_ldt() is not really atomic - this is not a problem since
513  * synchronization of reads and writes done to the LDT has to be
514  * assured by user-space anyway. Writes are atomic, to protect
515  * the security checks done on new descriptors.
516  */
517 static
read_ldt(ThreadId tid,UChar * ptr,UInt bytecount)518 SysRes read_ldt ( ThreadId tid, UChar* ptr, UInt bytecount )
519 {
520    SysRes res;
521    UInt   i, size;
522    UChar* ldt;
523 
524    if (0)
525       VG_(printf)("read_ldt: tid = %u, ptr = %p, bytecount = %u\n",
526                   tid, ptr, bytecount );
527 
528    vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
529    vg_assert(8 == sizeof(VexGuestX86SegDescr));
530 
531    ldt = (UChar*)(VG_(threads)[tid].arch.vex.guest_LDT);
532    res = VG_(mk_SysRes_Success)( 0 );
533    if (ldt == NULL)
534       /* LDT not allocated, meaning all entries are null */
535       goto out;
536 
537    size = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
538    if (size > bytecount)
539       size = bytecount;
540 
541    res = VG_(mk_SysRes_Success)( size );
542    for (i = 0; i < size; i++)
543       ptr[i] = ldt[i];
544 
545   out:
546    return res;
547 }
548 
549 
550 static
write_ldt(ThreadId tid,void * ptr,UInt bytecount,Int oldmode)551 SysRes write_ldt ( ThreadId tid, void* ptr, UInt bytecount, Int oldmode )
552 {
553    SysRes res;
554    VexGuestX86SegDescr* ldt;
555    vki_modify_ldt_t* ldt_info;
556 
557    if (0)
558       VG_(printf)("write_ldt: tid = %u, ptr = %p, "
559                   "bytecount = %u, oldmode = %d\n",
560                   tid, ptr, bytecount, oldmode );
561 
562    vg_assert(8 == sizeof(VexGuestX86SegDescr));
563    vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
564 
565    ldt      = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_LDT;
566    ldt_info = (vki_modify_ldt_t*)ptr;
567 
568    res = VG_(mk_SysRes_Error)( VKI_EINVAL );
569    if (bytecount != sizeof(vki_modify_ldt_t))
570       goto out;
571 
572    res = VG_(mk_SysRes_Error)( VKI_EINVAL );
573    if (ldt_info->entry_number >= VEX_GUEST_X86_LDT_NENT)
574       goto out;
575    if (ldt_info->contents == 3) {
576       if (oldmode)
577          goto out;
578       if (ldt_info->seg_not_present == 0)
579          goto out;
580    }
581 
582    /* If this thread doesn't have an LDT, we'd better allocate it
583       now. */
584    if (ldt == NULL) {
585       ldt = alloc_zeroed_x86_LDT();
586       VG_(threads)[tid].arch.vex.guest_LDT = (HWord)ldt;
587    }
588 
589    /* Install the new entry ...  */
590    translate_to_hw_format ( ldt_info, &ldt[ldt_info->entry_number], oldmode );
591    res = VG_(mk_SysRes_Success)( 0 );
592 
593   out:
594    return res;
595 }
596 
597 
sys_modify_ldt(ThreadId tid,Int func,void * ptr,UInt bytecount)598 static SysRes sys_modify_ldt ( ThreadId tid,
599                                Int func, void* ptr, UInt bytecount )
600 {
601    SysRes ret = VG_(mk_SysRes_Error)( VKI_ENOSYS );
602 
603    switch (func) {
604    case 0:
605       ret = read_ldt(tid, ptr, bytecount);
606       break;
607    case 1:
608       ret = write_ldt(tid, ptr, bytecount, 1);
609       break;
610    case 2:
611       VG_(unimplemented)("sys_modify_ldt: func == 2");
612       /* god knows what this is about */
613       /* ret = read_default_ldt(ptr, bytecount); */
614       /*UNREACHED*/
615       break;
616    case 0x11:
617       ret = write_ldt(tid, ptr, bytecount, 0);
618       break;
619    }
620    return ret;
621 }
622 
623 
sys_set_thread_area(ThreadId tid,vki_modify_ldt_t * info)624 static SysRes sys_set_thread_area ( ThreadId tid, vki_modify_ldt_t* info )
625 {
626    Int                  idx;
627    VexGuestX86SegDescr* gdt;
628 
629    vg_assert(8 == sizeof(VexGuestX86SegDescr));
630    vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
631 
632    if (info == NULL)
633       return VG_(mk_SysRes_Error)( VKI_EFAULT );
634 
635    gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;
636 
637    /* If the thread doesn't have a GDT, allocate it now. */
638    if (!gdt) {
639       gdt = alloc_zeroed_x86_GDT();
640       VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
641    }
642 
643    idx = info->entry_number;
644 
645    if (idx == -1) {
646       /* Find and use the first free entry.  Don't allocate entry
647          zero, because the hardware will never do that, and apparently
648          doing so confuses some code (perhaps stuff running on
649          Wine). */
650       for (idx = 1; idx < VEX_GUEST_X86_GDT_NENT; idx++) {
651          if (gdt[idx].LdtEnt.Words.word1 == 0
652              && gdt[idx].LdtEnt.Words.word2 == 0)
653             break;
654       }
655 
656       if (idx == VEX_GUEST_X86_GDT_NENT)
657          return VG_(mk_SysRes_Error)( VKI_ESRCH );
658    } else if (idx < 0 || idx == 0 || idx >= VEX_GUEST_X86_GDT_NENT) {
659       /* Similarly, reject attempts to use GDT[0]. */
660       return VG_(mk_SysRes_Error)( VKI_EINVAL );
661    }
662 
663    translate_to_hw_format(info, &gdt[idx], 0);
664 
665    VG_TRACK( pre_mem_write, Vg_CoreSysCall, tid,
666              "set_thread_area(info->entry)",
667              (Addr) & info->entry_number, sizeof(unsigned int) );
668    info->entry_number = idx;
669    VG_TRACK( post_mem_write, Vg_CoreSysCall, tid,
670              (Addr) & info->entry_number, sizeof(unsigned int) );
671 
672    return VG_(mk_SysRes_Success)( 0 );
673 }
674 
675 
sys_get_thread_area(ThreadId tid,vki_modify_ldt_t * info)676 static SysRes sys_get_thread_area ( ThreadId tid, vki_modify_ldt_t* info )
677 {
678    Int idx;
679    VexGuestX86SegDescr* gdt;
680 
681    vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
682    vg_assert(8 == sizeof(VexGuestX86SegDescr));
683 
684    if (info == NULL)
685       return VG_(mk_SysRes_Error)( VKI_EFAULT );
686 
687    idx = info->entry_number;
688 
689    if (idx < 0 || idx >= VEX_GUEST_X86_GDT_NENT)
690       return VG_(mk_SysRes_Error)( VKI_EINVAL );
691 
692    gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;
693 
694    /* If the thread doesn't have a GDT, allocate it now. */
695    if (!gdt) {
696       gdt = alloc_zeroed_x86_GDT();
697       VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
698    }
699 
700    info->base_addr = ( gdt[idx].LdtEnt.Bits.BaseHi << 24 ) |
701                      ( gdt[idx].LdtEnt.Bits.BaseMid << 16 ) |
702                      gdt[idx].LdtEnt.Bits.BaseLow;
703    info->limit = ( gdt[idx].LdtEnt.Bits.LimitHi << 16 ) |
704                    gdt[idx].LdtEnt.Bits.LimitLow;
705    info->seg_32bit = gdt[idx].LdtEnt.Bits.Default_Big;
706    info->contents = ( gdt[idx].LdtEnt.Bits.Type >> 2 ) & 0x3;
707    info->read_exec_only = ( gdt[idx].LdtEnt.Bits.Type & 0x1 ) ^ 0x1;
708    info->limit_in_pages = gdt[idx].LdtEnt.Bits.Granularity;
709    info->seg_not_present = gdt[idx].LdtEnt.Bits.Pres ^ 0x1;
710    info->useable = gdt[idx].LdtEnt.Bits.Sys;
711    info->reserved = 0;
712 
713    return VG_(mk_SysRes_Success)( 0 );
714 }
715 
716 /* ---------------------------------------------------------------------
717    More thread stuff
718    ------------------------------------------------------------------ */
719 
VG_(cleanup_thread)720 void VG_(cleanup_thread) ( ThreadArchState* arch )
721 {
722    /* Release arch-specific resources held by this thread. */
723    /* On x86, we have to dump the LDT and GDT. */
724    deallocate_LGDTs_for_thread( &arch->vex );
725 }
726 
727 
/* Initialise a new thread's architectural state from its parent's.

   The guest state and both shadow states are copied as they are.  The
   LDT is inherited by value: if the parent has one, the child gets its
   own freshly allocated copy.  The GDT is copied in the same way only
   when inherit_parents_GDT is set and the parent has one; otherwise
   the child starts without a GDT. */
static void setup_child ( /*OUT*/ ThreadArchState *child,
                          /*IN*/  ThreadArchState *parent,
                          Bool inherit_parents_GDT )
{
   /* We inherit our parent's guest state. */
   child->vex         = parent->vex;
   child->vex_shadow1 = parent->vex_shadow1;
   child->vex_shadow2 = parent->vex_shadow2;

   /* We inherit our parent's LDT. */
   if (parent->vex.guest_LDT != (HWord)NULL) {
      /* No luck .. we have to take a copy of the parent's. */
      child->vex.guest_LDT = (HWord)alloc_zeroed_x86_LDT();
      copy_LDT_from_to( (VexGuestX86SegDescr*)parent->vex.guest_LDT,
                        (VexGuestX86SegDescr*)child->vex.guest_LDT );
   } else {
      /* We hope this is the common case. */
      child->vex.guest_LDT = (HWord)NULL;
   }

   /* The usual case is to start with an empty GDT.  Only a Quadrics
      Elan3 driver -style clone inherits a copy of the parent's. */
   child->vex.guest_GDT = (HWord)NULL;

   if (inherit_parents_GDT) {
      if (parent->vex.guest_GDT != (HWord)NULL) {
         child->vex.guest_GDT = (HWord)alloc_zeroed_x86_GDT();
         copy_GDT_from_to( (VexGuestX86SegDescr*)parent->vex.guest_GDT,
                           (VexGuestX86SegDescr*)child->vex.guest_GDT );
      }
   }
}
759 
760 
761 /* ---------------------------------------------------------------------
762    PRE/POST wrappers for x86/Linux-specific syscalls
763    ------------------------------------------------------------------ */
764 
/* Shorthands used below to introduce the x86_linux PRE and POST
   handler for a named syscall. */
#define PRE(name)       DEFN_PRE_TEMPLATE(x86_linux, name)
#define POST(name)      DEFN_POST_TEMPLATE(x86_linux, name)

/* Prototypes for the wrappers declared in this file, present only so
   that gcc does not complain about missing prototypes.  This is a
   kludge: the wrappers are not visible outside this file and ought to
   be 'static', but arranging that needs even more macro magic. */

/* file status */
DECL_TEMPLATE(x86_linux, sys_stat64);
DECL_TEMPLATE(x86_linux, sys_lstat64);
DECL_TEMPLATE(x86_linux, sys_fstat64);
DECL_TEMPLATE(x86_linux, sys_fstatat64);

/* memory mapping */
DECL_TEMPLATE(x86_linux, old_mmap);
DECL_TEMPLATE(x86_linux, sys_mmap2);

/* threads, LDT/GDT and TLS */
DECL_TEMPLATE(x86_linux, sys_clone);
DECL_TEMPLATE(x86_linux, sys_modify_ldt);
DECL_TEMPLATE(x86_linux, sys_set_thread_area);
DECL_TEMPLATE(x86_linux, sys_get_thread_area);

/* signals */
DECL_TEMPLATE(x86_linux, sys_sigreturn);
DECL_TEMPLATE(x86_linux, sys_rt_sigreturn);
DECL_TEMPLATE(x86_linux, sys_sigsuspend);

/* everything else */
DECL_TEMPLATE(x86_linux, sys_ptrace);
DECL_TEMPLATE(x86_linux, old_select);
DECL_TEMPLATE(x86_linux, sys_vm86old);
DECL_TEMPLATE(x86_linux, sys_vm86);
DECL_TEMPLATE(x86_linux, sys_syscall223);
791 
PRE(old_select)792 PRE(old_select)
793 {
794    /* struct sel_arg_struct {
795       unsigned long n;
796       fd_set *inp, *outp, *exp;
797       struct timeval *tvp;
798       };
799    */
800    PRE_REG_READ1(long, "old_select", struct sel_arg_struct *, args);
801    PRE_MEM_READ( "old_select(args)", ARG1, 5*sizeof(UWord) );
802    *flags |= SfMayBlock;
803    {
804       UInt* arg_struct = (UInt*)ARG1;
805       UInt a1, a2, a3, a4, a5;
806 
807       a1 = arg_struct[0];
808       a2 = arg_struct[1];
809       a3 = arg_struct[2];
810       a4 = arg_struct[3];
811       a5 = arg_struct[4];
812 
813       PRINT("old_select ( %d, %#x, %#x, %#x, %#x )", (Int)a1,a2,a3,a4,a5);
814       if (a2 != (Addr)NULL)
815          PRE_MEM_READ( "old_select(readfds)",   a2, a1/8 /* __FD_SETSIZE/8 */ );
816       if (a3 != (Addr)NULL)
817          PRE_MEM_READ( "old_select(writefds)",  a3, a1/8 /* __FD_SETSIZE/8 */ );
818       if (a4 != (Addr)NULL)
819          PRE_MEM_READ( "old_select(exceptfds)", a4, a1/8 /* __FD_SETSIZE/8 */ );
820       if (a5 != (Addr)NULL)
821          PRE_MEM_READ( "old_select(timeout)", a5, sizeof(struct vki_timeval) );
822    }
823 }
824 
PRE(sys_clone)825 PRE(sys_clone)
826 {
827    UInt cloneflags;
828    Bool badarg = False;
829 
830    PRINT("sys_clone ( %lx, %#lx, %#lx, %#lx, %#lx )",ARG1,ARG2,ARG3,ARG4,ARG5);
831    PRE_REG_READ2(int, "clone",
832                  unsigned long, flags,
833                  void *, child_stack);
834 
835    if (ARG1 & VKI_CLONE_PARENT_SETTID) {
836       if (VG_(tdict).track_pre_reg_read) {
837          PRA3("clone", int *, parent_tidptr);
838       }
839       PRE_MEM_WRITE("clone(parent_tidptr)", ARG3, sizeof(Int));
840       if (!VG_(am_is_valid_for_client)(ARG3, sizeof(Int),
841                                              VKI_PROT_WRITE)) {
842          badarg = True;
843       }
844    }
845    if (ARG1 & VKI_CLONE_SETTLS) {
846       if (VG_(tdict).track_pre_reg_read) {
847          PRA4("clone", vki_modify_ldt_t *, tlsinfo);
848       }
849       PRE_MEM_READ("clone(tlsinfo)", ARG4, sizeof(vki_modify_ldt_t));
850       if (!VG_(am_is_valid_for_client)(ARG4, sizeof(vki_modify_ldt_t),
851                                              VKI_PROT_READ)) {
852          badarg = True;
853       }
854    }
855    if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID)) {
856       if (VG_(tdict).track_pre_reg_read) {
857          PRA5("clone", int *, child_tidptr);
858       }
859       PRE_MEM_WRITE("clone(child_tidptr)", ARG5, sizeof(Int));
860       if (!VG_(am_is_valid_for_client)(ARG5, sizeof(Int),
861                                              VKI_PROT_WRITE)) {
862          badarg = True;
863       }
864    }
865 
866    if (badarg) {
867       SET_STATUS_Failure( VKI_EFAULT );
868       return;
869    }
870 
871    cloneflags = ARG1;
872 
873    if (!ML_(client_signal_OK)(ARG1 & VKI_CSIGNAL)) {
874       SET_STATUS_Failure( VKI_EINVAL );
875       return;
876    }
877 
878    /* Be ultra-paranoid and filter out any clone-variants we don't understand:
879       - ??? specifies clone flags of 0x100011
880       - ??? specifies clone flags of 0x1200011.
881       - NPTL specifies clone flags of 0x7D0F00.
882       - The Quadrics Elan3 driver specifies clone flags of 0xF00.
883       - Newer Quadrics Elan3 drivers with NTPL support specify 0x410F00.
884       Everything else is rejected.
885    */
886    if (
887         1 ||
888         /* 11 Nov 05: for the time being, disable this ultra-paranoia.
889            The switch below probably does a good enough job. */
890           (cloneflags == 0x100011 || cloneflags == 0x1200011
891                                   || cloneflags == 0x7D0F00
892                                   || cloneflags == 0x790F00
893                                   || cloneflags == 0x3D0F00
894                                   || cloneflags == 0x410F00
895                                   || cloneflags == 0xF00
896                                   || cloneflags == 0xF21)) {
897      /* OK */
898    }
899    else {
900       /* Nah.  We don't like it.  Go away. */
901       goto reject;
902    }
903 
904    /* Only look at the flags we really care about */
905    switch (cloneflags & (VKI_CLONE_VM | VKI_CLONE_FS
906                          | VKI_CLONE_FILES | VKI_CLONE_VFORK)) {
907    case VKI_CLONE_VM | VKI_CLONE_FS | VKI_CLONE_FILES:
908       /* thread creation */
909       SET_STATUS_from_SysRes(
910          do_clone(tid,
911                   ARG1,         /* flags */
912                   (Addr)ARG2,   /* child ESP */
913                   (Int *)ARG3,  /* parent_tidptr */
914                   (Int *)ARG5,  /* child_tidptr */
915                   (vki_modify_ldt_t *)ARG4)); /* set_tls */
916       break;
917 
918    case VKI_CLONE_VFORK | VKI_CLONE_VM: /* vfork */
919       /* FALLTHROUGH - assume vfork == fork */
920       cloneflags &= ~(VKI_CLONE_VFORK | VKI_CLONE_VM);
921 
922    case 0: /* plain fork */
923       SET_STATUS_from_SysRes(
924          ML_(do_fork_clone)(tid,
925                        cloneflags,      /* flags */
926                        (Int *)ARG3,     /* parent_tidptr */
927                        (Int *)ARG5));   /* child_tidptr */
928       break;
929 
930    default:
931    reject:
932       /* should we just ENOSYS? */
933       VG_(message)(Vg_UserMsg, "\n");
934       VG_(message)(Vg_UserMsg, "Unsupported clone() flags: 0x%lx\n", ARG1);
935       VG_(message)(Vg_UserMsg, "\n");
936       VG_(message)(Vg_UserMsg, "The only supported clone() uses are:\n");
937       VG_(message)(Vg_UserMsg, " - via a threads library (LinuxThreads or NPTL)\n");
938       VG_(message)(Vg_UserMsg, " - via the implementation of fork or vfork\n");
939       VG_(message)(Vg_UserMsg, " - for the Quadrics Elan3 user-space driver\n");
940       VG_(unimplemented)
941          ("Valgrind does not support general clone().");
942    }
943 
944    if (SUCCESS) {
945       if (ARG1 & VKI_CLONE_PARENT_SETTID)
946          POST_MEM_WRITE(ARG3, sizeof(Int));
947       if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID))
948          POST_MEM_WRITE(ARG5, sizeof(Int));
949 
950       /* Thread creation was successful; let the child have the chance
951          to run */
952       *flags |= SfYieldAfter;
953    }
954 }
955 
PRE(sys_sigreturn)956 PRE(sys_sigreturn)
957 {
958    /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
959       an explanation of what follows. */
960 
961    ThreadState* tst;
962    PRINT("sys_sigreturn ( )");
963 
964    vg_assert(VG_(is_valid_tid)(tid));
965    vg_assert(tid >= 1 && tid < VG_N_THREADS);
966    vg_assert(VG_(is_running_thread)(tid));
967 
968    /* Adjust esp to point to start of frame; skip back up over
969       sigreturn sequence's "popl %eax" and handler ret addr */
970    tst = VG_(get_ThreadState)(tid);
971    tst->arch.vex.guest_ESP -= sizeof(Addr)+sizeof(Word);
972    /* XXX why does ESP change differ from rt_sigreturn case below? */
973 
974    /* This is only so that the EIP is (might be) useful to report if
975       something goes wrong in the sigreturn */
976    ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);
977 
978    /* Restore register state from frame and remove it */
979    VG_(sigframe_destroy)(tid, False);
980 
981    /* Tell the driver not to update the guest state with the "result",
982       and set a bogus result to keep it happy. */
983    *flags |= SfNoWriteResult;
984    SET_STATUS_Success(0);
985 
986    /* Check to see if any signals arose as a result of this. */
987    *flags |= SfPollAfter;
988 }
989 
PRE(sys_rt_sigreturn)990 PRE(sys_rt_sigreturn)
991 {
992    /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
993       an explanation of what follows. */
994 
995    ThreadState* tst;
996    PRINT("sys_rt_sigreturn ( )");
997 
998    vg_assert(VG_(is_valid_tid)(tid));
999    vg_assert(tid >= 1 && tid < VG_N_THREADS);
1000    vg_assert(VG_(is_running_thread)(tid));
1001 
1002    /* Adjust esp to point to start of frame; skip back up over handler
1003       ret addr */
1004    tst = VG_(get_ThreadState)(tid);
1005    tst->arch.vex.guest_ESP -= sizeof(Addr);
1006    /* XXX why does ESP change differ from sigreturn case above? */
1007 
1008    /* This is only so that the EIP is (might be) useful to report if
1009       something goes wrong in the sigreturn */
1010    ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);
1011 
1012    /* Restore register state from frame and remove it */
1013    VG_(sigframe_destroy)(tid, True);
1014 
1015    /* Tell the driver not to update the guest state with the "result",
1016       and set a bogus result to keep it happy. */
1017    *flags |= SfNoWriteResult;
1018    SET_STATUS_Success(0);
1019 
1020    /* Check to see if any signals arose as a result of this. */
1021    *flags |= SfPollAfter;
1022 }
1023 
PRE(sys_modify_ldt)1024 PRE(sys_modify_ldt)
1025 {
1026    PRINT("sys_modify_ldt ( %ld, %#lx, %lu )", SARG1, ARG2, ARG3);
1027    PRE_REG_READ3(int, "modify_ldt", int, func, void *, ptr,
1028                  unsigned long, bytecount);
1029 
1030    if (ARG1 == 0) {
1031       /* read the LDT into ptr */
1032       PRE_MEM_WRITE( "modify_ldt(ptr)", ARG2, ARG3 );
1033    }
1034    if (ARG1 == 1 || ARG1 == 0x11) {
1035       /* write the LDT with the entry pointed at by ptr */
1036       PRE_MEM_READ( "modify_ldt(ptr)", ARG2, sizeof(vki_modify_ldt_t) );
1037    }
1038    /* "do" the syscall ourselves; the kernel never sees it */
1039    SET_STATUS_from_SysRes( sys_modify_ldt( tid, ARG1, (void*)ARG2, ARG3 ) );
1040 
1041    if (ARG1 == 0 && SUCCESS && RES > 0) {
1042       POST_MEM_WRITE( ARG2, RES );
1043    }
1044 }
1045 
PRE(sys_set_thread_area)1046 PRE(sys_set_thread_area)
1047 {
1048    PRINT("sys_set_thread_area ( %#lx )", ARG1);
1049    PRE_REG_READ1(int, "set_thread_area", struct user_desc *, u_info)
1050    PRE_MEM_READ( "set_thread_area(u_info)", ARG1, sizeof(vki_modify_ldt_t) );
1051 
1052    /* "do" the syscall ourselves; the kernel never sees it */
1053    SET_STATUS_from_SysRes( sys_set_thread_area( tid, (void *)ARG1 ) );
1054 }
1055 
PRE(sys_get_thread_area)1056 PRE(sys_get_thread_area)
1057 {
1058    PRINT("sys_get_thread_area ( %#lx )", ARG1);
1059    PRE_REG_READ1(int, "get_thread_area", struct user_desc *, u_info)
1060    PRE_MEM_WRITE( "get_thread_area(u_info)", ARG1, sizeof(vki_modify_ldt_t) );
1061 
1062    /* "do" the syscall ourselves; the kernel never sees it */
1063    SET_STATUS_from_SysRes( sys_get_thread_area( tid, (void *)ARG1 ) );
1064 
1065    if (SUCCESS) {
1066       POST_MEM_WRITE( ARG1, sizeof(vki_modify_ldt_t) );
1067    }
1068 }
1069 
1070 // Parts of this are x86-specific, but the *PEEK* cases are generic.
1071 //
1072 // ARG3 is only used for pointers into the traced process's address
1073 // space and for offsets into the traced process's struct
1074 // user_regs_struct. It is never a pointer into this process's memory
1075 // space, and we should therefore not check anything it points to.
PRE(sys_ptrace)1076 PRE(sys_ptrace)
1077 {
1078    PRINT("sys_ptrace ( %ld, %ld, %#lx, %#lx )", SARG1, SARG2, ARG3, ARG4);
1079    PRE_REG_READ4(int, "ptrace",
1080                  long, request, long, pid, unsigned long, addr,
1081                  unsigned long, data);
1082    switch (ARG1) {
1083    case VKI_PTRACE_PEEKTEXT:
1084    case VKI_PTRACE_PEEKDATA:
1085    case VKI_PTRACE_PEEKUSR:
1086       PRE_MEM_WRITE( "ptrace(peek)", ARG4,
1087 		     sizeof (long));
1088       break;
1089    case VKI_PTRACE_GETREGS:
1090       PRE_MEM_WRITE( "ptrace(getregs)", ARG4,
1091 		     sizeof (struct vki_user_regs_struct));
1092       break;
1093    case VKI_PTRACE_GETFPREGS:
1094       PRE_MEM_WRITE( "ptrace(getfpregs)", ARG4,
1095 		     sizeof (struct vki_user_i387_struct));
1096       break;
1097    case VKI_PTRACE_GETFPXREGS:
1098       PRE_MEM_WRITE( "ptrace(getfpxregs)", ARG4,
1099                      sizeof(struct vki_user_fxsr_struct) );
1100       break;
1101    case VKI_PTRACE_GET_THREAD_AREA:
1102       PRE_MEM_WRITE( "ptrace(get_thread_area)", ARG4,
1103                      sizeof(struct vki_user_desc) );
1104       break;
1105    case VKI_PTRACE_SETREGS:
1106       PRE_MEM_READ( "ptrace(setregs)", ARG4,
1107 		     sizeof (struct vki_user_regs_struct));
1108       break;
1109    case VKI_PTRACE_SETFPREGS:
1110       PRE_MEM_READ( "ptrace(setfpregs)", ARG4,
1111 		     sizeof (struct vki_user_i387_struct));
1112       break;
1113    case VKI_PTRACE_SETFPXREGS:
1114       PRE_MEM_READ( "ptrace(setfpxregs)", ARG4,
1115                      sizeof(struct vki_user_fxsr_struct) );
1116       break;
1117    case VKI_PTRACE_SET_THREAD_AREA:
1118       PRE_MEM_READ( "ptrace(set_thread_area)", ARG4,
1119                      sizeof(struct vki_user_desc) );
1120       break;
1121    case VKI_PTRACE_GETEVENTMSG:
1122       PRE_MEM_WRITE( "ptrace(geteventmsg)", ARG4, sizeof(unsigned long));
1123       break;
1124    case VKI_PTRACE_GETSIGINFO:
1125       PRE_MEM_WRITE( "ptrace(getsiginfo)", ARG4, sizeof(vki_siginfo_t));
1126       break;
1127    case VKI_PTRACE_SETSIGINFO:
1128       PRE_MEM_READ( "ptrace(setsiginfo)", ARG4, sizeof(vki_siginfo_t));
1129       break;
1130    case VKI_PTRACE_GETREGSET:
1131       ML_(linux_PRE_getregset)(tid, ARG3, ARG4);
1132       break;
1133    case VKI_PTRACE_SETREGSET:
1134       ML_(linux_PRE_setregset)(tid, ARG3, ARG4);
1135       break;
1136    default:
1137       break;
1138    }
1139 }
1140 
POST(sys_ptrace)1141 POST(sys_ptrace)
1142 {
1143    switch (ARG1) {
1144    case VKI_PTRACE_PEEKTEXT:
1145    case VKI_PTRACE_PEEKDATA:
1146    case VKI_PTRACE_PEEKUSR:
1147       POST_MEM_WRITE( ARG4, sizeof (long));
1148       break;
1149    case VKI_PTRACE_GETREGS:
1150       POST_MEM_WRITE( ARG4, sizeof (struct vki_user_regs_struct));
1151       break;
1152    case VKI_PTRACE_GETFPREGS:
1153       POST_MEM_WRITE( ARG4, sizeof (struct vki_user_i387_struct));
1154       break;
1155    case VKI_PTRACE_GETFPXREGS:
1156       POST_MEM_WRITE( ARG4, sizeof(struct vki_user_fxsr_struct) );
1157       break;
1158    case VKI_PTRACE_GET_THREAD_AREA:
1159       POST_MEM_WRITE( ARG4, sizeof(struct vki_user_desc) );
1160       break;
1161    case VKI_PTRACE_GETEVENTMSG:
1162       POST_MEM_WRITE( ARG4, sizeof(unsigned long));
1163       break;
1164    case VKI_PTRACE_GETSIGINFO:
1165       /* XXX: This is a simplification. Different parts of the
1166        * siginfo_t are valid depending on the type of signal.
1167        */
1168       POST_MEM_WRITE( ARG4, sizeof(vki_siginfo_t));
1169       break;
1170    case VKI_PTRACE_GETREGSET:
1171       ML_(linux_POST_getregset)(tid, ARG3, ARG4);
1172       break;
1173    default:
1174       break;
1175    }
1176 }
1177 
PRE(old_mmap)1178 PRE(old_mmap)
1179 {
1180    /* struct mmap_arg_struct {
1181          unsigned long addr;
1182          unsigned long len;
1183          unsigned long prot;
1184          unsigned long flags;
1185          unsigned long fd;
1186          unsigned long offset;
1187    }; */
1188    UWord a1, a2, a3, a4, a5, a6;
1189    SysRes r;
1190 
1191    UWord* args = (UWord*)ARG1;
1192    PRE_REG_READ1(long, "old_mmap", struct mmap_arg_struct *, args);
1193    PRE_MEM_READ( "old_mmap(args)", (Addr)args, 6*sizeof(UWord) );
1194 
1195    a1 = args[1-1];
1196    a2 = args[2-1];
1197    a3 = args[3-1];
1198    a4 = args[4-1];
1199    a5 = args[5-1];
1200    a6 = args[6-1];
1201 
1202    PRINT("old_mmap ( %#lx, %lu, %ld, %ld, %ld, %ld )",
1203          a1, a2, (Word)a3, (Word)a4, (Word)a5, (Word)a6 );
1204 
1205    r = ML_(generic_PRE_sys_mmap)( tid, a1, a2, a3, a4, a5, (Off64T)a6 );
1206    SET_STATUS_from_SysRes(r);
1207 }
1208 
PRE(sys_mmap2)1209 PRE(sys_mmap2)
1210 {
1211    SysRes r;
1212 
1213    // Exactly like old_mmap() except:
1214    //  - all 6 args are passed in regs, rather than in a memory-block.
1215    //  - the file offset is specified in pagesize units rather than bytes,
1216    //    so that it can be used for files bigger than 2^32 bytes.
1217    // pagesize or 4K-size units in offset?  For ppc32/64-linux, this is
1218    // 4K-sized.  Assert that the page size is 4K here for safety.
1219    vg_assert(VKI_PAGE_SIZE == 4096);
1220    PRINT("sys_mmap2 ( %#lx, %lu, %lu, %lu, %lu, %lu )",
1221          ARG1, ARG2, ARG3, ARG4, ARG5, ARG6 );
1222    PRE_REG_READ6(long, "mmap2",
1223                  unsigned long, start, unsigned long, length,
1224                  unsigned long, prot,  unsigned long, flags,
1225                  unsigned long, fd,    unsigned long, offset);
1226 
1227    r = ML_(generic_PRE_sys_mmap)( tid, ARG1, ARG2, ARG3, ARG4, ARG5,
1228                                        4096 * (Off64T)ARG6 );
1229    SET_STATUS_from_SysRes(r);
1230 }
1231 
1232 // XXX: lstat64/fstat64/stat64 are generic, but not necessarily
1233 // applicable to every architecture -- I think only to 32-bit archs.
1234 // We're going to need something like linux/core_os32.h for such
1235 // things, eventually, I think.  --njn
PRE(sys_lstat64)1236 PRE(sys_lstat64)
1237 {
1238    PRINT("sys_lstat64 ( %#lx(%s), %#lx )", ARG1, (HChar*)ARG1, ARG2);
1239    PRE_REG_READ2(long, "lstat64", char *, file_name, struct stat64 *, buf);
1240    PRE_MEM_RASCIIZ( "lstat64(file_name)", ARG1 );
1241    PRE_MEM_WRITE( "lstat64(buf)", ARG2, sizeof(struct vki_stat64) );
1242 }
1243 
POST(sys_lstat64)1244 POST(sys_lstat64)
1245 {
1246    vg_assert(SUCCESS);
1247    if (RES == 0) {
1248       POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
1249    }
1250 }
1251 
PRE(sys_stat64)1252 PRE(sys_stat64)
1253 {
1254    FUSE_COMPATIBLE_MAY_BLOCK();
1255    PRINT("sys_stat64 ( %#lx(%s), %#lx )", ARG1, (HChar*)ARG1, ARG2);
1256    PRE_REG_READ2(long, "stat64", char *, file_name, struct stat64 *, buf);
1257    PRE_MEM_RASCIIZ( "stat64(file_name)", ARG1 );
1258    PRE_MEM_WRITE( "stat64(buf)", ARG2, sizeof(struct vki_stat64) );
1259 }
1260 
POST(sys_stat64)1261 POST(sys_stat64)
1262 {
1263    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
1264 }
1265 
PRE(sys_fstatat64)1266 PRE(sys_fstatat64)
1267 {
1268    FUSE_COMPATIBLE_MAY_BLOCK();
1269    // ARG4 =  int flags;  Flags are or'ed together, therefore writing them
1270    // as a hex constant is more meaningful.
1271    PRINT("sys_fstatat64 ( %ld, %#lx(%s), %#lx, %#lx )",
1272          SARG1, ARG2, (HChar*)ARG2, ARG3, ARG4);
1273    PRE_REG_READ4(long, "fstatat64",
1274                  int, dfd, char *, file_name, struct stat64 *, buf, int, flags);
1275    PRE_MEM_RASCIIZ( "fstatat64(file_name)", ARG2 );
1276    PRE_MEM_WRITE( "fstatat64(buf)", ARG3, sizeof(struct vki_stat64) );
1277 }
1278 
POST(sys_fstatat64)1279 POST(sys_fstatat64)
1280 {
1281    POST_MEM_WRITE( ARG3, sizeof(struct vki_stat64) );
1282 }
1283 
PRE(sys_fstat64)1284 PRE(sys_fstat64)
1285 {
1286    PRINT("sys_fstat64 ( %lu, %#lx )", ARG1, ARG2);
1287    PRE_REG_READ2(long, "fstat64", unsigned long, fd, struct stat64 *, buf);
1288    PRE_MEM_WRITE( "fstat64(buf)", ARG2, sizeof(struct vki_stat64) );
1289 }
1290 
POST(sys_fstat64)1291 POST(sys_fstat64)
1292 {
1293    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
1294 }
1295 
1296 /* NB: arm-linux has a clone of this one, and ppc32-linux has an almost
1297    identical version. */
PRE(sys_sigsuspend)1298 PRE(sys_sigsuspend)
1299 {
1300    /* The C library interface to sigsuspend just takes a pointer to
1301       a signal mask but this system call has three arguments - the first
1302       two don't appear to be used by the kernel and are always passed as
1303       zero by glibc and the third is the first word of the signal mask
1304       so only 32 signals are supported.
1305 
1306       In fact glibc normally uses rt_sigsuspend if it is available as
1307       that takes a pointer to the signal mask so supports more signals.
1308     */
1309    *flags |= SfMayBlock;
1310    PRINT("sys_sigsuspend ( %ld, %ld, %lu )", SARG1, SARG2, ARG3 );
1311    PRE_REG_READ3(int, "sigsuspend",
1312                  int, history0, int, history1,
1313                  vki_old_sigset_t, mask);
1314 }
1315 
PRE(sys_vm86old)1316 PRE(sys_vm86old)
1317 {
1318    PRINT("sys_vm86old ( %#lx )", ARG1);
1319    PRE_REG_READ1(int, "vm86old", struct vm86_struct *, info);
1320    PRE_MEM_WRITE( "vm86old(info)", ARG1, sizeof(struct vki_vm86_struct));
1321 }
1322 
POST(sys_vm86old)1323 POST(sys_vm86old)
1324 {
1325    POST_MEM_WRITE( ARG1, sizeof(struct vki_vm86_struct));
1326 }
1327 
PRE(sys_vm86)1328 PRE(sys_vm86)
1329 {
1330    PRINT("sys_vm86 ( %lu, %#lx )", ARG1, ARG2);
1331    PRE_REG_READ2(int, "vm86", unsigned long, fn, struct vm86plus_struct *, v86);
1332    if (ARG1 == VKI_VM86_ENTER || ARG1 == VKI_VM86_ENTER_NO_BYPASS)
1333       PRE_MEM_WRITE( "vm86(v86)", ARG2, sizeof(struct vki_vm86plus_struct));
1334 }
1335 
POST(sys_vm86)1336 POST(sys_vm86)
1337 {
1338    if (ARG1 == VKI_VM86_ENTER || ARG1 == VKI_VM86_ENTER_NO_BYPASS)
1339       POST_MEM_WRITE( ARG2, sizeof(struct vki_vm86plus_struct));
1340 }
1341 
1342 
1343 /* ---------------------------------------------------------------
1344    PRE/POST wrappers for x86/Linux-variant specific syscalls
1345    ------------------------------------------------------------ */
1346 
PRE(sys_syscall223)1347 PRE(sys_syscall223)
1348 {
1349    Int err;
1350 
1351    /* 223 is used by sys_bproc.  If we're not on a declared bproc
1352       variant, fail in the usual way. */
1353 
1354    if (!KernelVariantiS(KernelVariant_bproc, VG_(clo_kernel_variant))) {
1355       PRINT("non-existent syscall! (syscall 223)");
1356       PRE_REG_READ0(long, "ni_syscall(223)");
1357       SET_STATUS_Failure( VKI_ENOSYS );
1358       return;
1359    }
1360 
1361    err = ML_(linux_variant_PRE_sys_bproc)( ARG1, ARG2, ARG3,
1362                                            ARG4, ARG5, ARG6 );
1363    if (err) {
1364       SET_STATUS_Failure( err );
1365       return;
1366    }
1367    /* Let it go through. */
1368    *flags |= SfMayBlock; /* who knows?  play safe. */
1369 }
1370 
POST(sys_syscall223)1371 POST(sys_syscall223)
1372 {
1373    ML_(linux_variant_POST_sys_bproc)( ARG1, ARG2, ARG3,
1374                                       ARG4, ARG5, ARG6 );
1375 }
1376 
/* No more wrapper definitions follow; drop the PRE/POST shorthands. */
#undef POST
#undef PRE
1379 
1380 
1381 /* ---------------------------------------------------------------------
1382    The x86/Linux syscall table
1383    ------------------------------------------------------------------ */
1384 
/* Syscall-table entry helpers for wrappers in the x86_linux namespace.
   Judging by the table below, PLAX_ is used for wrappers that have only
   a PRE handler and PLAXY for wrappers that have both PRE and POST. */
#define PLAXY(sysno, name)    WRAPPER_ENTRY_XY(x86_linux, sysno, name)
#define PLAX_(sysno, name)    WRAPPER_ENTRY_X_(x86_linux, sysno, name)
1388 
1389 
1390 // This table maps from __NR_xxx syscall numbers (from
1391 // linux/include/asm-i386/unistd.h) to the appropriate PRE/POST sys_foo()
1392 // wrappers on x86 (as per sys_call_table in linux/arch/i386/kernel/entry.S).
1393 //
// For those syscalls not handled by Valgrind, the annotation indicates the
// arch/OS combination, eg. */* (generic), */Linux (Linux only), ?/?
// (unknown).
1397 
1398 static SyscallTableEntry syscall_table[] = {
1399 //zz    //   (restart_syscall)                             // 0
1400    GENX_(__NR_exit,              sys_exit),           // 1
1401    GENX_(__NR_fork,              sys_fork),           // 2
1402    GENXY(__NR_read,              sys_read),           // 3
1403    GENX_(__NR_write,             sys_write),          // 4
1404 
1405    GENXY(__NR_open,              sys_open),           // 5
1406    GENXY(__NR_close,             sys_close),          // 6
1407    GENXY(__NR_waitpid,           sys_waitpid),        // 7
1408    GENXY(__NR_creat,             sys_creat),          // 8
1409    GENX_(__NR_link,              sys_link),           // 9
1410 
1411    GENX_(__NR_unlink,            sys_unlink),         // 10
1412    GENX_(__NR_execve,            sys_execve),         // 11
1413    GENX_(__NR_chdir,             sys_chdir),          // 12
1414    GENXY(__NR_time,              sys_time),           // 13
1415    GENX_(__NR_mknod,             sys_mknod),          // 14
1416 
1417    GENX_(__NR_chmod,             sys_chmod),          // 15
1418 //zz    LINX_(__NR_lchown,            sys_lchown16),       // 16
1419    GENX_(__NR_break,             sys_ni_syscall),     // 17
1420 //zz    //   (__NR_oldstat,           sys_stat),           // 18 (obsolete)
1421    LINX_(__NR_lseek,             sys_lseek),          // 19
1422 
1423    GENX_(__NR_getpid,            sys_getpid),         // 20
1424    LINX_(__NR_mount,             sys_mount),          // 21
1425    LINX_(__NR_umount,            sys_oldumount),      // 22
1426    LINX_(__NR_setuid,            sys_setuid16),       // 23 ## P
1427    LINX_(__NR_getuid,            sys_getuid16),       // 24 ## P
1428 
1429    LINX_(__NR_stime,             sys_stime),          // 25 * (SVr4,SVID,X/OPEN)
1430    PLAXY(__NR_ptrace,            sys_ptrace),         // 26
1431    GENX_(__NR_alarm,             sys_alarm),          // 27
1432 //zz    //   (__NR_oldfstat,          sys_fstat),          // 28 * L -- obsolete
1433    GENX_(__NR_pause,             sys_pause),          // 29
1434 
1435    LINX_(__NR_utime,             sys_utime),          // 30
1436    GENX_(__NR_stty,              sys_ni_syscall),     // 31
1437    GENX_(__NR_gtty,              sys_ni_syscall),     // 32
1438    GENX_(__NR_access,            sys_access),         // 33
1439    GENX_(__NR_nice,              sys_nice),           // 34
1440 
1441    GENX_(__NR_ftime,             sys_ni_syscall),     // 35
1442    GENX_(__NR_sync,              sys_sync),           // 36
1443    GENX_(__NR_kill,              sys_kill),           // 37
1444    GENX_(__NR_rename,            sys_rename),         // 38
1445    GENX_(__NR_mkdir,             sys_mkdir),          // 39
1446 
1447    GENX_(__NR_rmdir,             sys_rmdir),          // 40
1448    GENXY(__NR_dup,               sys_dup),            // 41
1449    LINXY(__NR_pipe,              sys_pipe),           // 42
1450    GENXY(__NR_times,             sys_times),          // 43
1451    GENX_(__NR_prof,              sys_ni_syscall),     // 44
1452 //zz
1453    GENX_(__NR_brk,               sys_brk),            // 45
1454    LINX_(__NR_setgid,            sys_setgid16),       // 46
1455    LINX_(__NR_getgid,            sys_getgid16),       // 47
1456 //zz    //   (__NR_signal,            sys_signal),         // 48 */* (ANSI C)
1457    LINX_(__NR_geteuid,           sys_geteuid16),      // 49
1458 
1459    LINX_(__NR_getegid,           sys_getegid16),      // 50
1460    GENX_(__NR_acct,              sys_acct),           // 51
1461    LINX_(__NR_umount2,           sys_umount),         // 52
1462    GENX_(__NR_lock,              sys_ni_syscall),     // 53
1463    LINXY(__NR_ioctl,             sys_ioctl),          // 54
1464 
1465    LINXY(__NR_fcntl,             sys_fcntl),          // 55
1466    GENX_(__NR_mpx,               sys_ni_syscall),     // 56
1467    GENX_(__NR_setpgid,           sys_setpgid),        // 57
1468    GENX_(__NR_ulimit,            sys_ni_syscall),     // 58
1469 //zz    //   (__NR_oldolduname,       sys_olduname),       // 59 Linux -- obsolete
1470 //zz
1471    GENX_(__NR_umask,             sys_umask),          // 60
1472    GENX_(__NR_chroot,            sys_chroot),         // 61
1473 //zz    //   (__NR_ustat,             sys_ustat)           // 62 SVr4 -- deprecated
1474    GENXY(__NR_dup2,              sys_dup2),           // 63
1475    GENX_(__NR_getppid,           sys_getppid),        // 64
1476 
1477    GENX_(__NR_getpgrp,           sys_getpgrp),        // 65
1478    GENX_(__NR_setsid,            sys_setsid),         // 66
1479    LINXY(__NR_sigaction,         sys_sigaction),      // 67
1480 //zz    //   (__NR_sgetmask,          sys_sgetmask),       // 68 */* (ANSI C)
1481 //zz    //   (__NR_ssetmask,          sys_ssetmask),       // 69 */* (ANSI C)
1482 //zz
1483    LINX_(__NR_setreuid,          sys_setreuid16),     // 70
1484    LINX_(__NR_setregid,          sys_setregid16),     // 71
1485    PLAX_(__NR_sigsuspend,        sys_sigsuspend),     // 72
1486    LINXY(__NR_sigpending,        sys_sigpending),     // 73
1487    GENX_(__NR_sethostname,       sys_sethostname),    // 74
1488 //zz
1489    GENX_(__NR_setrlimit,         sys_setrlimit),      // 75
1490    GENXY(__NR_getrlimit,         sys_old_getrlimit),  // 76
1491    GENXY(__NR_getrusage,         sys_getrusage),      // 77
1492    GENXY(__NR_gettimeofday,      sys_gettimeofday),   // 78
1493    GENX_(__NR_settimeofday,      sys_settimeofday),   // 79
1494 
1495    LINXY(__NR_getgroups,         sys_getgroups16),    // 80
1496    LINX_(__NR_setgroups,         sys_setgroups16),    // 81
1497    PLAX_(__NR_select,            old_select),         // 82
1498    GENX_(__NR_symlink,           sys_symlink),        // 83
1499 //zz    //   (__NR_oldlstat,          sys_lstat),          // 84 -- obsolete
1500 //zz
1501    GENX_(__NR_readlink,          sys_readlink),       // 85
1502 //zz    //   (__NR_uselib,            sys_uselib),         // 86 */Linux
1503 //zz    //   (__NR_swapon,            sys_swapon),         // 87 */Linux
1504 //zz    //   (__NR_reboot,            sys_reboot),         // 88 */Linux
1505 //zz    //   (__NR_readdir,           old_readdir),        // 89 -- superseded
1506 //zz
1507    PLAX_(__NR_mmap,              old_mmap),           // 90
1508    GENXY(__NR_munmap,            sys_munmap),         // 91
1509    GENX_(__NR_truncate,          sys_truncate),       // 92
1510    GENX_(__NR_ftruncate,         sys_ftruncate),      // 93
1511    GENX_(__NR_fchmod,            sys_fchmod),         // 94
1512 
1513    LINX_(__NR_fchown,            sys_fchown16),       // 95
1514    GENX_(__NR_getpriority,       sys_getpriority),    // 96
1515    GENX_(__NR_setpriority,       sys_setpriority),    // 97
1516    GENX_(__NR_profil,            sys_ni_syscall),     // 98
1517    GENXY(__NR_statfs,            sys_statfs),         // 99
1518 
1519    GENXY(__NR_fstatfs,           sys_fstatfs),        // 100
1520    LINX_(__NR_ioperm,            sys_ioperm),         // 101
1521    LINXY(__NR_socketcall,        sys_socketcall),     // 102 x86/Linux-only
1522    LINXY(__NR_syslog,            sys_syslog),         // 103
1523    GENXY(__NR_setitimer,         sys_setitimer),      // 104
1524 
1525    GENXY(__NR_getitimer,         sys_getitimer),      // 105
1526    GENXY(__NR_stat,              sys_newstat),        // 106
1527    GENXY(__NR_lstat,             sys_newlstat),       // 107
1528    GENXY(__NR_fstat,             sys_newfstat),       // 108
1529 //zz    //   (__NR_olduname,          sys_uname),          // 109 -- obsolete
1530 //zz
1531    GENX_(__NR_iopl,              sys_iopl),           // 110
1532    LINX_(__NR_vhangup,           sys_vhangup),        // 111
1533    GENX_(__NR_idle,              sys_ni_syscall),     // 112
1534    PLAXY(__NR_vm86old,           sys_vm86old),        // 113 x86/Linux-only
1535    GENXY(__NR_wait4,             sys_wait4),          // 114
1536 //zz
1537 //zz    //   (__NR_swapoff,           sys_swapoff),        // 115 */Linux
1538    LINXY(__NR_sysinfo,           sys_sysinfo),        // 116
1539    LINXY(__NR_ipc,               sys_ipc),            // 117
1540    GENX_(__NR_fsync,             sys_fsync),          // 118
1541    PLAX_(__NR_sigreturn,         sys_sigreturn),      // 119 ?/Linux
1542 
1543    PLAX_(__NR_clone,             sys_clone),          // 120
1544 //zz    //   (__NR_setdomainname,     sys_setdomainname),  // 121 */*(?)
1545    GENXY(__NR_uname,             sys_newuname),       // 122
1546    PLAX_(__NR_modify_ldt,        sys_modify_ldt),     // 123
1547    LINXY(__NR_adjtimex,          sys_adjtimex),       // 124
1548 
1549    GENXY(__NR_mprotect,          sys_mprotect),       // 125
1550    LINXY(__NR_sigprocmask,       sys_sigprocmask),    // 126
1551 //zz    // Nb: create_module() was removed 2.4-->2.6
1552    GENX_(__NR_create_module,     sys_ni_syscall),     // 127
1553    LINX_(__NR_init_module,       sys_init_module),    // 128
1554    LINX_(__NR_delete_module,     sys_delete_module),  // 129
1555 //zz
1556 //zz    // Nb: get_kernel_syms() was removed 2.4-->2.6
1557    GENX_(__NR_get_kernel_syms,   sys_ni_syscall),     // 130
1558    LINX_(__NR_quotactl,          sys_quotactl),       // 131
1559    GENX_(__NR_getpgid,           sys_getpgid),        // 132
1560    GENX_(__NR_fchdir,            sys_fchdir),         // 133
1561 //zz    //   (__NR_bdflush,           sys_bdflush),        // 134 */Linux
1562 //zz
1563 //zz    //   (__NR_sysfs,             sys_sysfs),          // 135 SVr4
1564    LINX_(__NR_personality,       sys_personality),    // 136
1565    GENX_(__NR_afs_syscall,       sys_ni_syscall),     // 137
1566    LINX_(__NR_setfsuid,          sys_setfsuid16),     // 138
1567    LINX_(__NR_setfsgid,          sys_setfsgid16),     // 139
1568 
1569    LINXY(__NR__llseek,           sys_llseek),         // 140
1570    GENXY(__NR_getdents,          sys_getdents),       // 141
1571    GENX_(__NR__newselect,        sys_select),         // 142
1572    GENX_(__NR_flock,             sys_flock),          // 143
1573    GENX_(__NR_msync,             sys_msync),          // 144
1574 
1575    GENXY(__NR_readv,             sys_readv),          // 145
1576    GENX_(__NR_writev,            sys_writev),         // 146
1577    GENX_(__NR_getsid,            sys_getsid),         // 147
1578    GENX_(__NR_fdatasync,         sys_fdatasync),      // 148
1579    LINXY(__NR__sysctl,           sys_sysctl),         // 149
1580 
1581    GENX_(__NR_mlock,             sys_mlock),          // 150
1582    GENX_(__NR_munlock,           sys_munlock),        // 151
1583    GENX_(__NR_mlockall,          sys_mlockall),       // 152
1584    LINX_(__NR_munlockall,        sys_munlockall),     // 153
1585    LINXY(__NR_sched_setparam,    sys_sched_setparam), // 154
1586 
1587    LINXY(__NR_sched_getparam,         sys_sched_getparam),        // 155
1588    LINX_(__NR_sched_setscheduler,     sys_sched_setscheduler),    // 156
1589    LINX_(__NR_sched_getscheduler,     sys_sched_getscheduler),    // 157
1590    LINX_(__NR_sched_yield,            sys_sched_yield),           // 158
1591    LINX_(__NR_sched_get_priority_max, sys_sched_get_priority_max),// 159
1592 
1593    LINX_(__NR_sched_get_priority_min, sys_sched_get_priority_min),// 160
1594    LINXY(__NR_sched_rr_get_interval,  sys_sched_rr_get_interval), // 161
1595    GENXY(__NR_nanosleep,         sys_nanosleep),      // 162
1596    GENX_(__NR_mremap,            sys_mremap),         // 163
1597    LINX_(__NR_setresuid,         sys_setresuid16),    // 164
1598 
1599    LINXY(__NR_getresuid,         sys_getresuid16),    // 165
1600    PLAXY(__NR_vm86,              sys_vm86),           // 166 x86/Linux-only
1601    GENX_(__NR_query_module,      sys_ni_syscall),     // 167
1602    GENXY(__NR_poll,              sys_poll),           // 168
1603 //zz    //   (__NR_nfsservctl,        sys_nfsservctl),     // 169 */Linux
1604 //zz
1605    LINX_(__NR_setresgid,         sys_setresgid16),    // 170
1606    LINXY(__NR_getresgid,         sys_getresgid16),    // 171
1607    LINXY(__NR_prctl,             sys_prctl),          // 172
1608    PLAX_(__NR_rt_sigreturn,      sys_rt_sigreturn),   // 173 x86/Linux only?
1609    LINXY(__NR_rt_sigaction,      sys_rt_sigaction),   // 174
1610 
1611    LINXY(__NR_rt_sigprocmask,    sys_rt_sigprocmask), // 175
1612    LINXY(__NR_rt_sigpending,     sys_rt_sigpending),  // 176
1613    LINXY(__NR_rt_sigtimedwait,   sys_rt_sigtimedwait),// 177
1614    LINXY(__NR_rt_sigqueueinfo,   sys_rt_sigqueueinfo),// 178
1615    LINX_(__NR_rt_sigsuspend,     sys_rt_sigsuspend),  // 179
1616 
1617    GENXY(__NR_pread64,           sys_pread64),        // 180
1618    GENX_(__NR_pwrite64,          sys_pwrite64),       // 181
1619    LINX_(__NR_chown,             sys_chown16),        // 182
1620    GENXY(__NR_getcwd,            sys_getcwd),         // 183
1621    LINXY(__NR_capget,            sys_capget),         // 184
1622 
1623    LINX_(__NR_capset,            sys_capset),         // 185
1624    GENXY(__NR_sigaltstack,       sys_sigaltstack),    // 186
1625    LINXY(__NR_sendfile,          sys_sendfile),       // 187
1626    GENXY(__NR_getpmsg,           sys_getpmsg),        // 188
1627    GENX_(__NR_putpmsg,           sys_putpmsg),        // 189
1628 
1629    // Nb: we treat vfork as fork
1630    GENX_(__NR_vfork,             sys_fork),           // 190
1631    GENXY(__NR_ugetrlimit,        sys_getrlimit),      // 191
1632    PLAX_(__NR_mmap2,             sys_mmap2),          // 192
1633    GENX_(__NR_truncate64,        sys_truncate64),     // 193
1634    GENX_(__NR_ftruncate64,       sys_ftruncate64),    // 194
1635 
1636    PLAXY(__NR_stat64,            sys_stat64),         // 195
1637    PLAXY(__NR_lstat64,           sys_lstat64),        // 196
1638    PLAXY(__NR_fstat64,           sys_fstat64),        // 197
1639    GENX_(__NR_lchown32,          sys_lchown),         // 198
1640    GENX_(__NR_getuid32,          sys_getuid),         // 199
1641 
1642    GENX_(__NR_getgid32,          sys_getgid),         // 200
1643    GENX_(__NR_geteuid32,         sys_geteuid),        // 201
1644    GENX_(__NR_getegid32,         sys_getegid),        // 202
1645    GENX_(__NR_setreuid32,        sys_setreuid),       // 203
1646    GENX_(__NR_setregid32,        sys_setregid),       // 204
1647 
1648    GENXY(__NR_getgroups32,       sys_getgroups),      // 205
1649    GENX_(__NR_setgroups32,       sys_setgroups),      // 206
1650    GENX_(__NR_fchown32,          sys_fchown),         // 207
1651    LINX_(__NR_setresuid32,       sys_setresuid),      // 208
1652    LINXY(__NR_getresuid32,       sys_getresuid),      // 209
1653 
1654    LINX_(__NR_setresgid32,       sys_setresgid),      // 210
1655    LINXY(__NR_getresgid32,       sys_getresgid),      // 211
1656    GENX_(__NR_chown32,           sys_chown),          // 212
1657    GENX_(__NR_setuid32,          sys_setuid),         // 213
1658    GENX_(__NR_setgid32,          sys_setgid),         // 214
1659 
1660    LINX_(__NR_setfsuid32,        sys_setfsuid),       // 215
1661    LINX_(__NR_setfsgid32,        sys_setfsgid),       // 216
1662    LINX_(__NR_pivot_root,        sys_pivot_root),     // 217
1663    GENXY(__NR_mincore,           sys_mincore),        // 218
1664    GENX_(__NR_madvise,           sys_madvise),        // 219
1665 
1666    GENXY(__NR_getdents64,        sys_getdents64),     // 220
1667    LINXY(__NR_fcntl64,           sys_fcntl64),        // 221
1668    GENX_(222,                    sys_ni_syscall),     // 222
1669    PLAXY(223,                    sys_syscall223),     // 223 // sys_bproc?
1670    LINX_(__NR_gettid,            sys_gettid),         // 224
1671 
1672    LINX_(__NR_readahead,         sys_readahead),      // 225 */Linux
1673    LINX_(__NR_setxattr,          sys_setxattr),       // 226
1674    LINX_(__NR_lsetxattr,         sys_lsetxattr),      // 227
1675    LINX_(__NR_fsetxattr,         sys_fsetxattr),      // 228
1676    LINXY(__NR_getxattr,          sys_getxattr),       // 229
1677 
1678    LINXY(__NR_lgetxattr,         sys_lgetxattr),      // 230
1679    LINXY(__NR_fgetxattr,         sys_fgetxattr),      // 231
1680    LINXY(__NR_listxattr,         sys_listxattr),      // 232
1681    LINXY(__NR_llistxattr,        sys_llistxattr),     // 233
1682    LINXY(__NR_flistxattr,        sys_flistxattr),     // 234
1683 
1684    LINX_(__NR_removexattr,       sys_removexattr),    // 235
1685    LINX_(__NR_lremovexattr,      sys_lremovexattr),   // 236
1686    LINX_(__NR_fremovexattr,      sys_fremovexattr),   // 237
1687    LINXY(__NR_tkill,             sys_tkill),          // 238 */Linux
1688    LINXY(__NR_sendfile64,        sys_sendfile64),     // 239
1689 
1690    LINXY(__NR_futex,             sys_futex),             // 240
1691    LINX_(__NR_sched_setaffinity, sys_sched_setaffinity), // 241
1692    LINXY(__NR_sched_getaffinity, sys_sched_getaffinity), // 242
1693    PLAX_(__NR_set_thread_area,   sys_set_thread_area),   // 243
1694    PLAX_(__NR_get_thread_area,   sys_get_thread_area),   // 244
1695 
1696    LINXY(__NR_io_setup,          sys_io_setup),       // 245
1697    LINX_(__NR_io_destroy,        sys_io_destroy),     // 246
1698    LINXY(__NR_io_getevents,      sys_io_getevents),   // 247
1699    LINX_(__NR_io_submit,         sys_io_submit),      // 248
1700    LINXY(__NR_io_cancel,         sys_io_cancel),      // 249
1701 
1702    LINX_(__NR_fadvise64,         sys_fadvise64),      // 250 */(Linux?)
1703    GENX_(251,                    sys_ni_syscall),     // 251
1704    LINX_(__NR_exit_group,        sys_exit_group),     // 252
1705    LINXY(__NR_lookup_dcookie,    sys_lookup_dcookie), // 253
1706    LINXY(__NR_epoll_create,      sys_epoll_create),   // 254
1707 
1708    LINX_(__NR_epoll_ctl,         sys_epoll_ctl),         // 255
1709    LINXY(__NR_epoll_wait,        sys_epoll_wait),        // 256
1710 //zz    //   (__NR_remap_file_pages,  sys_remap_file_pages),  // 257 */Linux
1711    LINX_(__NR_set_tid_address,   sys_set_tid_address),   // 258
1712    LINXY(__NR_timer_create,      sys_timer_create),      // 259
1713 
1714    LINXY(__NR_timer_settime,     sys_timer_settime),  // (timer_create+1)
1715    LINXY(__NR_timer_gettime,     sys_timer_gettime),  // (timer_create+2)
1716    LINX_(__NR_timer_getoverrun,  sys_timer_getoverrun),//(timer_create+3)
1717    LINX_(__NR_timer_delete,      sys_timer_delete),   // (timer_create+4)
1718    LINX_(__NR_clock_settime,     sys_clock_settime),  // (timer_create+5)
1719 
1720    LINXY(__NR_clock_gettime,     sys_clock_gettime),  // (timer_create+6)
1721    LINXY(__NR_clock_getres,      sys_clock_getres),   // (timer_create+7)
1722    LINXY(__NR_clock_nanosleep,   sys_clock_nanosleep),// (timer_create+8) */*
1723    GENXY(__NR_statfs64,          sys_statfs64),       // 268
1724    GENXY(__NR_fstatfs64,         sys_fstatfs64),      // 269
1725 
1726    LINX_(__NR_tgkill,            sys_tgkill),         // 270 */Linux
1727    GENX_(__NR_utimes,            sys_utimes),         // 271
1728    LINX_(__NR_fadvise64_64,      sys_fadvise64_64),   // 272 */(Linux?)
1729    GENX_(__NR_vserver,           sys_ni_syscall),     // 273
1730    LINX_(__NR_mbind,             sys_mbind),          // 274 ?/?
1731 
1732    LINXY(__NR_get_mempolicy,     sys_get_mempolicy),  // 275 ?/?
1733    LINX_(__NR_set_mempolicy,     sys_set_mempolicy),  // 276 ?/?
1734    LINXY(__NR_mq_open,           sys_mq_open),        // 277
1735    LINX_(__NR_mq_unlink,         sys_mq_unlink),      // (mq_open+1)
1736    LINX_(__NR_mq_timedsend,      sys_mq_timedsend),   // (mq_open+2)
1737 
1738    LINXY(__NR_mq_timedreceive,   sys_mq_timedreceive),// (mq_open+3)
1739    LINX_(__NR_mq_notify,         sys_mq_notify),      // (mq_open+4)
1740    LINXY(__NR_mq_getsetattr,     sys_mq_getsetattr),  // (mq_open+5)
1741    GENX_(__NR_sys_kexec_load,    sys_ni_syscall),     // 283
1742    LINXY(__NR_waitid,            sys_waitid),         // 284
1743 
1744    GENX_(285,                    sys_ni_syscall),     // 285
1745    LINX_(__NR_add_key,           sys_add_key),        // 286
1746    LINX_(__NR_request_key,       sys_request_key),    // 287
1747    LINXY(__NR_keyctl,            sys_keyctl),         // 288
1748    LINX_(__NR_ioprio_set,        sys_ioprio_set),     // 289
1749 
1750    LINX_(__NR_ioprio_get,        sys_ioprio_get),     // 290
1751    LINX_(__NR_inotify_init,	 sys_inotify_init),   // 291
1752    LINX_(__NR_inotify_add_watch, sys_inotify_add_watch), // 292
1753    LINX_(__NR_inotify_rm_watch,	 sys_inotify_rm_watch), // 293
1754 //   LINX_(__NR_migrate_pages,	 sys_migrate_pages),    // 294
1755 
1756    LINXY(__NR_openat,		 sys_openat),           // 295
1757    LINX_(__NR_mkdirat,		 sys_mkdirat),          // 296
1758    LINX_(__NR_mknodat,		 sys_mknodat),          // 297
1759    LINX_(__NR_fchownat,		 sys_fchownat),         // 298
1760    LINX_(__NR_futimesat,	 sys_futimesat),        // 299
1761 
1762    PLAXY(__NR_fstatat64,	 sys_fstatat64),        // 300
1763    LINX_(__NR_unlinkat,		 sys_unlinkat),         // 301
1764    LINX_(__NR_renameat,		 sys_renameat),         // 302
1765    LINX_(__NR_linkat,		 sys_linkat),           // 303
1766    LINX_(__NR_symlinkat,	 sys_symlinkat),        // 304
1767 
1768    LINX_(__NR_readlinkat,	 sys_readlinkat),       // 305
1769    LINX_(__NR_fchmodat,		 sys_fchmodat),         // 306
1770    LINX_(__NR_faccessat,	 sys_faccessat),        // 307
1771    LINX_(__NR_pselect6,		 sys_pselect6),         // 308
1772    LINXY(__NR_ppoll,		 sys_ppoll),            // 309
1773 
1774    LINX_(__NR_unshare,		 sys_unshare),          // 310
1775    LINX_(__NR_set_robust_list,	 sys_set_robust_list),  // 311
1776    LINXY(__NR_get_robust_list,	 sys_get_robust_list),  // 312
1777    LINX_(__NR_splice,            sys_splice),           // 313
1778    LINX_(__NR_sync_file_range,   sys_sync_file_range),  // 314
1779 
1780    LINX_(__NR_tee,               sys_tee),              // 315
1781    LINXY(__NR_vmsplice,          sys_vmsplice),         // 316
1782    LINXY(__NR_move_pages,        sys_move_pages),       // 317
1783    LINXY(__NR_getcpu,            sys_getcpu),           // 318
1784    LINXY(__NR_epoll_pwait,       sys_epoll_pwait),      // 319
1785 
1786    LINX_(__NR_utimensat,         sys_utimensat),        // 320
1787    LINXY(__NR_signalfd,          sys_signalfd),         // 321
1788    LINXY(__NR_timerfd_create,    sys_timerfd_create),   // 322
1789    LINXY(__NR_eventfd,           sys_eventfd),          // 323
1790    LINX_(__NR_fallocate,         sys_fallocate),        // 324
1791 
1792    LINXY(__NR_timerfd_settime,   sys_timerfd_settime),  // 325
1793    LINXY(__NR_timerfd_gettime,   sys_timerfd_gettime),  // 326
1794    LINXY(__NR_signalfd4,         sys_signalfd4),        // 327
1795    LINXY(__NR_eventfd2,          sys_eventfd2),         // 328
1796    LINXY(__NR_epoll_create1,     sys_epoll_create1),     // 329
1797 
1798    LINXY(__NR_dup3,              sys_dup3),             // 330
1799    LINXY(__NR_pipe2,             sys_pipe2),            // 331
1800    LINXY(__NR_inotify_init1,     sys_inotify_init1),    // 332
1801    LINXY(__NR_preadv,            sys_preadv),           // 333
1802    LINX_(__NR_pwritev,           sys_pwritev),          // 334
1803 
1804    LINXY(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo),// 335
1805    LINXY(__NR_perf_event_open,   sys_perf_event_open),  // 336
1806    LINXY(__NR_recvmmsg,          sys_recvmmsg),         // 337
1807    LINXY(__NR_fanotify_init,     sys_fanotify_init),    // 338
1808    LINX_(__NR_fanotify_mark,     sys_fanotify_mark),    // 339
1809 
1810    LINXY(__NR_prlimit64,         sys_prlimit64),        // 340
1811    LINXY(__NR_name_to_handle_at, sys_name_to_handle_at),// 341
1812    LINXY(__NR_open_by_handle_at, sys_open_by_handle_at),// 342
1813    LINXY(__NR_clock_adjtime,     sys_clock_adjtime),    // 343
1814    LINX_(__NR_syncfs,            sys_syncfs),           // 344
1815 
1816    LINXY(__NR_sendmmsg,          sys_sendmmsg),         // 345
1817 //   LINX_(__NR_setns,             sys_ni_syscall),       // 346
1818    LINXY(__NR_process_vm_readv,  sys_process_vm_readv), // 347
1819    LINX_(__NR_process_vm_writev, sys_process_vm_writev),// 348
1820    LINX_(__NR_kcmp,              sys_kcmp),             // 349
1821 
1822 //   LIN__(__NR_finit_module,      sys_ni_syscall),       // 350
1823 //   LIN__(__NR_sched_setattr,     sys_ni_syscall),       // 351
1824 //   LIN__(__NR_sched_getattr,     sys_ni_syscall),       // 352
1825 //   LIN__(__NR_renameat2,         sys_ni_syscall),       // 353
1826 //   LIN__(__NR_seccomp,           sys_ni_syscall),       // 354
1827 
1828    LINXY(__NR_getrandom,         sys_getrandom),        // 355
1829    LINXY(__NR_memfd_create,      sys_memfd_create)      // 356
1830 //   LIN__(__NR_bpf,               sys_ni_syscall)        // 357
1831 };
1832 
ML_(get_linux_syscall_entry)1833 SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno )
1834 {
1835    const UInt syscall_table_size
1836       = sizeof(syscall_table) / sizeof(syscall_table[0]);
1837 
1838    /* Is it in the contiguous initial section of the table? */
1839    if (sysno < syscall_table_size) {
1840       SyscallTableEntry* sys = &syscall_table[sysno];
1841       if (sys->before == NULL)
1842          return NULL; /* no entry */
1843       else
1844          return sys;
1845    }
1846 
1847    /* Can't find a wrapper */
1848    return NULL;
1849 }
1850 
1851 #endif // defined(VGP_x86_linux)
1852 
1853 /*--------------------------------------------------------------------*/
1854 /*--- end                                                          ---*/
1855 /*--------------------------------------------------------------------*/
1856