1
2 /*--------------------------------------------------------------------*/
3 /*--- Platform-specific syscalls stuff. syswrap-x86-linux.c ---*/
4 /*--------------------------------------------------------------------*/
5
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
9
10 Copyright (C) 2000-2013 Nicholas Nethercote
11 njn@valgrind.org
12
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
17
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 02111-1307, USA.
27
28 The GNU General Public License is contained in the file COPYING.
29 */
30
31 #if defined(VGP_x86_linux)
32
33 /* TODO/FIXME jrs 20050207: assignments to the syscall return result
34 in interrupted_syscall() need to be reviewed. They don't seem
35 to assign the shadow state.
36 */
37
38 #include "pub_core_basics.h"
39 #include "pub_core_vki.h"
40 #include "pub_core_vkiscnums.h"
41 #include "pub_core_libcsetjmp.h" // to keep _threadstate.h happy
42 #include "pub_core_threadstate.h"
43 #include "pub_core_aspacemgr.h"
44 #include "pub_core_debuglog.h"
45 #include "pub_core_libcbase.h"
46 #include "pub_core_libcassert.h"
47 #include "pub_core_libcprint.h"
48 #include "pub_core_libcproc.h"
49 #include "pub_core_libcsignal.h"
50 #include "pub_core_mallocfree.h"
51 #include "pub_core_options.h"
52 #include "pub_core_scheduler.h"
53 #include "pub_core_sigframe.h" // For VG_(sigframe_destroy)()
54 #include "pub_core_signals.h"
55 #include "pub_core_syscall.h"
56 #include "pub_core_syswrap.h"
57 #include "pub_core_tooliface.h"
58 #include "pub_core_stacks.h" // VG_(register_stack)
59
60 #include "priv_types_n_macros.h"
61 #include "priv_syswrap-generic.h" /* for decls of generic wrappers */
62 #include "priv_syswrap-linux.h" /* for decls of linux-ish wrappers */
63 #include "priv_syswrap-linux-variants.h" /* decls of linux variant wrappers */
64 #include "priv_syswrap-main.h"
65
66
67 /* ---------------------------------------------------------------------
68 clone() handling
69 ------------------------------------------------------------------ */
70
71 /* Call f(arg1), but first switch stacks, using 'stack' as the new
72 stack, and use 'retaddr' as f's return-to address. Also, clear all
73 the integer registers before entering f.*/
/* Declared here; the definition is the hand-written asm block just
   below (its linker name is vgModuleLocal_call_on_new_stack_0_1).
   Never returns to the caller: control transfers to f on the new
   stack. */
__attribute__((noreturn))
void ML_(call_on_new_stack_0_1) ( Addr stack,
Addr retaddr,
void (*f)(Word),
Word arg1 );
// Incoming arguments, relative to %esp on entry:
// 4(%esp) == stack
// 8(%esp) == retaddr
// 12(%esp) == f
// 16(%esp) == arg1
/* Builds a fresh frame (arg1, retaddr) on the new stack, zeroes the
   integer registers, and "returns" into f. */
asm(
".text\n"
".globl vgModuleLocal_call_on_new_stack_0_1\n"
"vgModuleLocal_call_on_new_stack_0_1:\n"
" movl %esp, %esi\n" // remember old stack pointer
" movl 4(%esi), %esp\n" // set stack
" pushl 16(%esi)\n" // arg1 to stack
" pushl 8(%esi)\n" // retaddr to stack
" pushl 12(%esi)\n" // f to stack
" movl $0, %eax\n" // zero all GP regs
" movl $0, %ebx\n"
" movl $0, %ecx\n"
" movl $0, %edx\n"
" movl $0, %esi\n"
" movl $0, %edi\n"
" movl $0, %ebp\n"
" ret\n" // jump to f
" ud2\n" // should never get here
".previous\n"
);
103
104
105 /*
106 Perform a clone system call. clone is strange because it has
107 fork()-like return-twice semantics, so it needs special
108 handling here.
109
110 Upon entry, we have:
111
112 int (fn)(void*) in 0+FSZ(%esp)
113 void* child_stack in 4+FSZ(%esp)
114 int flags in 8+FSZ(%esp)
115 void* arg in 12+FSZ(%esp)
116 pid_t* child_tid in 16+FSZ(%esp)
117 pid_t* parent_tid in 20+FSZ(%esp)
118 void* tls_ptr in 24+FSZ(%esp)
119
120 System call requires:
121
122 int $__NR_clone in %eax
123 int flags in %ebx
124 void* child_stack in %ecx
125 pid_t* parent_tid in %edx
126 pid_t* child_tid in %edi
127 void* tls_ptr in %esi
128
129 Returns an Int encoded in the linux-x86 way, not a SysRes.
130 */
/* Size of the frame built by the trampoline below, as a string so it
   can be spliced into the asm addressing modes: return address plus
   the three pushed callee-saved registers. */
#define FSZ "4+4+4+4" /* frame size = retaddr+ebx+edi+esi */
#define __NR_CLONE VG_STRINGIFY(__NR_clone)
#define __NR_EXIT VG_STRINGIFY(__NR_exit)

extern
Int do_syscall_clone_x86_linux ( Word (*fn)(void *),
void* stack,
Int flags,
void* arg,
Int* child_tid,
Int* parent_tid,
vki_modify_ldt_t * );
/* Hand-written clone trampoline.  In the parent it returns the raw
   kernel result (child pid or -errno).  In the child it pops fn and
   arg off the freshly-minted stack, calls fn(arg), and passes fn's
   return value to __NR_exit.  Register order follows the x86-linux
   clone convention noted in the comment above. */
asm(
".text\n"
".globl do_syscall_clone_x86_linux\n"
"do_syscall_clone_x86_linux:\n"
" push %ebx\n"
" push %edi\n"
" push %esi\n"

/* set up child stack with function and arg */
" movl 4+"FSZ"(%esp), %ecx\n" /* syscall arg2: child stack */
" movl 12+"FSZ"(%esp), %ebx\n" /* fn arg */
" movl 0+"FSZ"(%esp), %eax\n" /* fn */
" lea -8(%ecx), %ecx\n" /* make space on stack */
" movl %ebx, 4(%ecx)\n" /* fn arg */
" movl %eax, 0(%ecx)\n" /* fn */

/* get other args to clone */
" movl 8+"FSZ"(%esp), %ebx\n" /* syscall arg1: flags */
" movl 20+"FSZ"(%esp), %edx\n" /* syscall arg3: parent tid * */
" movl 16+"FSZ"(%esp), %edi\n" /* syscall arg5: child tid * */
" movl 24+"FSZ"(%esp), %esi\n" /* syscall arg4: tls_ptr * */
" movl $"__NR_CLONE", %eax\n"
" int $0x80\n" /* clone() */
" testl %eax, %eax\n" /* child if retval == 0 */
" jnz 1f\n"

/* CHILD - call thread function */
" popl %eax\n"
" call *%eax\n" /* call fn */

/* exit with result */
" movl %eax, %ebx\n" /* arg1: return value from fn */
" movl $"__NR_EXIT", %eax\n"
" int $0x80\n"

/* Hm, exit returned */
" ud2\n"

"1:\n" /* PARENT or ERROR */
" pop %esi\n"
" pop %edi\n"
" pop %ebx\n"
" ret\n"
".previous\n"
);

#undef FSZ
#undef __NR_CLONE
#undef __NR_EXIT
192
193
194 // forward declarations
195 static void setup_child ( ThreadArchState*, ThreadArchState*, Bool );
196 static SysRes sys_set_thread_area ( ThreadId, vki_modify_ldt_t* );
197
198 /*
199 When a client clones, we need to keep track of the new thread. This means:
200 1. allocate a ThreadId+ThreadState+stack for the the thread
201
202 2. initialize the thread's new VCPU state
203
204 3. create the thread using the same args as the client requested,
205 but using the scheduler entrypoint for EIP, and a separate stack
206 for ESP.
207 */
/* Service a client clone() that creates a new thread.  Allocates a
   ThreadId/ThreadState and a Valgrind-side stack for the child,
   copies and adjusts the parent's guest state, optionally installs
   the child's TLS descriptor, and finally performs a host clone()
   which starts the child in the scheduler (ML_(start_thread_NORETURN))
   rather than directly at the client's thread function.  Returns the
   kernel's clone() result (as seen by the parent) as a SysRes; on
   error the half-built child thread slot is torn down again. */
static SysRes do_clone ( ThreadId ptid,
UInt flags, Addr esp,
Int* parent_tidptr,
Int* child_tidptr,
vki_modify_ldt_t *tlsinfo)
{
static const Bool debug = False;

ThreadId ctid = VG_(alloc_ThreadState)();
ThreadState* ptst = VG_(get_ThreadState)(ptid);
ThreadState* ctst = VG_(get_ThreadState)(ctid);
UWord* stack;
NSegment const* seg;
SysRes res;
Int eax;
vki_sigset_t blockall, savedmask;

VG_(sigfillset)(&blockall);

vg_assert(VG_(is_running_thread)(ptid));
vg_assert(VG_(is_valid_tid)(ctid));

/* Valgrind-side stack the child runs the scheduler on; distinct
   from the client-supplied esp. */
stack = (UWord*)ML_(allocstack)(ctid);
if (stack == NULL) {
res = VG_(mk_SysRes_Error)( VKI_ENOMEM );
goto out;
}

/* Copy register state

Both parent and child return to the same place, and the code
following the clone syscall works out which is which, so we
don't need to worry about it.

The parent gets the child's new tid returned from clone, but the
child gets 0.

If the clone call specifies a NULL esp for the new thread, then
it actually gets a copy of the parent's esp.
*/
/* Note: the clone call done by the Quadrics Elan3 driver specifies
clone flags of 0xF00, and it seems to rely on the assumption
that the child inherits a copy of the parent's GDT.
setup_child takes care of setting that up. */
setup_child( &ctst->arch, &ptst->arch, True );

/* Make sys_clone appear to have returned Success(0) in the
child. */
ctst->arch.vex.guest_EAX = 0;

if (esp != 0)
ctst->arch.vex.guest_ESP = esp;

ctst->os_state.parent = ptid;

/* inherit signal mask */
ctst->sig_mask = ptst->sig_mask;
ctst->tmp_sig_mask = ptst->sig_mask;

/* Start the child with its threadgroup being the same as the
parent's. This is so that any exit_group calls that happen
after the child is created but before it sets its
os_state.threadgroup field for real (in thread_wrapper in
syswrap-linux.c), really kill the new thread. a.k.a this avoids
a race condition in which the thread is unkillable (via
exit_group) because its threadgroup is not set. The race window
is probably only a few hundred or a few thousand cycles long.
See #226116. */
ctst->os_state.threadgroup = ptst->os_state.threadgroup;

/* We don't really know where the client stack is, because its
allocated by the client. The best we can do is look at the
memory mappings and try to derive some useful information. We
assume that esp starts near its highest possible value, and can
only go down to the start of the mmaped segment. */
seg = VG_(am_find_nsegment)((Addr)esp);
if (seg && seg->kind != SkResvn) {
ctst->client_stack_highest_word = (Addr)VG_PGROUNDUP(esp);
ctst->client_stack_szB = ctst->client_stack_highest_word - seg->start;

VG_(register_stack)(seg->start, ctst->client_stack_highest_word);

if (debug)
VG_(printf)("tid %d: guessed client stack range %#lx-%#lx\n",
ctid, seg->start, VG_PGROUNDUP(esp));
} else {
VG_(message)(Vg_UserMsg,
"!? New thread %d starts with ESP(%#lx) unmapped\n",
ctid, esp);
ctst->client_stack_szB = 0;
}

/* Assume the clone will succeed, and tell any tool that wants to
know that this thread has come into existence. We cannot defer
it beyond this point because sys_set_thread_area, just below,
causes tCheck to assert by making references to the new ThreadId
if we don't state the new thread exists prior to that point.
If the clone fails, we'll send out a ll_exit notification for it
at the out: label below, to clean up. */
vg_assert(VG_(owns_BigLock_LL)(ptid));
VG_TRACK ( pre_thread_ll_create, ptid, ctid );

if (flags & VKI_CLONE_SETTLS) {
if (debug)
VG_(printf)("clone child has SETTLS: tls info at %p: idx=%d "
"base=%#lx limit=%x; esp=%#x fs=%x gs=%x\n",
tlsinfo, tlsinfo->entry_number,
tlsinfo->base_addr, tlsinfo->limit,
ptst->arch.vex.guest_ESP,
ctst->arch.vex.guest_FS, ctst->arch.vex.guest_GS);
/* Install the child's TLS segment into its simulated GDT. */
res = sys_set_thread_area(ctid, tlsinfo);
if (sr_isError(res))
goto out;
}

/* TLS is already handled above; don't let the kernel see it. */
flags &= ~VKI_CLONE_SETTLS;

/* start the thread with everything blocked */
VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, &savedmask);

/* Create the new thread */
eax = do_syscall_clone_x86_linux(
ML_(start_thread_NORETURN), stack, flags, &VG_(threads)[ctid],
child_tidptr, parent_tidptr, NULL
);
res = VG_(mk_SysRes_x86_linux)( eax );

VG_(sigprocmask)(VKI_SIG_SETMASK, &savedmask, NULL);

out:
if (sr_isError(res)) {
/* clone failed */
/* NOTE(review): on the ENOMEM path above we arrive here without
   having announced pre_thread_ll_create for ctid, yet still
   announce pre_thread_ll_exit below — confirm tools tolerate
   the unmatched notification. */
VG_(cleanup_thread)(&ctst->arch);
ctst->status = VgTs_Empty;
/* oops. Better tell the tool the thread exited in a hurry :-) */
VG_TRACK( pre_thread_ll_exit, ctid );
}

return res;
}
348
349
350 /* ---------------------------------------------------------------------
351 LDT/GDT simulation
352 ------------------------------------------------------------------ */
353
354 /* Details of the LDT simulation
355 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
356
357 When a program runs natively, the linux kernel allows each *thread*
358 in it to have its own LDT. Almost all programs never do this --
359 it's wildly unportable, after all -- and so the kernel never
360 allocates the structure, which is just as well as an LDT occupies
361 64k of memory (8192 entries of size 8 bytes).
362
363 A thread may choose to modify its LDT entries, by doing the
364 __NR_modify_ldt syscall. In such a situation the kernel will then
365 allocate an LDT structure for it. Each LDT entry is basically a
366 (base, limit) pair. A virtual address in a specific segment is
367 translated to a linear address by adding the segment's base value.
368 In addition, the virtual address must not exceed the limit value.
369
370 To use an LDT entry, a thread loads one of the segment registers
371 (%cs, %ss, %ds, %es, %fs, %gs) with the index of the LDT entry (0
372 .. 8191) it wants to use. In fact, the required value is (index <<
373 3) + 7, but that's not important right now. Any normal instruction
374 which includes an addressing mode can then be made relative to that
375 LDT entry by prefixing the insn with a so-called segment-override
376 prefix, a byte which indicates which of the 6 segment registers
377 holds the LDT index.
378
379 Now, a key constraint is that valgrind's address checks operate in
380 terms of linear addresses. So we have to explicitly translate
381 virtual addrs into linear addrs, and that means doing a complete
382 LDT simulation.
383
384 Calls to modify_ldt are intercepted. For each thread, we maintain
385 an LDT (with the same normally-never-allocated optimisation that
386 the kernel does). This is updated as expected via calls to
387 modify_ldt.
388
389 When a thread does an amode calculation involving a segment
390 override prefix, the relevant LDT entry for the thread is
391 consulted. It all works.
392
393 There is a conceptual problem, which appears when switching back to
394 native execution, either temporarily to pass syscalls to the
395 kernel, or permanently, when debugging V. Problem at such points
396 is that it's pretty pointless to copy the simulated machine's
397 segment registers to the real machine, because we'd also need to
398 copy the simulated LDT into the real one, and that's prohibitively
399 expensive.
400
401 Fortunately it looks like no syscalls rely on the segment regs or
402 LDT being correct, so we can get away with it. Apart from that the
403 simulation is pretty straightforward. All 6 segment registers are
404 tracked, although only %ds, %es, %fs and %gs are allowed as
405 prefixes. Perhaps it could be restricted even more than that -- I
406 am not sure what is and isn't allowed in user-mode.
407 */
408
409 /* Translate a struct modify_ldt_ldt_s to a VexGuestX86SegDescr, using
410 the Linux kernel's logic (cut-n-paste of code in
411 linux/kernel/ldt.c). */
412
/* Kept deliberately close to the kernel's ldt.c code (see comment
   above); restructure with care. */
static
void translate_to_hw_format ( /* IN */ vki_modify_ldt_t* inn,
/* OUT */ VexGuestX86SegDescr* out,
Int oldmode )
{
UInt entry_1, entry_2;
vg_assert(8 == sizeof(VexGuestX86SegDescr));

if (0)
VG_(printf)("translate_to_hw_format: base %#lx, limit %d\n",
inn->base_addr, inn->limit );

/* Allow LDTs to be cleared by the user. */
if (inn->base_addr == 0 && inn->limit == 0) {
if (oldmode ||
(inn->contents == 0 &&
inn->read_exec_only == 1 &&
inn->seg_32bit == 0 &&
inn->limit_in_pages == 0 &&
inn->seg_not_present == 1 &&
inn->useable == 0 )) {
/* All-default request means "clear this slot": install the
   null descriptor. */
entry_1 = 0;
entry_2 = 0;
goto install;
}
}

/* Low descriptor word: base[15:0] in the top half, limit[15:0] in
   the bottom half. */
entry_1 = ((inn->base_addr & 0x0000ffff) << 16) |
(inn->limit & 0x0ffff);
/* High descriptor word: base[31:24] and base[23:16], limit[19:16],
   and the attribute bits; 0x7000 sets DPL=3 and the S
   (code/data segment) bit. */
entry_2 = (inn->base_addr & 0xff000000) |
((inn->base_addr & 0x00ff0000) >> 16) |
(inn->limit & 0xf0000) |
((inn->read_exec_only ^ 1) << 9) |
(inn->contents << 10) |
((inn->seg_not_present ^ 1) << 15) |
(inn->seg_32bit << 22) |
(inn->limit_in_pages << 23) |
0x7000;
if (!oldmode)
entry_2 |= (inn->useable << 20);

/* Install the new entry ... */
install:
out->LdtEnt.Words.word1 = entry_1;
out->LdtEnt.Words.word2 = entry_2;
}
459
460 /* Create a zeroed-out GDT. */
alloc_zeroed_x86_GDT(void)461 static VexGuestX86SegDescr* alloc_zeroed_x86_GDT ( void )
462 {
463 Int nbytes = VEX_GUEST_X86_GDT_NENT * sizeof(VexGuestX86SegDescr);
464 return VG_(arena_calloc)(VG_AR_CORE, "di.syswrap-x86.azxG.1", nbytes, 1);
465 }
466
467 /* Create a zeroed-out LDT. */
alloc_zeroed_x86_LDT(void)468 static VexGuestX86SegDescr* alloc_zeroed_x86_LDT ( void )
469 {
470 Int nbytes = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
471 return VG_(arena_calloc)(VG_AR_CORE, "di.syswrap-x86.azxL.1", nbytes, 1);
472 }
473
474 /* Free up an LDT or GDT allocated by the above fns. */
free_LDT_or_GDT(VexGuestX86SegDescr * dt)475 static void free_LDT_or_GDT ( VexGuestX86SegDescr* dt )
476 {
477 vg_assert(dt);
478 VG_(arena_free)(VG_AR_CORE, (void*)dt);
479 }
480
481 /* Copy contents between two existing LDTs. */
copy_LDT_from_to(VexGuestX86SegDescr * src,VexGuestX86SegDescr * dst)482 static void copy_LDT_from_to ( VexGuestX86SegDescr* src,
483 VexGuestX86SegDescr* dst )
484 {
485 Int i;
486 vg_assert(src);
487 vg_assert(dst);
488 for (i = 0; i < VEX_GUEST_X86_LDT_NENT; i++)
489 dst[i] = src[i];
490 }
491
492 /* Copy contents between two existing GDTs. */
copy_GDT_from_to(VexGuestX86SegDescr * src,VexGuestX86SegDescr * dst)493 static void copy_GDT_from_to ( VexGuestX86SegDescr* src,
494 VexGuestX86SegDescr* dst )
495 {
496 Int i;
497 vg_assert(src);
498 vg_assert(dst);
499 for (i = 0; i < VEX_GUEST_X86_GDT_NENT; i++)
500 dst[i] = src[i];
501 }
502
503 /* Free this thread's DTs, if it has any. */
deallocate_LGDTs_for_thread(VexGuestX86State * vex)504 static void deallocate_LGDTs_for_thread ( VexGuestX86State* vex )
505 {
506 vg_assert(sizeof(HWord) == sizeof(void*));
507
508 if (0)
509 VG_(printf)("deallocate_LGDTs_for_thread: "
510 "ldt = 0x%lx, gdt = 0x%lx\n",
511 vex->guest_LDT, vex->guest_GDT );
512
513 if (vex->guest_LDT != (HWord)NULL) {
514 free_LDT_or_GDT( (VexGuestX86SegDescr*)vex->guest_LDT );
515 vex->guest_LDT = (HWord)NULL;
516 }
517
518 if (vex->guest_GDT != (HWord)NULL) {
519 free_LDT_or_GDT( (VexGuestX86SegDescr*)vex->guest_GDT );
520 vex->guest_GDT = (HWord)NULL;
521 }
522 }
523
524
525 /*
526 * linux/kernel/ldt.c
527 *
528 * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
529 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
530 */
531
532 /*
533 * read_ldt() is not really atomic - this is not a problem since
534 * synchronization of reads and writes done to the LDT has to be
535 * assured by user-space anyway. Writes are atomic, to protect
536 * the security checks done on new descriptors.
537 */
538 static
read_ldt(ThreadId tid,UChar * ptr,UInt bytecount)539 SysRes read_ldt ( ThreadId tid, UChar* ptr, UInt bytecount )
540 {
541 SysRes res;
542 UInt i, size;
543 UChar* ldt;
544
545 if (0)
546 VG_(printf)("read_ldt: tid = %d, ptr = %p, bytecount = %d\n",
547 tid, ptr, bytecount );
548
549 vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
550 vg_assert(8 == sizeof(VexGuestX86SegDescr));
551
552 ldt = (UChar*)(VG_(threads)[tid].arch.vex.guest_LDT);
553 res = VG_(mk_SysRes_Success)( 0 );
554 if (ldt == NULL)
555 /* LDT not allocated, meaning all entries are null */
556 goto out;
557
558 size = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
559 if (size > bytecount)
560 size = bytecount;
561
562 res = VG_(mk_SysRes_Success)( size );
563 for (i = 0; i < size; i++)
564 ptr[i] = ldt[i];
565
566 out:
567 return res;
568 }
569
570
571 static
write_ldt(ThreadId tid,void * ptr,UInt bytecount,Int oldmode)572 SysRes write_ldt ( ThreadId tid, void* ptr, UInt bytecount, Int oldmode )
573 {
574 SysRes res;
575 VexGuestX86SegDescr* ldt;
576 vki_modify_ldt_t* ldt_info;
577
578 if (0)
579 VG_(printf)("write_ldt: tid = %d, ptr = %p, "
580 "bytecount = %d, oldmode = %d\n",
581 tid, ptr, bytecount, oldmode );
582
583 vg_assert(8 == sizeof(VexGuestX86SegDescr));
584 vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
585
586 ldt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_LDT;
587 ldt_info = (vki_modify_ldt_t*)ptr;
588
589 res = VG_(mk_SysRes_Error)( VKI_EINVAL );
590 if (bytecount != sizeof(vki_modify_ldt_t))
591 goto out;
592
593 res = VG_(mk_SysRes_Error)( VKI_EINVAL );
594 if (ldt_info->entry_number >= VEX_GUEST_X86_LDT_NENT)
595 goto out;
596 if (ldt_info->contents == 3) {
597 if (oldmode)
598 goto out;
599 if (ldt_info->seg_not_present == 0)
600 goto out;
601 }
602
603 /* If this thread doesn't have an LDT, we'd better allocate it
604 now. */
605 if (ldt == NULL) {
606 ldt = alloc_zeroed_x86_LDT();
607 VG_(threads)[tid].arch.vex.guest_LDT = (HWord)ldt;
608 }
609
610 /* Install the new entry ... */
611 translate_to_hw_format ( ldt_info, &ldt[ldt_info->entry_number], oldmode );
612 res = VG_(mk_SysRes_Success)( 0 );
613
614 out:
615 return res;
616 }
617
618
sys_modify_ldt(ThreadId tid,Int func,void * ptr,UInt bytecount)619 static SysRes sys_modify_ldt ( ThreadId tid,
620 Int func, void* ptr, UInt bytecount )
621 {
622 SysRes ret = VG_(mk_SysRes_Error)( VKI_ENOSYS );
623
624 switch (func) {
625 case 0:
626 ret = read_ldt(tid, ptr, bytecount);
627 break;
628 case 1:
629 ret = write_ldt(tid, ptr, bytecount, 1);
630 break;
631 case 2:
632 VG_(unimplemented)("sys_modify_ldt: func == 2");
633 /* god knows what this is about */
634 /* ret = read_default_ldt(ptr, bytecount); */
635 /*UNREACHED*/
636 break;
637 case 0x11:
638 ret = write_ldt(tid, ptr, bytecount, 0);
639 break;
640 }
641 return ret;
642 }
643
644
/* Simulated set_thread_area: install 'info' into this thread's
   simulated GDT, allocating the table on first use.  When
   info->entry_number is -1, pick the first free slot (never slot 0)
   and write the chosen index back into info->entry_number, notifying
   the tool of that client-memory write. */
static SysRes sys_set_thread_area ( ThreadId tid, vki_modify_ldt_t* info )
{
Int idx;
VexGuestX86SegDescr* gdt;

vg_assert(8 == sizeof(VexGuestX86SegDescr));
vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));

if (info == NULL)
return VG_(mk_SysRes_Error)( VKI_EFAULT );

gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;

/* If the thread doesn't have a GDT, allocate it now. */
if (!gdt) {
gdt = alloc_zeroed_x86_GDT();
VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
}

idx = info->entry_number;

if (idx == -1) {
/* Find and use the first free entry. Don't allocate entry
zero, because the hardware will never do that, and apparently
doing so confuses some code (perhaps stuff running on
Wine). */
for (idx = 1; idx < VEX_GUEST_X86_GDT_NENT; idx++) {
/* A slot is free iff both descriptor words are zero. */
if (gdt[idx].LdtEnt.Words.word1 == 0
&& gdt[idx].LdtEnt.Words.word2 == 0)
break;
}

if (idx == VEX_GUEST_X86_GDT_NENT)
return VG_(mk_SysRes_Error)( VKI_ESRCH );
} else if (idx < 0 || idx == 0 || idx >= VEX_GUEST_X86_GDT_NENT) {
/* Similarly, reject attempts to use GDT[0]. */
return VG_(mk_SysRes_Error)( VKI_EINVAL );
}

translate_to_hw_format(info, &gdt[idx], 0);

/* Report the write-back of the allocated index to the tool. */
VG_TRACK( pre_mem_write, Vg_CoreSysCall, tid,
"set_thread_area(info->entry)",
(Addr) & info->entry_number, sizeof(unsigned int) );
info->entry_number = idx;
VG_TRACK( post_mem_write, Vg_CoreSysCall, tid,
(Addr) & info->entry_number, sizeof(unsigned int) );

return VG_(mk_SysRes_Success)( 0 );
}
695
696
/* Simulated get_thread_area: decode the hardware-format descriptor
   at GDT[info->entry_number] back into the vki_modify_ldt_t fields.
   Inverse of translate_to_hw_format for the fields user code can
   observe. */
static SysRes sys_get_thread_area ( ThreadId tid, vki_modify_ldt_t* info )
{
Int idx;
VexGuestX86SegDescr* gdt;

vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
vg_assert(8 == sizeof(VexGuestX86SegDescr));

if (info == NULL)
return VG_(mk_SysRes_Error)( VKI_EFAULT );

idx = info->entry_number;

if (idx < 0 || idx >= VEX_GUEST_X86_GDT_NENT)
return VG_(mk_SysRes_Error)( VKI_EINVAL );

gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;

/* If the thread doesn't have a GDT, allocate it now. */
if (!gdt) {
gdt = alloc_zeroed_x86_GDT();
VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
}

/* Reassemble the 32-bit base and 20-bit limit from their
   scattered descriptor fields. */
info->base_addr = ( gdt[idx].LdtEnt.Bits.BaseHi << 24 ) |
( gdt[idx].LdtEnt.Bits.BaseMid << 16 ) |
gdt[idx].LdtEnt.Bits.BaseLow;
info->limit = ( gdt[idx].LdtEnt.Bits.LimitHi << 16 ) |
gdt[idx].LdtEnt.Bits.LimitLow;
info->seg_32bit = gdt[idx].LdtEnt.Bits.Default_Big;
info->contents = ( gdt[idx].LdtEnt.Bits.Type >> 2 ) & 0x3;
info->read_exec_only = ( gdt[idx].LdtEnt.Bits.Type & 0x1 ) ^ 0x1;
info->limit_in_pages = gdt[idx].LdtEnt.Bits.Granularity;
info->seg_not_present = gdt[idx].LdtEnt.Bits.Pres ^ 0x1;
info->useable = gdt[idx].LdtEnt.Bits.Sys;
info->reserved = 0;

return VG_(mk_SysRes_Success)( 0 );
}
736
737 /* ---------------------------------------------------------------------
738 More thread stuff
739 ------------------------------------------------------------------ */
740
/* Core callback: free arch-specific per-thread resources when a
   thread dies. */
void VG_(cleanup_thread) ( ThreadArchState* arch )
{
/* Release arch-specific resources held by this thread. */
/* On x86, we have to dump the LDT and GDT. */
deallocate_LGDTs_for_thread( &arch->vex );
}
747
748
/* Initialise a child thread's arch state from its parent's: copy the
   guest and shadow registers, deep-copy the LDT if the parent has
   one, and (for Quadrics Elan3 -style clones only) deep-copy the
   GDT too. */
static void setup_child ( /*OUT*/ ThreadArchState *child,
                          /*IN*/  ThreadArchState *parent,
                          Bool inherit_parents_GDT )
{
   /* The child starts as a register-level copy of its parent. */
   child->vex         = parent->vex;
   child->vex_shadow1 = parent->vex_shadow1;
   child->vex_shadow2 = parent->vex_shadow2;

   /* LDT: in the common case the parent has none and neither does
      the child; otherwise the child gets its own copy. */
   child->vex.guest_LDT = (HWord)NULL;
   if (parent->vex.guest_LDT != (HWord)NULL) {
      child->vex.guest_LDT = (HWord)alloc_zeroed_x86_LDT();
      copy_LDT_from_to( (VexGuestX86SegDescr*)parent->vex.guest_LDT,
                        (VexGuestX86SegDescr*)child->vex.guest_LDT );
   }

   /* GDT: usually starts out empty; inherited as a copy only for the
      Quadrics Elan3 driver-style clone. */
   child->vex.guest_GDT = (HWord)NULL;
   if (inherit_parents_GDT && parent->vex.guest_GDT != (HWord)NULL) {
      child->vex.guest_GDT = (HWord)alloc_zeroed_x86_GDT();
      copy_GDT_from_to( (VexGuestX86SegDescr*)parent->vex.guest_GDT,
                        (VexGuestX86SegDescr*)child->vex.guest_GDT );
   }
}
780
781
782 /* ---------------------------------------------------------------------
783 PRE/POST wrappers for x86/Linux-specific syscalls
784 ------------------------------------------------------------------ */
785
786 #define PRE(name) DEFN_PRE_TEMPLATE(x86_linux, name)
787 #define POST(name) DEFN_POST_TEMPLATE(x86_linux, name)
788
789 /* Add prototypes for the wrappers declared here, so that gcc doesn't
790 harass us for not having prototypes. Really this is a kludge --
791 the right thing to do is to make these wrappers 'static' since they
792 aren't visible outside this file, but that requires even more macro
793 magic. */
794 DECL_TEMPLATE(x86_linux, sys_stat64);
795 DECL_TEMPLATE(x86_linux, sys_fstatat64);
796 DECL_TEMPLATE(x86_linux, sys_fstat64);
797 DECL_TEMPLATE(x86_linux, sys_lstat64);
798 DECL_TEMPLATE(x86_linux, sys_clone);
799 DECL_TEMPLATE(x86_linux, old_mmap);
800 DECL_TEMPLATE(x86_linux, sys_mmap2);
801 DECL_TEMPLATE(x86_linux, sys_sigreturn);
802 DECL_TEMPLATE(x86_linux, sys_rt_sigreturn);
803 DECL_TEMPLATE(x86_linux, sys_modify_ldt);
804 DECL_TEMPLATE(x86_linux, sys_set_thread_area);
805 DECL_TEMPLATE(x86_linux, sys_get_thread_area);
806 DECL_TEMPLATE(x86_linux, sys_ptrace);
807 DECL_TEMPLATE(x86_linux, sys_sigsuspend);
808 DECL_TEMPLATE(x86_linux, old_select);
809 DECL_TEMPLATE(x86_linux, sys_vm86old);
810 DECL_TEMPLATE(x86_linux, sys_vm86);
811 DECL_TEMPLATE(x86_linux, sys_syscall223);
812
/* Pre-handler for the old single-argument select syscall: ARG1
   points at a struct sel_arg_struct in client memory that packs the
   five real select() arguments.  (tid, flags, ARGn, PRINT and the
   PRE_* checkers come from the DEFN_PRE_TEMPLATE macro machinery.) */
PRE(old_select)
{
/* struct sel_arg_struct {
unsigned long n;
fd_set *inp, *outp, *exp;
struct timeval *tvp;
};
*/
PRE_REG_READ1(long, "old_select", struct sel_arg_struct *, args);
PRE_MEM_READ( "old_select(args)", ARG1, 5*sizeof(UWord) );
*flags |= SfMayBlock;
{
/* Unpack the five packed arguments and check each pointed-to
   area that is actually supplied. */
UInt* arg_struct = (UInt*)ARG1;
UInt a1, a2, a3, a4, a5;

a1 = arg_struct[0];
a2 = arg_struct[1];
a3 = arg_struct[2];
a4 = arg_struct[3];
a5 = arg_struct[4];

PRINT("old_select ( %d, %#x, %#x, %#x, %#x )", a1,a2,a3,a4,a5);
if (a2 != (Addr)NULL)
PRE_MEM_READ( "old_select(readfds)", a2, a1/8 /* __FD_SETSIZE/8 */ );
if (a3 != (Addr)NULL)
PRE_MEM_READ( "old_select(writefds)", a3, a1/8 /* __FD_SETSIZE/8 */ );
if (a4 != (Addr)NULL)
PRE_MEM_READ( "old_select(exceptfds)", a4, a1/8 /* __FD_SETSIZE/8 */ );
if (a5 != (Addr)NULL)
PRE_MEM_READ( "old_select(timeout)", a5, sizeof(struct vki_timeval) );
}
}
845
/* Pre-handler for clone().  Validates the tid/TLS pointer arguments
   against the client address space, then dispatches on the flags:
   thread-creation flavours go to do_clone, fork/vfork flavours to
   ML_(do_fork_clone), and anything unrecognised is rejected outright. */
PRE(sys_clone)
{
UInt cloneflags;
Bool badarg = False;

PRINT("sys_clone ( %lx, %#lx, %#lx, %#lx, %#lx )",ARG1,ARG2,ARG3,ARG4,ARG5);
PRE_REG_READ2(int, "clone",
unsigned long, flags,
void *, child_stack);

/* Each optional pointer argument is only read/written by the kernel
   if the corresponding flag is set, so only check it then. */
if (ARG1 & VKI_CLONE_PARENT_SETTID) {
if (VG_(tdict).track_pre_reg_read) {
PRA3("clone", int *, parent_tidptr);
}
PRE_MEM_WRITE("clone(parent_tidptr)", ARG3, sizeof(Int));
if (!VG_(am_is_valid_for_client)(ARG3, sizeof(Int),
VKI_PROT_WRITE)) {
badarg = True;
}
}
if (ARG1 & VKI_CLONE_SETTLS) {
if (VG_(tdict).track_pre_reg_read) {
PRA4("clone", vki_modify_ldt_t *, tlsinfo);
}
PRE_MEM_READ("clone(tlsinfo)", ARG4, sizeof(vki_modify_ldt_t));
if (!VG_(am_is_valid_for_client)(ARG4, sizeof(vki_modify_ldt_t),
VKI_PROT_READ)) {
badarg = True;
}
}
if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID)) {
if (VG_(tdict).track_pre_reg_read) {
PRA5("clone", int *, child_tidptr);
}
PRE_MEM_WRITE("clone(child_tidptr)", ARG5, sizeof(Int));
if (!VG_(am_is_valid_for_client)(ARG5, sizeof(Int),
VKI_PROT_WRITE)) {
badarg = True;
}
}

if (badarg) {
SET_STATUS_Failure( VKI_EFAULT );
return;
}

cloneflags = ARG1;

if (!ML_(client_signal_OK)(ARG1 & VKI_CSIGNAL)) {
SET_STATUS_Failure( VKI_EINVAL );
return;
}

/* Be ultra-paranoid and filter out any clone-variants we don't understand:
- ??? specifies clone flags of 0x100011
- ??? specifies clone flags of 0x1200011.
- NPTL specifies clone flags of 0x7D0F00.
- The Quadrics Elan3 driver specifies clone flags of 0xF00.
- Newer Quadrics Elan3 drivers with NTPL support specify 0x410F00.
Everything else is rejected.
*/
if (
1 ||
/* 11 Nov 05: for the time being, disable this ultra-paranoia.
The switch below probably does a good enough job. */
(cloneflags == 0x100011 || cloneflags == 0x1200011
|| cloneflags == 0x7D0F00
|| cloneflags == 0x790F00
|| cloneflags == 0x3D0F00
|| cloneflags == 0x410F00
|| cloneflags == 0xF00
|| cloneflags == 0xF21)) {
/* OK */
}
else {
/* Nah. We don't like it. Go away. */
goto reject;
}

/* Only look at the flags we really care about */
switch (cloneflags & (VKI_CLONE_VM | VKI_CLONE_FS
| VKI_CLONE_FILES | VKI_CLONE_VFORK)) {
case VKI_CLONE_VM | VKI_CLONE_FS | VKI_CLONE_FILES:
/* thread creation */
SET_STATUS_from_SysRes(
do_clone(tid,
ARG1, /* flags */
(Addr)ARG2, /* child ESP */
(Int *)ARG3, /* parent_tidptr */
(Int *)ARG5, /* child_tidptr */
(vki_modify_ldt_t *)ARG4)); /* set_tls */
break;

case VKI_CLONE_VFORK | VKI_CLONE_VM: /* vfork */
/* FALLTHROUGH - assume vfork == fork */
cloneflags &= ~(VKI_CLONE_VFORK | VKI_CLONE_VM);

case 0: /* plain fork */
SET_STATUS_from_SysRes(
ML_(do_fork_clone)(tid,
cloneflags, /* flags */
(Int *)ARG3, /* parent_tidptr */
(Int *)ARG5)); /* child_tidptr */
break;

default:
reject:
/* should we just ENOSYS? */
VG_(message)(Vg_UserMsg, "\n");
VG_(message)(Vg_UserMsg, "Unsupported clone() flags: 0x%lx\n", ARG1);
VG_(message)(Vg_UserMsg, "\n");
VG_(message)(Vg_UserMsg, "The only supported clone() uses are:\n");
VG_(message)(Vg_UserMsg, " - via a threads library (LinuxThreads or NPTL)\n");
VG_(message)(Vg_UserMsg, " - via the implementation of fork or vfork\n");
VG_(message)(Vg_UserMsg, " - for the Quadrics Elan3 user-space driver\n");
VG_(unimplemented)
("Valgrind does not support general clone().");
}

if (SUCCESS) {
/* Tell the tool about the tid writes the kernel performed. */
if (ARG1 & VKI_CLONE_PARENT_SETTID)
POST_MEM_WRITE(ARG3, sizeof(Int));
if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID))
POST_MEM_WRITE(ARG5, sizeof(Int));

/* Thread creation was successful; let the child have the chance
to run */
*flags |= SfYieldAfter;
}
}
976
/* Wrapper for the (old, non-RT) sigreturn syscall: dismantle the
   signal frame ourselves instead of letting the kernel see the
   syscall at all. */
PRE(sys_sigreturn)
{
   /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
      an explanation of what follows. */

   ThreadState* tst;
   PRINT("sys_sigreturn ( )");

   /* Sanity-check the thread before touching its guest state. */
   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   /* Adjust esp to point to start of frame; skip back up over
      sigreturn sequence's "popl %eax" and handler ret addr */
   tst = VG_(get_ThreadState)(tid);
   tst->arch.vex.guest_ESP -= sizeof(Addr)+sizeof(Word);
   /* XXX why does ESP change differ from rt_sigreturn case below? */

   /* This is only so that the EIP is (might be) useful to report if
      something goes wrong in the sigreturn */
   ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);

   /* Restore register state from frame and remove it */
   VG_(sigframe_destroy)(tid, False);   /* False => non-RT frame */

   /* Tell the driver not to update the guest state with the "result",
      and set a bogus result to keep it happy. */
   *flags |= SfNoWriteResult;
   SET_STATUS_Success(0);

   /* Check to see if any signals arose as a result of this. */
   *flags |= SfPollAfter;
}
1010
/* Wrapper for rt_sigreturn: like sys_sigreturn above, but for the
   RT (siginfo-carrying) signal frame.  The kernel never sees this
   syscall; the frame is unwound in-process. */
PRE(sys_rt_sigreturn)
{
   /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
      an explanation of what follows. */

   ThreadState* tst;
   PRINT("sys_rt_sigreturn ( )");

   /* Sanity-check the thread before touching its guest state. */
   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   /* Adjust esp to point to start of frame; skip back up over handler
      ret addr */
   tst = VG_(get_ThreadState)(tid);
   tst->arch.vex.guest_ESP -= sizeof(Addr);
   /* XXX why does ESP change differ from sigreturn case above? */

   /* This is only so that the EIP is (might be) useful to report if
      something goes wrong in the sigreturn */
   ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);

   /* Restore register state from frame and remove it */
   VG_(sigframe_destroy)(tid, True);   /* True => RT frame */

   /* Tell the driver not to update the guest state with the "result",
      and set a bogus result to keep it happy. */
   *flags |= SfNoWriteResult;
   SET_STATUS_Success(0);

   /* Check to see if any signals arose as a result of this. */
   *flags |= SfPollAfter;
}
1044
PRE(sys_modify_ldt)1045 PRE(sys_modify_ldt)
1046 {
1047 PRINT("sys_modify_ldt ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
1048 PRE_REG_READ3(int, "modify_ldt", int, func, void *, ptr,
1049 unsigned long, bytecount);
1050
1051 if (ARG1 == 0) {
1052 /* read the LDT into ptr */
1053 PRE_MEM_WRITE( "modify_ldt(ptr)", ARG2, ARG3 );
1054 }
1055 if (ARG1 == 1 || ARG1 == 0x11) {
1056 /* write the LDT with the entry pointed at by ptr */
1057 PRE_MEM_READ( "modify_ldt(ptr)", ARG2, sizeof(vki_modify_ldt_t) );
1058 }
1059 /* "do" the syscall ourselves; the kernel never sees it */
1060 SET_STATUS_from_SysRes( sys_modify_ldt( tid, ARG1, (void*)ARG2, ARG3 ) );
1061
1062 if (ARG1 == 0 && SUCCESS && RES > 0) {
1063 POST_MEM_WRITE( ARG2, RES );
1064 }
1065 }
1066
PRE(sys_set_thread_area)1067 PRE(sys_set_thread_area)
1068 {
1069 PRINT("sys_set_thread_area ( %#lx )", ARG1);
1070 PRE_REG_READ1(int, "set_thread_area", struct user_desc *, u_info)
1071 PRE_MEM_READ( "set_thread_area(u_info)", ARG1, sizeof(vki_modify_ldt_t) );
1072
1073 /* "do" the syscall ourselves; the kernel never sees it */
1074 SET_STATUS_from_SysRes( sys_set_thread_area( tid, (void *)ARG1 ) );
1075 }
1076
PRE(sys_get_thread_area)1077 PRE(sys_get_thread_area)
1078 {
1079 PRINT("sys_get_thread_area ( %#lx )", ARG1);
1080 PRE_REG_READ1(int, "get_thread_area", struct user_desc *, u_info)
1081 PRE_MEM_WRITE( "get_thread_area(u_info)", ARG1, sizeof(vki_modify_ldt_t) );
1082
1083 /* "do" the syscall ourselves; the kernel never sees it */
1084 SET_STATUS_from_SysRes( sys_get_thread_area( tid, (void *)ARG1 ) );
1085
1086 if (SUCCESS) {
1087 POST_MEM_WRITE( ARG1, sizeof(vki_modify_ldt_t) );
1088 }
1089 }
1090
1091 // Parts of this are x86-specific, but the *PEEK* cases are generic.
1092 //
1093 // ARG3 is only used for pointers into the traced process's address
1094 // space and for offsets into the traced process's struct
1095 // user_regs_struct. It is never a pointer into this process's memory
1096 // space, and we should therefore not check anything it points to.
PRE(sys_ptrace)1097 PRE(sys_ptrace)
1098 {
1099 PRINT("sys_ptrace ( %ld, %ld, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4);
1100 PRE_REG_READ4(int, "ptrace",
1101 long, request, long, pid, long, addr, long, data);
1102 switch (ARG1) {
1103 case VKI_PTRACE_PEEKTEXT:
1104 case VKI_PTRACE_PEEKDATA:
1105 case VKI_PTRACE_PEEKUSR:
1106 PRE_MEM_WRITE( "ptrace(peek)", ARG4,
1107 sizeof (long));
1108 break;
1109 case VKI_PTRACE_GETREGS:
1110 PRE_MEM_WRITE( "ptrace(getregs)", ARG4,
1111 sizeof (struct vki_user_regs_struct));
1112 break;
1113 case VKI_PTRACE_GETFPREGS:
1114 PRE_MEM_WRITE( "ptrace(getfpregs)", ARG4,
1115 sizeof (struct vki_user_i387_struct));
1116 break;
1117 case VKI_PTRACE_GETFPXREGS:
1118 PRE_MEM_WRITE( "ptrace(getfpxregs)", ARG4,
1119 sizeof(struct vki_user_fxsr_struct) );
1120 break;
1121 case VKI_PTRACE_GET_THREAD_AREA:
1122 PRE_MEM_WRITE( "ptrace(get_thread_area)", ARG4,
1123 sizeof(struct vki_user_desc) );
1124 break;
1125 case VKI_PTRACE_SETREGS:
1126 PRE_MEM_READ( "ptrace(setregs)", ARG4,
1127 sizeof (struct vki_user_regs_struct));
1128 break;
1129 case VKI_PTRACE_SETFPREGS:
1130 PRE_MEM_READ( "ptrace(setfpregs)", ARG4,
1131 sizeof (struct vki_user_i387_struct));
1132 break;
1133 case VKI_PTRACE_SETFPXREGS:
1134 PRE_MEM_READ( "ptrace(setfpxregs)", ARG4,
1135 sizeof(struct vki_user_fxsr_struct) );
1136 break;
1137 case VKI_PTRACE_SET_THREAD_AREA:
1138 PRE_MEM_READ( "ptrace(set_thread_area)", ARG4,
1139 sizeof(struct vki_user_desc) );
1140 break;
1141 case VKI_PTRACE_GETEVENTMSG:
1142 PRE_MEM_WRITE( "ptrace(geteventmsg)", ARG4, sizeof(unsigned long));
1143 break;
1144 case VKI_PTRACE_GETSIGINFO:
1145 PRE_MEM_WRITE( "ptrace(getsiginfo)", ARG4, sizeof(vki_siginfo_t));
1146 break;
1147 case VKI_PTRACE_SETSIGINFO:
1148 PRE_MEM_READ( "ptrace(setsiginfo)", ARG4, sizeof(vki_siginfo_t));
1149 break;
1150 case VKI_PTRACE_GETREGSET:
1151 ML_(linux_PRE_getregset)(tid, ARG3, ARG4);
1152 break;
1153 case VKI_PTRACE_SETREGSET:
1154 ML_(linux_PRE_setregset)(tid, ARG3, ARG4);
1155 break;
1156 default:
1157 break;
1158 }
1159 }
1160
POST(sys_ptrace)1161 POST(sys_ptrace)
1162 {
1163 switch (ARG1) {
1164 case VKI_PTRACE_PEEKTEXT:
1165 case VKI_PTRACE_PEEKDATA:
1166 case VKI_PTRACE_PEEKUSR:
1167 POST_MEM_WRITE( ARG4, sizeof (long));
1168 break;
1169 case VKI_PTRACE_GETREGS:
1170 POST_MEM_WRITE( ARG4, sizeof (struct vki_user_regs_struct));
1171 break;
1172 case VKI_PTRACE_GETFPREGS:
1173 POST_MEM_WRITE( ARG4, sizeof (struct vki_user_i387_struct));
1174 break;
1175 case VKI_PTRACE_GETFPXREGS:
1176 POST_MEM_WRITE( ARG4, sizeof(struct vki_user_fxsr_struct) );
1177 break;
1178 case VKI_PTRACE_GET_THREAD_AREA:
1179 POST_MEM_WRITE( ARG4, sizeof(struct vki_user_desc) );
1180 break;
1181 case VKI_PTRACE_GETEVENTMSG:
1182 POST_MEM_WRITE( ARG4, sizeof(unsigned long));
1183 break;
1184 case VKI_PTRACE_GETSIGINFO:
1185 /* XXX: This is a simplification. Different parts of the
1186 * siginfo_t are valid depending on the type of signal.
1187 */
1188 POST_MEM_WRITE( ARG4, sizeof(vki_siginfo_t));
1189 break;
1190 case VKI_PTRACE_GETREGSET:
1191 ML_(linux_POST_getregset)(tid, ARG3, ARG4);
1192 break;
1193 default:
1194 break;
1195 }
1196 }
1197
PRE(old_mmap)1198 PRE(old_mmap)
1199 {
1200 /* struct mmap_arg_struct {
1201 unsigned long addr;
1202 unsigned long len;
1203 unsigned long prot;
1204 unsigned long flags;
1205 unsigned long fd;
1206 unsigned long offset;
1207 }; */
1208 UWord a1, a2, a3, a4, a5, a6;
1209 SysRes r;
1210
1211 UWord* args = (UWord*)ARG1;
1212 PRE_REG_READ1(long, "old_mmap", struct mmap_arg_struct *, args);
1213 PRE_MEM_READ( "old_mmap(args)", (Addr)args, 6*sizeof(UWord) );
1214
1215 a1 = args[1-1];
1216 a2 = args[2-1];
1217 a3 = args[3-1];
1218 a4 = args[4-1];
1219 a5 = args[5-1];
1220 a6 = args[6-1];
1221
1222 PRINT("old_mmap ( %#lx, %llu, %ld, %ld, %ld, %ld )",
1223 a1, (ULong)a2, a3, a4, a5, a6 );
1224
1225 r = ML_(generic_PRE_sys_mmap)( tid, a1, a2, a3, a4, a5, (Off64T)a6 );
1226 SET_STATUS_from_SysRes(r);
1227 }
1228
PRE(sys_mmap2)1229 PRE(sys_mmap2)
1230 {
1231 SysRes r;
1232
1233 // Exactly like old_mmap() except:
1234 // - all 6 args are passed in regs, rather than in a memory-block.
1235 // - the file offset is specified in pagesize units rather than bytes,
1236 // so that it can be used for files bigger than 2^32 bytes.
1237 // pagesize or 4K-size units in offset? For ppc32/64-linux, this is
1238 // 4K-sized. Assert that the page size is 4K here for safety.
1239 vg_assert(VKI_PAGE_SIZE == 4096);
1240 PRINT("sys_mmap2 ( %#lx, %llu, %ld, %ld, %ld, %ld )",
1241 ARG1, (ULong)ARG2, ARG3, ARG4, ARG5, ARG6 );
1242 PRE_REG_READ6(long, "mmap2",
1243 unsigned long, start, unsigned long, length,
1244 unsigned long, prot, unsigned long, flags,
1245 unsigned long, fd, unsigned long, offset);
1246
1247 r = ML_(generic_PRE_sys_mmap)( tid, ARG1, ARG2, ARG3, ARG4, ARG5,
1248 4096 * (Off64T)ARG6 );
1249 SET_STATUS_from_SysRes(r);
1250 }
1251
1252 // XXX: lstat64/fstat64/stat64 are generic, but not necessarily
1253 // applicable to every architecture -- I think only to 32-bit archs.
1254 // We're going to need something like linux/core_os32.h for such
1255 // things, eventually, I think. --njn
/* lstat64: like stat64, but (per the syscall's contract) on the link
   itself.  The kernel reads the path and fills *buf. */
PRE(sys_lstat64)
{
   PRINT("sys_lstat64 ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "lstat64", char *, file_name, struct stat64 *, buf);
   PRE_MEM_RASCIIZ( "lstat64(file_name)", ARG1 );
   PRE_MEM_WRITE( "lstat64(buf)", ARG2, sizeof(struct vki_stat64) );
}
1263
POST(sys_lstat64)
{
   vg_assert(SUCCESS);
   /* Only a zero result indicates the stat buffer was filled. */
   if (RES == 0) {
      POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
   }
}
1271
/* stat64: the kernel reads the NUL-terminated path and fills *buf. */
PRE(sys_stat64)
{
   /* May touch a FUSE filesystem and hence block indefinitely. */
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_stat64 ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "stat64", char *, file_name, struct stat64 *, buf);
   PRE_MEM_RASCIIZ( "stat64(file_name)", ARG1 );
   PRE_MEM_WRITE( "stat64(buf)", ARG2, sizeof(struct vki_stat64) );
}
1280
POST(sys_stat64)
{
   /* Mark the returned stat buffer as defined. */
   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
}
1285
/* fstatat64: stat64 relative to the directory fd in ARG1; path in
   ARG2 is read, *buf in ARG3 is filled by the kernel. */
PRE(sys_fstatat64)
{
   /* May touch a FUSE filesystem and hence block indefinitely. */
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_fstatat64 ( %ld, %#lx(%s), %#lx )",ARG1,ARG2,(char*)ARG2,ARG3);
   PRE_REG_READ3(long, "fstatat64",
                 int, dfd, char *, file_name, struct stat64 *, buf);
   PRE_MEM_RASCIIZ( "fstatat64(file_name)", ARG2 );
   PRE_MEM_WRITE( "fstatat64(buf)", ARG3, sizeof(struct vki_stat64) );
}
1295
POST(sys_fstatat64)
{
   /* Mark the returned stat buffer as defined. */
   POST_MEM_WRITE( ARG3, sizeof(struct vki_stat64) );
}
1300
/* fstat64: stat64 on an open file descriptor; *buf is filled by the
   kernel. */
PRE(sys_fstat64)
{
   PRINT("sys_fstat64 ( %ld, %#lx )",ARG1,ARG2);
   PRE_REG_READ2(long, "fstat64", unsigned long, fd, struct stat64 *, buf);
   PRE_MEM_WRITE( "fstat64(buf)", ARG2, sizeof(struct vki_stat64) );
}
1307
POST(sys_fstat64)
{
   /* Mark the returned stat buffer as defined. */
   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
}
1312
1313 /* NB: arm-linux has a clone of this one, and ppc32-linux has an almost
1314 identical version. */
/* NB: arm-linux has a clone of this one, and ppc32-linux has an almost
   identical version. */
PRE(sys_sigsuspend)
{
   /* The C library interface to sigsuspend just takes a pointer to
      a signal mask but this system call has three arguments - the first
      two don't appear to be used by the kernel and are always passed as
      zero by glibc and the third is the first word of the signal mask
      so only 32 signals are supported.

      In fact glibc normally uses rt_sigsuspend if it is available as
      that takes a pointer to the signal mask so supports more signals.
    */
   /* sigsuspend blocks until a signal arrives, so flag the syscall as
      one that may block. */
   *flags |= SfMayBlock;
   PRINT("sys_sigsuspend ( %ld, %ld, %ld )", ARG1,ARG2,ARG3 );
   PRE_REG_READ3(int, "sigsuspend",
                 int, history0, int, history1,
                 vki_old_sigset_t, mask);
}
1332
/* vm86old (x86-only): the whole vm86_struct pointed at by ARG1 is
   treated as kernel-written; see the POST handler. */
PRE(sys_vm86old)
{
   PRINT("sys_vm86old ( %#lx )", ARG1);
   PRE_REG_READ1(int, "vm86old", struct vm86_struct *, info);
   PRE_MEM_WRITE( "vm86old(info)", ARG1, sizeof(struct vki_vm86_struct));
}
1339
POST(sys_vm86old)
{
   /* Mark the whole vm86_struct as written by the kernel. */
   POST_MEM_WRITE( ARG1, sizeof(struct vki_vm86_struct));
}
1344
/* vm86 (x86-only): ARG2 points at a vm86plus_struct, which is only
   used by the kernel for the two ENTER sub-functions. */
PRE(sys_vm86)
{
   PRINT("sys_vm86 ( %ld, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(int, "vm86", unsigned long, fn, struct vm86plus_struct *, v86);
   if (ARG1 == VKI_VM86_ENTER || ARG1 == VKI_VM86_ENTER_NO_BYPASS)
      PRE_MEM_WRITE( "vm86(v86)", ARG2, sizeof(struct vki_vm86plus_struct));
}
1352
POST(sys_vm86)
{
   /* Only the ENTER sub-functions cause the struct to be written. */
   if (ARG1 == VKI_VM86_ENTER || ARG1 == VKI_VM86_ENTER_NO_BYPASS)
      POST_MEM_WRITE( ARG2, sizeof(struct vki_vm86plus_struct));
}
1358
1359
1360 /* ---------------------------------------------------------------
1361 PRE/POST wrappers for x86/Linux-variant specific syscalls
1362 ------------------------------------------------------------ */
1363
/* Syscall number 223 is unused on stock Linux but used by bproc
   kernel variants; fail with ENOSYS unless the user declared a bproc
   variant via --kernel-variant. */
PRE(sys_syscall223)
{
   Int err;

   /* 223 is used by sys_bproc.  If we're not on a declared bproc
      variant, fail in the usual way. */

   if (!VG_(strstr)(VG_(clo_kernel_variant), "bproc")) {
      PRINT("non-existent syscall! (syscall 223)");
      PRE_REG_READ0(long, "ni_syscall(223)");
      SET_STATUS_Failure( VKI_ENOSYS );
      return;
   }

   /* Let the bproc-variant handler inspect the arguments; a nonzero
      return is an error code to hand back to the client. */
   err = ML_(linux_variant_PRE_sys_bproc)( ARG1, ARG2, ARG3,
                                           ARG4, ARG5, ARG6 );
   if (err) {
      SET_STATUS_Failure( err );
      return;
   }
   /* Let it go through. */
   *flags |= SfMayBlock; /* who knows?  play safe. */
}
1387
POST(sys_syscall223)
{
   /* Hand the results to the bproc-variant POST handler. */
   ML_(linux_variant_POST_sys_bproc)( ARG1, ARG2, ARG3,
                                      ARG4, ARG5, ARG6 );
}
1393
1394 #undef PRE
1395 #undef POST
1396
1397
1398 /* ---------------------------------------------------------------------
1399 The x86/Linux syscall table
1400 ------------------------------------------------------------------ */
1401
1402 /* Add an x86-linux specific wrapper to a syscall table. */
1403 #define PLAX_(sysno, name) WRAPPER_ENTRY_X_(x86_linux, sysno, name)
1404 #define PLAXY(sysno, name) WRAPPER_ENTRY_XY(x86_linux, sysno, name)
1405
1406
1407 // This table maps from __NR_xxx syscall numbers (from
1408 // linux/include/asm-i386/unistd.h) to the appropriate PRE/POST sys_foo()
1409 // wrappers on x86 (as per sys_call_table in linux/arch/i386/kernel/entry.S).
1410 //
1411 // For those syscalls not handled by Valgrind, the annotation indicate its
1412 // arch/OS combination, eg. */* (generic), */Linux (Linux only), ?/?
1413 // (unknown).
1414
1415 static SyscallTableEntry syscall_table[] = {
1416 //zz // (restart_syscall) // 0
1417 GENX_(__NR_exit, sys_exit), // 1
1418 GENX_(__NR_fork, sys_fork), // 2
1419 GENXY(__NR_read, sys_read), // 3
1420 GENX_(__NR_write, sys_write), // 4
1421
1422 GENXY(__NR_open, sys_open), // 5
1423 GENXY(__NR_close, sys_close), // 6
1424 GENXY(__NR_waitpid, sys_waitpid), // 7
1425 GENXY(__NR_creat, sys_creat), // 8
1426 GENX_(__NR_link, sys_link), // 9
1427
1428 GENX_(__NR_unlink, sys_unlink), // 10
1429 GENX_(__NR_execve, sys_execve), // 11
1430 GENX_(__NR_chdir, sys_chdir), // 12
1431 GENXY(__NR_time, sys_time), // 13
1432 GENX_(__NR_mknod, sys_mknod), // 14
1433
1434 GENX_(__NR_chmod, sys_chmod), // 15
1435 //zz LINX_(__NR_lchown, sys_lchown16), // 16
1436 GENX_(__NR_break, sys_ni_syscall), // 17
1437 //zz // (__NR_oldstat, sys_stat), // 18 (obsolete)
1438 LINX_(__NR_lseek, sys_lseek), // 19
1439
1440 GENX_(__NR_getpid, sys_getpid), // 20
1441 LINX_(__NR_mount, sys_mount), // 21
1442 LINX_(__NR_umount, sys_oldumount), // 22
1443 LINX_(__NR_setuid, sys_setuid16), // 23 ## P
1444 LINX_(__NR_getuid, sys_getuid16), // 24 ## P
1445
1446 LINX_(__NR_stime, sys_stime), // 25 * (SVr4,SVID,X/OPEN)
1447 PLAXY(__NR_ptrace, sys_ptrace), // 26
1448 GENX_(__NR_alarm, sys_alarm), // 27
1449 //zz // (__NR_oldfstat, sys_fstat), // 28 * L -- obsolete
1450 GENX_(__NR_pause, sys_pause), // 29
1451
1452 LINX_(__NR_utime, sys_utime), // 30
1453 GENX_(__NR_stty, sys_ni_syscall), // 31
1454 GENX_(__NR_gtty, sys_ni_syscall), // 32
1455 GENX_(__NR_access, sys_access), // 33
1456 GENX_(__NR_nice, sys_nice), // 34
1457
1458 GENX_(__NR_ftime, sys_ni_syscall), // 35
1459 GENX_(__NR_sync, sys_sync), // 36
1460 GENX_(__NR_kill, sys_kill), // 37
1461 GENX_(__NR_rename, sys_rename), // 38
1462 GENX_(__NR_mkdir, sys_mkdir), // 39
1463
1464 GENX_(__NR_rmdir, sys_rmdir), // 40
1465 GENXY(__NR_dup, sys_dup), // 41
1466 LINXY(__NR_pipe, sys_pipe), // 42
1467 GENXY(__NR_times, sys_times), // 43
1468 GENX_(__NR_prof, sys_ni_syscall), // 44
1469 //zz
1470 GENX_(__NR_brk, sys_brk), // 45
1471 LINX_(__NR_setgid, sys_setgid16), // 46
1472 LINX_(__NR_getgid, sys_getgid16), // 47
1473 //zz // (__NR_signal, sys_signal), // 48 */* (ANSI C)
1474 LINX_(__NR_geteuid, sys_geteuid16), // 49
1475
1476 LINX_(__NR_getegid, sys_getegid16), // 50
1477 GENX_(__NR_acct, sys_acct), // 51
1478 LINX_(__NR_umount2, sys_umount), // 52
1479 GENX_(__NR_lock, sys_ni_syscall), // 53
1480 LINXY(__NR_ioctl, sys_ioctl), // 54
1481
1482 LINXY(__NR_fcntl, sys_fcntl), // 55
1483 GENX_(__NR_mpx, sys_ni_syscall), // 56
1484 GENX_(__NR_setpgid, sys_setpgid), // 57
1485 GENX_(__NR_ulimit, sys_ni_syscall), // 58
1486 //zz // (__NR_oldolduname, sys_olduname), // 59 Linux -- obsolete
1487 //zz
1488 GENX_(__NR_umask, sys_umask), // 60
1489 GENX_(__NR_chroot, sys_chroot), // 61
1490 //zz // (__NR_ustat, sys_ustat) // 62 SVr4 -- deprecated
1491 GENXY(__NR_dup2, sys_dup2), // 63
1492 GENX_(__NR_getppid, sys_getppid), // 64
1493
1494 GENX_(__NR_getpgrp, sys_getpgrp), // 65
1495 GENX_(__NR_setsid, sys_setsid), // 66
1496 LINXY(__NR_sigaction, sys_sigaction), // 67
1497 //zz // (__NR_sgetmask, sys_sgetmask), // 68 */* (ANSI C)
1498 //zz // (__NR_ssetmask, sys_ssetmask), // 69 */* (ANSI C)
1499 //zz
1500 LINX_(__NR_setreuid, sys_setreuid16), // 70
1501 LINX_(__NR_setregid, sys_setregid16), // 71
1502 PLAX_(__NR_sigsuspend, sys_sigsuspend), // 72
1503 LINXY(__NR_sigpending, sys_sigpending), // 73
1504 GENX_(__NR_sethostname, sys_sethostname), // 74
1505 //zz
1506 GENX_(__NR_setrlimit, sys_setrlimit), // 75
1507 GENXY(__NR_getrlimit, sys_old_getrlimit), // 76
1508 GENXY(__NR_getrusage, sys_getrusage), // 77
1509 GENXY(__NR_gettimeofday, sys_gettimeofday), // 78
1510 GENX_(__NR_settimeofday, sys_settimeofday), // 79
1511
1512 LINXY(__NR_getgroups, sys_getgroups16), // 80
1513 LINX_(__NR_setgroups, sys_setgroups16), // 81
1514 PLAX_(__NR_select, old_select), // 82
1515 GENX_(__NR_symlink, sys_symlink), // 83
1516 //zz // (__NR_oldlstat, sys_lstat), // 84 -- obsolete
1517 //zz
1518 GENX_(__NR_readlink, sys_readlink), // 85
1519 //zz // (__NR_uselib, sys_uselib), // 86 */Linux
1520 //zz // (__NR_swapon, sys_swapon), // 87 */Linux
1521 //zz // (__NR_reboot, sys_reboot), // 88 */Linux
1522 //zz // (__NR_readdir, old_readdir), // 89 -- superseded
1523 //zz
1524 PLAX_(__NR_mmap, old_mmap), // 90
1525 GENXY(__NR_munmap, sys_munmap), // 91
1526 GENX_(__NR_truncate, sys_truncate), // 92
1527 GENX_(__NR_ftruncate, sys_ftruncate), // 93
1528 GENX_(__NR_fchmod, sys_fchmod), // 94
1529
1530 LINX_(__NR_fchown, sys_fchown16), // 95
1531 GENX_(__NR_getpriority, sys_getpriority), // 96
1532 GENX_(__NR_setpriority, sys_setpriority), // 97
1533 GENX_(__NR_profil, sys_ni_syscall), // 98
1534 GENXY(__NR_statfs, sys_statfs), // 99
1535
1536 GENXY(__NR_fstatfs, sys_fstatfs), // 100
1537 LINX_(__NR_ioperm, sys_ioperm), // 101
1538 LINXY(__NR_socketcall, sys_socketcall), // 102 x86/Linux-only
1539 LINXY(__NR_syslog, sys_syslog), // 103
1540 GENXY(__NR_setitimer, sys_setitimer), // 104
1541
1542 GENXY(__NR_getitimer, sys_getitimer), // 105
1543 GENXY(__NR_stat, sys_newstat), // 106
1544 GENXY(__NR_lstat, sys_newlstat), // 107
1545 GENXY(__NR_fstat, sys_newfstat), // 108
1546 //zz // (__NR_olduname, sys_uname), // 109 -- obsolete
1547 //zz
1548 GENX_(__NR_iopl, sys_iopl), // 110
1549 LINX_(__NR_vhangup, sys_vhangup), // 111
1550 GENX_(__NR_idle, sys_ni_syscall), // 112
1551 PLAXY(__NR_vm86old, sys_vm86old), // 113 x86/Linux-only
1552 GENXY(__NR_wait4, sys_wait4), // 114
1553 //zz
1554 //zz // (__NR_swapoff, sys_swapoff), // 115 */Linux
1555 LINXY(__NR_sysinfo, sys_sysinfo), // 116
1556 LINXY(__NR_ipc, sys_ipc), // 117
1557 GENX_(__NR_fsync, sys_fsync), // 118
1558 PLAX_(__NR_sigreturn, sys_sigreturn), // 119 ?/Linux
1559
1560 PLAX_(__NR_clone, sys_clone), // 120
1561 //zz // (__NR_setdomainname, sys_setdomainname), // 121 */*(?)
1562 GENXY(__NR_uname, sys_newuname), // 122
1563 PLAX_(__NR_modify_ldt, sys_modify_ldt), // 123
1564 LINXY(__NR_adjtimex, sys_adjtimex), // 124
1565
1566 GENXY(__NR_mprotect, sys_mprotect), // 125
1567 LINXY(__NR_sigprocmask, sys_sigprocmask), // 126
1568 //zz // Nb: create_module() was removed 2.4-->2.6
1569 GENX_(__NR_create_module, sys_ni_syscall), // 127
1570 LINX_(__NR_init_module, sys_init_module), // 128
1571 LINX_(__NR_delete_module, sys_delete_module), // 129
1572 //zz
1573 //zz // Nb: get_kernel_syms() was removed 2.4-->2.6
1574 GENX_(__NR_get_kernel_syms, sys_ni_syscall), // 130
1575 LINX_(__NR_quotactl, sys_quotactl), // 131
1576 GENX_(__NR_getpgid, sys_getpgid), // 132
1577 GENX_(__NR_fchdir, sys_fchdir), // 133
1578 //zz // (__NR_bdflush, sys_bdflush), // 134 */Linux
1579 //zz
1580 //zz // (__NR_sysfs, sys_sysfs), // 135 SVr4
1581 LINX_(__NR_personality, sys_personality), // 136
1582 GENX_(__NR_afs_syscall, sys_ni_syscall), // 137
1583 LINX_(__NR_setfsuid, sys_setfsuid16), // 138
1584 LINX_(__NR_setfsgid, sys_setfsgid16), // 139
1585
1586 LINXY(__NR__llseek, sys_llseek), // 140
1587 GENXY(__NR_getdents, sys_getdents), // 141
1588 GENX_(__NR__newselect, sys_select), // 142
1589 GENX_(__NR_flock, sys_flock), // 143
1590 GENX_(__NR_msync, sys_msync), // 144
1591
1592 GENXY(__NR_readv, sys_readv), // 145
1593 GENX_(__NR_writev, sys_writev), // 146
1594 GENX_(__NR_getsid, sys_getsid), // 147
1595 GENX_(__NR_fdatasync, sys_fdatasync), // 148
1596 LINXY(__NR__sysctl, sys_sysctl), // 149
1597
1598 GENX_(__NR_mlock, sys_mlock), // 150
1599 GENX_(__NR_munlock, sys_munlock), // 151
1600 GENX_(__NR_mlockall, sys_mlockall), // 152
1601 LINX_(__NR_munlockall, sys_munlockall), // 153
1602 LINXY(__NR_sched_setparam, sys_sched_setparam), // 154
1603
1604 LINXY(__NR_sched_getparam, sys_sched_getparam), // 155
1605 LINX_(__NR_sched_setscheduler, sys_sched_setscheduler), // 156
1606 LINX_(__NR_sched_getscheduler, sys_sched_getscheduler), // 157
1607 LINX_(__NR_sched_yield, sys_sched_yield), // 158
1608 LINX_(__NR_sched_get_priority_max, sys_sched_get_priority_max),// 159
1609
1610 LINX_(__NR_sched_get_priority_min, sys_sched_get_priority_min),// 160
1611 LINXY(__NR_sched_rr_get_interval, sys_sched_rr_get_interval), // 161
1612 GENXY(__NR_nanosleep, sys_nanosleep), // 162
1613 GENX_(__NR_mremap, sys_mremap), // 163
1614 LINX_(__NR_setresuid, sys_setresuid16), // 164
1615
1616 LINXY(__NR_getresuid, sys_getresuid16), // 165
1617 PLAXY(__NR_vm86, sys_vm86), // 166 x86/Linux-only
1618 GENX_(__NR_query_module, sys_ni_syscall), // 167
1619 GENXY(__NR_poll, sys_poll), // 168
1620 //zz // (__NR_nfsservctl, sys_nfsservctl), // 169 */Linux
1621 //zz
1622 LINX_(__NR_setresgid, sys_setresgid16), // 170
1623 LINXY(__NR_getresgid, sys_getresgid16), // 171
1624 LINXY(__NR_prctl, sys_prctl), // 172
1625 PLAX_(__NR_rt_sigreturn, sys_rt_sigreturn), // 173 x86/Linux only?
1626 LINXY(__NR_rt_sigaction, sys_rt_sigaction), // 174
1627
1628 LINXY(__NR_rt_sigprocmask, sys_rt_sigprocmask), // 175
1629 LINXY(__NR_rt_sigpending, sys_rt_sigpending), // 176
1630 LINXY(__NR_rt_sigtimedwait, sys_rt_sigtimedwait),// 177
1631 LINXY(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo),// 178
1632 LINX_(__NR_rt_sigsuspend, sys_rt_sigsuspend), // 179
1633
1634 GENXY(__NR_pread64, sys_pread64), // 180
1635 GENX_(__NR_pwrite64, sys_pwrite64), // 181
1636 LINX_(__NR_chown, sys_chown16), // 182
1637 GENXY(__NR_getcwd, sys_getcwd), // 183
1638 LINXY(__NR_capget, sys_capget), // 184
1639
1640 LINX_(__NR_capset, sys_capset), // 185
1641 GENXY(__NR_sigaltstack, sys_sigaltstack), // 186
1642 LINXY(__NR_sendfile, sys_sendfile), // 187
1643 GENXY(__NR_getpmsg, sys_getpmsg), // 188
1644 GENX_(__NR_putpmsg, sys_putpmsg), // 189
1645
1646 // Nb: we treat vfork as fork
1647 GENX_(__NR_vfork, sys_fork), // 190
1648 GENXY(__NR_ugetrlimit, sys_getrlimit), // 191
1649 PLAX_(__NR_mmap2, sys_mmap2), // 192
1650 GENX_(__NR_truncate64, sys_truncate64), // 193
1651 GENX_(__NR_ftruncate64, sys_ftruncate64), // 194
1652
1653 PLAXY(__NR_stat64, sys_stat64), // 195
1654 PLAXY(__NR_lstat64, sys_lstat64), // 196
1655 PLAXY(__NR_fstat64, sys_fstat64), // 197
1656 GENX_(__NR_lchown32, sys_lchown), // 198
1657 GENX_(__NR_getuid32, sys_getuid), // 199
1658
1659 GENX_(__NR_getgid32, sys_getgid), // 200
1660 GENX_(__NR_geteuid32, sys_geteuid), // 201
1661 GENX_(__NR_getegid32, sys_getegid), // 202
1662 GENX_(__NR_setreuid32, sys_setreuid), // 203
1663 GENX_(__NR_setregid32, sys_setregid), // 204
1664
1665 GENXY(__NR_getgroups32, sys_getgroups), // 205
1666 GENX_(__NR_setgroups32, sys_setgroups), // 206
1667 GENX_(__NR_fchown32, sys_fchown), // 207
1668 LINX_(__NR_setresuid32, sys_setresuid), // 208
1669 LINXY(__NR_getresuid32, sys_getresuid), // 209
1670
1671 LINX_(__NR_setresgid32, sys_setresgid), // 210
1672 LINXY(__NR_getresgid32, sys_getresgid), // 211
1673 GENX_(__NR_chown32, sys_chown), // 212
1674 GENX_(__NR_setuid32, sys_setuid), // 213
1675 GENX_(__NR_setgid32, sys_setgid), // 214
1676
1677 LINX_(__NR_setfsuid32, sys_setfsuid), // 215
1678 LINX_(__NR_setfsgid32, sys_setfsgid), // 216
1679 //zz // (__NR_pivot_root, sys_pivot_root), // 217 */Linux
1680 GENXY(__NR_mincore, sys_mincore), // 218
1681 GENX_(__NR_madvise, sys_madvise), // 219
1682
1683 GENXY(__NR_getdents64, sys_getdents64), // 220
1684 LINXY(__NR_fcntl64, sys_fcntl64), // 221
1685 GENX_(222, sys_ni_syscall), // 222
1686 PLAXY(223, sys_syscall223), // 223 // sys_bproc?
1687 LINX_(__NR_gettid, sys_gettid), // 224
1688
1689 LINX_(__NR_readahead, sys_readahead), // 225 */Linux
1690 LINX_(__NR_setxattr, sys_setxattr), // 226
1691 LINX_(__NR_lsetxattr, sys_lsetxattr), // 227
1692 LINX_(__NR_fsetxattr, sys_fsetxattr), // 228
1693 LINXY(__NR_getxattr, sys_getxattr), // 229
1694
1695 LINXY(__NR_lgetxattr, sys_lgetxattr), // 230
1696 LINXY(__NR_fgetxattr, sys_fgetxattr), // 231
1697 LINXY(__NR_listxattr, sys_listxattr), // 232
1698 LINXY(__NR_llistxattr, sys_llistxattr), // 233
1699 LINXY(__NR_flistxattr, sys_flistxattr), // 234
1700
1701 LINX_(__NR_removexattr, sys_removexattr), // 235
1702 LINX_(__NR_lremovexattr, sys_lremovexattr), // 236
1703 LINX_(__NR_fremovexattr, sys_fremovexattr), // 237
1704 LINXY(__NR_tkill, sys_tkill), // 238 */Linux
1705 LINXY(__NR_sendfile64, sys_sendfile64), // 239
1706
1707 LINXY(__NR_futex, sys_futex), // 240
1708 LINX_(__NR_sched_setaffinity, sys_sched_setaffinity), // 241
1709 LINXY(__NR_sched_getaffinity, sys_sched_getaffinity), // 242
1710 PLAX_(__NR_set_thread_area, sys_set_thread_area), // 243
1711 PLAX_(__NR_get_thread_area, sys_get_thread_area), // 244
1712
1713 LINXY(__NR_io_setup, sys_io_setup), // 245
1714 LINX_(__NR_io_destroy, sys_io_destroy), // 246
1715 LINXY(__NR_io_getevents, sys_io_getevents), // 247
1716 LINX_(__NR_io_submit, sys_io_submit), // 248
1717 LINXY(__NR_io_cancel, sys_io_cancel), // 249
1718
1719 LINX_(__NR_fadvise64, sys_fadvise64), // 250 */(Linux?)
1720 GENX_(251, sys_ni_syscall), // 251
1721 LINX_(__NR_exit_group, sys_exit_group), // 252
1722 LINXY(__NR_lookup_dcookie, sys_lookup_dcookie), // 253
1723 LINXY(__NR_epoll_create, sys_epoll_create), // 254
1724
1725 LINX_(__NR_epoll_ctl, sys_epoll_ctl), // 255
1726 LINXY(__NR_epoll_wait, sys_epoll_wait), // 256
1727 //zz // (__NR_remap_file_pages, sys_remap_file_pages), // 257 */Linux
1728 LINX_(__NR_set_tid_address, sys_set_tid_address), // 258
1729 LINXY(__NR_timer_create, sys_timer_create), // 259
1730
1731 LINXY(__NR_timer_settime, sys_timer_settime), // (timer_create+1)
1732 LINXY(__NR_timer_gettime, sys_timer_gettime), // (timer_create+2)
1733 LINX_(__NR_timer_getoverrun, sys_timer_getoverrun),//(timer_create+3)
1734 LINX_(__NR_timer_delete, sys_timer_delete), // (timer_create+4)
1735 LINX_(__NR_clock_settime, sys_clock_settime), // (timer_create+5)
1736
1737 LINXY(__NR_clock_gettime, sys_clock_gettime), // (timer_create+6)
1738 LINXY(__NR_clock_getres, sys_clock_getres), // (timer_create+7)
1739 LINXY(__NR_clock_nanosleep, sys_clock_nanosleep),// (timer_create+8) */*
1740 GENXY(__NR_statfs64, sys_statfs64), // 268
1741 GENXY(__NR_fstatfs64, sys_fstatfs64), // 269
1742
1743 LINX_(__NR_tgkill, sys_tgkill), // 270 */Linux
1744 GENX_(__NR_utimes, sys_utimes), // 271
1745 LINX_(__NR_fadvise64_64, sys_fadvise64_64), // 272 */(Linux?)
1746 GENX_(__NR_vserver, sys_ni_syscall), // 273
1747 LINX_(__NR_mbind, sys_mbind), // 274 ?/?
1748
1749 LINXY(__NR_get_mempolicy, sys_get_mempolicy), // 275 ?/?
1750 LINX_(__NR_set_mempolicy, sys_set_mempolicy), // 276 ?/?
1751 LINXY(__NR_mq_open, sys_mq_open), // 277
1752 LINX_(__NR_mq_unlink, sys_mq_unlink), // (mq_open+1)
1753 LINX_(__NR_mq_timedsend, sys_mq_timedsend), // (mq_open+2)
1754
1755 LINXY(__NR_mq_timedreceive, sys_mq_timedreceive),// (mq_open+3)
1756 LINX_(__NR_mq_notify, sys_mq_notify), // (mq_open+4)
1757 LINXY(__NR_mq_getsetattr, sys_mq_getsetattr), // (mq_open+5)
1758 GENX_(__NR_sys_kexec_load, sys_ni_syscall), // 283
1759 LINXY(__NR_waitid, sys_waitid), // 284
1760
1761 GENX_(285, sys_ni_syscall), // 285
1762 LINX_(__NR_add_key, sys_add_key), // 286
1763 LINX_(__NR_request_key, sys_request_key), // 287
1764 LINXY(__NR_keyctl, sys_keyctl), // 288
1765 LINX_(__NR_ioprio_set, sys_ioprio_set), // 289
1766
1767 LINX_(__NR_ioprio_get, sys_ioprio_get), // 290
1768 LINX_(__NR_inotify_init, sys_inotify_init), // 291
1769 LINX_(__NR_inotify_add_watch, sys_inotify_add_watch), // 292
1770 LINX_(__NR_inotify_rm_watch, sys_inotify_rm_watch), // 293
1771 // LINX_(__NR_migrate_pages, sys_migrate_pages), // 294
1772
1773 LINXY(__NR_openat, sys_openat), // 295
1774 LINX_(__NR_mkdirat, sys_mkdirat), // 296
1775 LINX_(__NR_mknodat, sys_mknodat), // 297
1776 LINX_(__NR_fchownat, sys_fchownat), // 298
1777 LINX_(__NR_futimesat, sys_futimesat), // 299
1778
1779 PLAXY(__NR_fstatat64, sys_fstatat64), // 300
1780 LINX_(__NR_unlinkat, sys_unlinkat), // 301
1781 LINX_(__NR_renameat, sys_renameat), // 302
1782 LINX_(__NR_linkat, sys_linkat), // 303
1783 LINX_(__NR_symlinkat, sys_symlinkat), // 304
1784
1785 LINX_(__NR_readlinkat, sys_readlinkat), // 305
1786 LINX_(__NR_fchmodat, sys_fchmodat), // 306
1787 LINX_(__NR_faccessat, sys_faccessat), // 307
1788 LINX_(__NR_pselect6, sys_pselect6), // 308
1789 LINXY(__NR_ppoll, sys_ppoll), // 309
1790
1791 // LINX_(__NR_unshare, sys_unshare), // 310
1792 LINX_(__NR_set_robust_list, sys_set_robust_list), // 311
1793 LINXY(__NR_get_robust_list, sys_get_robust_list), // 312
1794 LINX_(__NR_splice, sys_splice), // 313
1795 LINX_(__NR_sync_file_range, sys_sync_file_range), // 314
1796
1797 LINX_(__NR_tee, sys_tee), // 315
1798 LINXY(__NR_vmsplice, sys_vmsplice), // 316
1799 LINXY(__NR_move_pages, sys_move_pages), // 317
1800 LINXY(__NR_getcpu, sys_getcpu), // 318
1801 LINXY(__NR_epoll_pwait, sys_epoll_pwait), // 319
1802
1803 LINX_(__NR_utimensat, sys_utimensat), // 320
1804 LINXY(__NR_signalfd, sys_signalfd), // 321
1805 LINXY(__NR_timerfd_create, sys_timerfd_create), // 322
1806 LINXY(__NR_eventfd, sys_eventfd), // 323
1807 LINX_(__NR_fallocate, sys_fallocate), // 324
1808
1809 LINXY(__NR_timerfd_settime, sys_timerfd_settime), // 325
1810 LINXY(__NR_timerfd_gettime, sys_timerfd_gettime), // 326
1811 LINXY(__NR_signalfd4, sys_signalfd4), // 327
1812 LINXY(__NR_eventfd2, sys_eventfd2), // 328
1813 LINXY(__NR_epoll_create1, sys_epoll_create1), // 329
1814
1815 LINXY(__NR_dup3, sys_dup3), // 330
1816 LINXY(__NR_pipe2, sys_pipe2), // 331
1817 LINXY(__NR_inotify_init1, sys_inotify_init1), // 332
1818 LINXY(__NR_preadv, sys_preadv), // 333
1819 LINX_(__NR_pwritev, sys_pwritev), // 334
1820
1821 LINXY(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo),// 335
1822 LINXY(__NR_perf_event_open, sys_perf_event_open), // 336
1823 LINXY(__NR_recvmmsg, sys_recvmmsg), // 337
1824 LINXY(__NR_fanotify_init, sys_fanotify_init), // 338
1825 LINX_(__NR_fanotify_mark, sys_fanotify_mark), // 339
1826
1827 LINXY(__NR_prlimit64, sys_prlimit64), // 340
1828 LINXY(__NR_name_to_handle_at, sys_name_to_handle_at),// 341
1829 LINXY(__NR_open_by_handle_at, sys_open_by_handle_at),// 342
1830 LINXY(__NR_clock_adjtime, sys_clock_adjtime), // 343
1831 // LINX_(__NR_syncfs, sys_ni_syscall), // 344
1832
1833 LINXY(__NR_sendmmsg, sys_sendmmsg), // 345
1834 // LINX_(__NR_setns, sys_ni_syscall), // 346
1835 LINXY(__NR_process_vm_readv, sys_process_vm_readv), // 347
1836 LINX_(__NR_process_vm_writev, sys_process_vm_writev) // 348
1837 };
1838
ML_(get_linux_syscall_entry)1839 SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno )
1840 {
1841 const UInt syscall_table_size
1842 = sizeof(syscall_table) / sizeof(syscall_table[0]);
1843
1844 /* Is it in the contiguous initial section of the table? */
1845 if (sysno < syscall_table_size) {
1846 SyscallTableEntry* sys = &syscall_table[sysno];
1847 if (sys->before == NULL)
1848 return NULL; /* no entry */
1849 else
1850 return sys;
1851 }
1852
1853 /* Can't find a wrapper */
1854 return NULL;
1855 }
1856
1857 #endif // defined(VGP_x86_linux)
1858
1859 /*--------------------------------------------------------------------*/
1860 /*--- end ---*/
1861 /*--------------------------------------------------------------------*/
1862