/*--------------------------------------------------------------------*/
/*--- Platform-specific syscalls stuff.        syswrap-x86-linux.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2015 Nicholas Nethercote
      njn@valgrind.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/
30
31 #if defined(VGP_x86_linux)
32
33 /* TODO/FIXME jrs 20050207: assignments to the syscall return result
34 in interrupted_syscall() need to be reviewed. They don't seem
35 to assign the shadow state.
36 */
37
38 #include "pub_core_basics.h"
39 #include "pub_core_vki.h"
40 #include "pub_core_vkiscnums.h"
41 #include "pub_core_threadstate.h"
42 #include "pub_core_aspacemgr.h"
43 #include "pub_core_debuglog.h"
44 #include "pub_core_libcbase.h"
45 #include "pub_core_libcassert.h"
46 #include "pub_core_libcprint.h"
47 #include "pub_core_libcproc.h"
48 #include "pub_core_libcsignal.h"
49 #include "pub_core_mallocfree.h"
50 #include "pub_core_options.h"
51 #include "pub_core_scheduler.h"
52 #include "pub_core_sigframe.h" // For VG_(sigframe_destroy)()
53 #include "pub_core_signals.h"
54 #include "pub_core_syscall.h"
55 #include "pub_core_syswrap.h"
56 #include "pub_core_tooliface.h"
57
58 #include "priv_types_n_macros.h"
59 #include "priv_syswrap-generic.h" /* for decls of generic wrappers */
60 #include "priv_syswrap-linux.h" /* for decls of linux-ish wrappers */
61 #include "priv_syswrap-linux-variants.h" /* decls of linux variant wrappers */
62 #include "priv_syswrap-main.h"
63
64
65 /* ---------------------------------------------------------------------
66 clone() handling
67 ------------------------------------------------------------------ */
68
/* Call f(arg1), but first switch stacks, using 'stack' as the new
   stack, and use 'retaddr' as f's return-to address.  Also, clear all
   the integer registers before entering f.  Declared noreturn since
   control never comes back through here. */
__attribute__((noreturn))
void ML_(call_on_new_stack_0_1) ( Addr stack,
                                  Addr retaddr,
                                  void (*f)(Word),
                                  Word arg1 );
/* Incoming frame layout on the OLD stack (cdecl):
    4(%esp) == stack
    8(%esp) == retaddr
   12(%esp) == f
   16(%esp) == arg1 */
asm(
".text\n"
".globl vgModuleLocal_call_on_new_stack_0_1\n"
"vgModuleLocal_call_on_new_stack_0_1:\n"
/* NB: %esi keeps addressing the old frame while the new stack is
   built up; it is zeroed only after everything has been copied. */
"   movl %esp, %esi\n"     // remember old stack pointer
"   movl 4(%esi), %esp\n"  // set stack, assume %esp is now 16-byte aligned
"   subl $12, %esp\n"      // skip 12 bytes
"   pushl 16(%esi)\n"      // arg1 to stack, %esp is 16-byte aligned
"   pushl 8(%esi)\n"       // retaddr to stack
"   pushl 12(%esi)\n"      // f to stack
"   movl $0, %eax\n"       // zero all GP regs
"   movl $0, %ebx\n"
"   movl $0, %ecx\n"
"   movl $0, %edx\n"
"   movl $0, %esi\n"
"   movl $0, %edi\n"
"   movl $0, %ebp\n"
"   ret\n"                 // 'return' to f; it sees retaddr, then arg1
"   ud2\n"                 // should never get here
".previous\n"
);
102
103
104 /*
105 Perform a clone system call. clone is strange because it has
106 fork()-like return-twice semantics, so it needs special
107 handling here.
108
109 Upon entry, we have:
110
111 int (fn)(void*) in 0+FSZ(%esp)
112 void* child_stack in 4+FSZ(%esp)
113 int flags in 8+FSZ(%esp)
114 void* arg in 12+FSZ(%esp)
115 pid_t* child_tid in 16+FSZ(%esp)
116 pid_t* parent_tid in 20+FSZ(%esp)
117 void* tls_ptr in 24+FSZ(%esp)
118
119 System call requires:
120
121 int $__NR_clone in %eax
122 int flags in %ebx
123 void* child_stack in %ecx
124 pid_t* parent_tid in %edx
125 pid_t* child_tid in %edi
126 void* tls_ptr in %esi
127
128 Returns an Int encoded in the linux-x86 way, not a SysRes.
129 */
130 #define FSZ "4+4+4+4" /* frame size = retaddr+ebx+edi+esi */
131 #define __NR_CLONE VG_STRINGIFY(__NR_clone)
132 #define __NR_EXIT VG_STRINGIFY(__NR_exit)
133
134 extern
135 Int do_syscall_clone_x86_linux ( Word (*fn)(void *),
136 void* stack,
137 Int flags,
138 void* arg,
139 Int* child_tid,
140 Int* parent_tid,
141 vki_modify_ldt_t * );
142 asm(
143 ".text\n"
144 ".globl do_syscall_clone_x86_linux\n"
145 "do_syscall_clone_x86_linux:\n"
146 " push %ebx\n"
147 " push %edi\n"
148 " push %esi\n"
149
150 /* set up child stack with function and arg */
151 " movl 4+"FSZ"(%esp), %ecx\n" /* syscall arg2: child stack */
152 " movl 12+"FSZ"(%esp), %ebx\n" /* fn arg */
153 " movl 0+"FSZ"(%esp), %eax\n" /* fn */
154 " andl $-16, %ecx\n" /* align to 16-byte */
155 " lea -20(%ecx), %ecx\n" /* allocate 16*n+4 bytes on stack */
156 " movl %ebx, 4(%ecx)\n" /* fn arg */
157 " movl %eax, 0(%ecx)\n" /* fn */
158
159 /* get other args to clone */
160 " movl 8+"FSZ"(%esp), %ebx\n" /* syscall arg1: flags */
161 " movl 20+"FSZ"(%esp), %edx\n" /* syscall arg3: parent tid * */
162 " movl 16+"FSZ"(%esp), %edi\n" /* syscall arg5: child tid * */
163 " movl 24+"FSZ"(%esp), %esi\n" /* syscall arg4: tls_ptr * */
164 " movl $"__NR_CLONE", %eax\n"
165 " int $0x80\n" /* clone() */
166 " testl %eax, %eax\n" /* child if retval == 0 */
167 " jnz 1f\n"
168
169 /* CHILD - call thread function */
170 " popl %eax\n" /* child %esp is 16-byte aligned */
171 " call *%eax\n" /* call fn */
172
173 /* exit with result */
174 " movl %eax, %ebx\n" /* arg1: return value from fn */
175 " movl $"__NR_EXIT", %eax\n"
176 " int $0x80\n"
177
178 /* Hm, exit returned */
179 " ud2\n"
180
181 "1:\n" /* PARENT or ERROR */
182 " pop %esi\n"
183 " pop %edi\n"
184 " pop %ebx\n"
185 " ret\n"
186 ".previous\n"
187 );
188
189 #undef FSZ
190 #undef __NR_CLONE
191 #undef __NR_EXIT
192
193
194 // forward declarations
195 static void setup_child ( ThreadArchState*, ThreadArchState*, Bool );
196 static SysRes sys_set_thread_area ( ThreadId, vki_modify_ldt_t* );
197
198 /*
199 When a client clones, we need to keep track of the new thread. This means:
200 1. allocate a ThreadId+ThreadState+stack for the thread
201
202 2. initialize the thread's new VCPU state
203
204 3. create the thread using the same args as the client requested,
205 but using the scheduler entrypoint for EIP, and a separate stack
206 for ESP.
207 */
do_clone(ThreadId ptid,UInt flags,Addr esp,Int * parent_tidptr,Int * child_tidptr,vki_modify_ldt_t * tlsinfo)208 static SysRes do_clone ( ThreadId ptid,
209 UInt flags, Addr esp,
210 Int* parent_tidptr,
211 Int* child_tidptr,
212 vki_modify_ldt_t *tlsinfo)
213 {
214 static const Bool debug = False;
215
216 ThreadId ctid = VG_(alloc_ThreadState)();
217 ThreadState* ptst = VG_(get_ThreadState)(ptid);
218 ThreadState* ctst = VG_(get_ThreadState)(ctid);
219 UWord* stack;
220 SysRes res;
221 Int eax;
222 vki_sigset_t blockall, savedmask;
223
224 VG_(sigfillset)(&blockall);
225
226 vg_assert(VG_(is_running_thread)(ptid));
227 vg_assert(VG_(is_valid_tid)(ctid));
228
229 stack = (UWord*)ML_(allocstack)(ctid);
230 if (stack == NULL) {
231 res = VG_(mk_SysRes_Error)( VKI_ENOMEM );
232 goto out;
233 }
234
235 /* Copy register state
236
237 Both parent and child return to the same place, and the code
238 following the clone syscall works out which is which, so we
239 don't need to worry about it.
240
241 The parent gets the child's new tid returned from clone, but the
242 child gets 0.
243
244 If the clone call specifies a NULL esp for the new thread, then
245 it actually gets a copy of the parent's esp.
246 */
247 /* Note: the clone call done by the Quadrics Elan3 driver specifies
248 clone flags of 0xF00, and it seems to rely on the assumption
249 that the child inherits a copy of the parent's GDT.
250 setup_child takes care of setting that up. */
251 setup_child( &ctst->arch, &ptst->arch, True );
252
253 /* Make sys_clone appear to have returned Success(0) in the
254 child. */
255 ctst->arch.vex.guest_EAX = 0;
256
257 if (esp != 0)
258 ctst->arch.vex.guest_ESP = esp;
259
260 ctst->os_state.parent = ptid;
261
262 /* inherit signal mask */
263 ctst->sig_mask = ptst->sig_mask;
264 ctst->tmp_sig_mask = ptst->sig_mask;
265
266 /* Start the child with its threadgroup being the same as the
267 parent's. This is so that any exit_group calls that happen
268 after the child is created but before it sets its
269 os_state.threadgroup field for real (in thread_wrapper in
270 syswrap-linux.c), really kill the new thread. a.k.a this avoids
271 a race condition in which the thread is unkillable (via
272 exit_group) because its threadgroup is not set. The race window
273 is probably only a few hundred or a few thousand cycles long.
274 See #226116. */
275 ctst->os_state.threadgroup = ptst->os_state.threadgroup;
276
277 ML_(guess_and_register_stack) (esp, ctst);
278
279 /* Assume the clone will succeed, and tell any tool that wants to
280 know that this thread has come into existence. We cannot defer
281 it beyond this point because sys_set_thread_area, just below,
282 causes tCheck to assert by making references to the new ThreadId
283 if we don't state the new thread exists prior to that point.
284 If the clone fails, we'll send out a ll_exit notification for it
285 at the out: label below, to clean up. */
286 vg_assert(VG_(owns_BigLock_LL)(ptid));
287 VG_TRACK ( pre_thread_ll_create, ptid, ctid );
288
289 if (flags & VKI_CLONE_SETTLS) {
290 if (debug)
291 VG_(printf)("clone child has SETTLS: tls info at %p: idx=%u "
292 "base=%#lx limit=%x; esp=%#x fs=%x gs=%x\n",
293 tlsinfo, tlsinfo->entry_number,
294 tlsinfo->base_addr, tlsinfo->limit,
295 ptst->arch.vex.guest_ESP,
296 ctst->arch.vex.guest_FS, ctst->arch.vex.guest_GS);
297 res = sys_set_thread_area(ctid, tlsinfo);
298 if (sr_isError(res))
299 goto out;
300 }
301
302 flags &= ~VKI_CLONE_SETTLS;
303
304 /* start the thread with everything blocked */
305 VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, &savedmask);
306
307 /* Create the new thread */
308 eax = do_syscall_clone_x86_linux(
309 ML_(start_thread_NORETURN), stack, flags, &VG_(threads)[ctid],
310 child_tidptr, parent_tidptr, NULL
311 );
312 res = VG_(mk_SysRes_x86_linux)( eax );
313
314 VG_(sigprocmask)(VKI_SIG_SETMASK, &savedmask, NULL);
315
316 out:
317 if (sr_isError(res)) {
318 /* clone failed */
319 VG_(cleanup_thread)(&ctst->arch);
320 ctst->status = VgTs_Empty;
321 /* oops. Better tell the tool the thread exited in a hurry :-) */
322 VG_TRACK( pre_thread_ll_exit, ctid );
323 }
324
325 return res;
326 }
327
328
329 /* ---------------------------------------------------------------------
330 LDT/GDT simulation
331 ------------------------------------------------------------------ */
332
333 /* Details of the LDT simulation
334 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
335
336 When a program runs natively, the linux kernel allows each *thread*
337 in it to have its own LDT. Almost all programs never do this --
338 it's wildly unportable, after all -- and so the kernel never
339 allocates the structure, which is just as well as an LDT occupies
340 64k of memory (8192 entries of size 8 bytes).
341
342 A thread may choose to modify its LDT entries, by doing the
343 __NR_modify_ldt syscall. In such a situation the kernel will then
344 allocate an LDT structure for it. Each LDT entry is basically a
345 (base, limit) pair. A virtual address in a specific segment is
346 translated to a linear address by adding the segment's base value.
347 In addition, the virtual address must not exceed the limit value.
348
349 To use an LDT entry, a thread loads one of the segment registers
350 (%cs, %ss, %ds, %es, %fs, %gs) with the index of the LDT entry (0
351 .. 8191) it wants to use. In fact, the required value is (index <<
352 3) + 7, but that's not important right now. Any normal instruction
353 which includes an addressing mode can then be made relative to that
354 LDT entry by prefixing the insn with a so-called segment-override
355 prefix, a byte which indicates which of the 6 segment registers
356 holds the LDT index.
357
358 Now, a key constraint is that valgrind's address checks operate in
359 terms of linear addresses. So we have to explicitly translate
360 virtual addrs into linear addrs, and that means doing a complete
361 LDT simulation.
362
363 Calls to modify_ldt are intercepted. For each thread, we maintain
364 an LDT (with the same normally-never-allocated optimisation that
365 the kernel does). This is updated as expected via calls to
366 modify_ldt.
367
368 When a thread does an amode calculation involving a segment
369 override prefix, the relevant LDT entry for the thread is
370 consulted. It all works.
371
372 There is a conceptual problem, which appears when switching back to
373 native execution, either temporarily to pass syscalls to the
374 kernel, or permanently, when debugging V. Problem at such points
375 is that it's pretty pointless to copy the simulated machine's
376 segment registers to the real machine, because we'd also need to
377 copy the simulated LDT into the real one, and that's prohibitively
378 expensive.
379
380 Fortunately it looks like no syscalls rely on the segment regs or
381 LDT being correct, so we can get away with it. Apart from that the
382 simulation is pretty straightforward. All 6 segment registers are
383 tracked, although only %ds, %es, %fs and %gs are allowed as
384 prefixes. Perhaps it could be restricted even more than that -- I
385 am not sure what is and isn't allowed in user-mode.
386 */
387
388 /* Translate a struct modify_ldt_ldt_s to a VexGuestX86SegDescr, using
389 the Linux kernel's logic (cut-n-paste of code in
390 linux/kernel/ldt.c). */
391
392 static
translate_to_hw_format(vki_modify_ldt_t * inn,VexGuestX86SegDescr * out,Int oldmode)393 void translate_to_hw_format ( /* IN */ vki_modify_ldt_t* inn,
394 /* OUT */ VexGuestX86SegDescr* out,
395 Int oldmode )
396 {
397 UInt entry_1, entry_2;
398 vg_assert(8 == sizeof(VexGuestX86SegDescr));
399
400 if (0)
401 VG_(printf)("translate_to_hw_format: base %#lx, limit %u\n",
402 inn->base_addr, inn->limit );
403
404 /* Allow LDTs to be cleared by the user. */
405 if (inn->base_addr == 0 && inn->limit == 0) {
406 if (oldmode ||
407 (inn->contents == 0 &&
408 inn->read_exec_only == 1 &&
409 inn->seg_32bit == 0 &&
410 inn->limit_in_pages == 0 &&
411 inn->seg_not_present == 1 &&
412 inn->useable == 0 )) {
413 entry_1 = 0;
414 entry_2 = 0;
415 goto install;
416 }
417 }
418
419 entry_1 = ((inn->base_addr & 0x0000ffff) << 16) |
420 (inn->limit & 0x0ffff);
421 entry_2 = (inn->base_addr & 0xff000000) |
422 ((inn->base_addr & 0x00ff0000) >> 16) |
423 (inn->limit & 0xf0000) |
424 ((inn->read_exec_only ^ 1) << 9) |
425 (inn->contents << 10) |
426 ((inn->seg_not_present ^ 1) << 15) |
427 (inn->seg_32bit << 22) |
428 (inn->limit_in_pages << 23) |
429 0x7000;
430 if (!oldmode)
431 entry_2 |= (inn->useable << 20);
432
433 /* Install the new entry ... */
434 install:
435 out->LdtEnt.Words.word1 = entry_1;
436 out->LdtEnt.Words.word2 = entry_2;
437 }
438
439 /* Create a zeroed-out GDT. */
alloc_zeroed_x86_GDT(void)440 static VexGuestX86SegDescr* alloc_zeroed_x86_GDT ( void )
441 {
442 Int nbytes = VEX_GUEST_X86_GDT_NENT * sizeof(VexGuestX86SegDescr);
443 return VG_(calloc)("di.syswrap-x86.azxG.1", nbytes, 1);
444 }
445
446 /* Create a zeroed-out LDT. */
alloc_zeroed_x86_LDT(void)447 static VexGuestX86SegDescr* alloc_zeroed_x86_LDT ( void )
448 {
449 Int nbytes = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
450 return VG_(calloc)("di.syswrap-x86.azxL.1", nbytes, 1);
451 }
452
453 /* Free up an LDT or GDT allocated by the above fns. */
free_LDT_or_GDT(VexGuestX86SegDescr * dt)454 static void free_LDT_or_GDT ( VexGuestX86SegDescr* dt )
455 {
456 vg_assert(dt);
457 VG_(free)(dt);
458 }
459
460 /* Copy contents between two existing LDTs. */
copy_LDT_from_to(VexGuestX86SegDescr * src,VexGuestX86SegDescr * dst)461 static void copy_LDT_from_to ( VexGuestX86SegDescr* src,
462 VexGuestX86SegDescr* dst )
463 {
464 Int i;
465 vg_assert(src);
466 vg_assert(dst);
467 for (i = 0; i < VEX_GUEST_X86_LDT_NENT; i++)
468 dst[i] = src[i];
469 }
470
471 /* Copy contents between two existing GDTs. */
copy_GDT_from_to(VexGuestX86SegDescr * src,VexGuestX86SegDescr * dst)472 static void copy_GDT_from_to ( VexGuestX86SegDescr* src,
473 VexGuestX86SegDescr* dst )
474 {
475 Int i;
476 vg_assert(src);
477 vg_assert(dst);
478 for (i = 0; i < VEX_GUEST_X86_GDT_NENT; i++)
479 dst[i] = src[i];
480 }
481
482 /* Free this thread's DTs, if it has any. */
deallocate_LGDTs_for_thread(VexGuestX86State * vex)483 static void deallocate_LGDTs_for_thread ( VexGuestX86State* vex )
484 {
485 vg_assert(sizeof(HWord) == sizeof(void*));
486
487 if (0)
488 VG_(printf)("deallocate_LGDTs_for_thread: "
489 "ldt = 0x%lx, gdt = 0x%lx\n",
490 vex->guest_LDT, vex->guest_GDT );
491
492 if (vex->guest_LDT != (HWord)NULL) {
493 free_LDT_or_GDT( (VexGuestX86SegDescr*)vex->guest_LDT );
494 vex->guest_LDT = (HWord)NULL;
495 }
496
497 if (vex->guest_GDT != (HWord)NULL) {
498 free_LDT_or_GDT( (VexGuestX86SegDescr*)vex->guest_GDT );
499 vex->guest_GDT = (HWord)NULL;
500 }
501 }
502
503
504 /*
505 * linux/kernel/ldt.c
506 *
507 * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
508 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
509 */
510
511 /*
512 * read_ldt() is not really atomic - this is not a problem since
513 * synchronization of reads and writes done to the LDT has to be
514 * assured by user-space anyway. Writes are atomic, to protect
515 * the security checks done on new descriptors.
516 */
static
SysRes read_ldt ( ThreadId tid, UChar* ptr, UInt bytecount )
{
   /* Copy up to 'bytecount' raw bytes of thread tid's simulated LDT
      into the client buffer 'ptr'.  Returns Success(n) where n is the
      number of bytes actually copied; n == 0 when the thread has no
      LDT allocated (all entries are then conceptually zero, matching
      the kernel's lazy-allocation behaviour). */
   SysRes res;
   UInt   i, size;
   UChar* ldt;

   if (0)
      VG_(printf)("read_ldt: tid = %u, ptr = %p, bytecount = %u\n",
                  tid, ptr, bytecount );

   vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
   vg_assert(8 == sizeof(VexGuestX86SegDescr));

   ldt = (UChar*)(VG_(threads)[tid].arch.vex.guest_LDT);
   res = VG_(mk_SysRes_Success)( 0 );
   if (ldt == NULL)
      /* LDT not allocated, meaning all entries are null */
      goto out;

   /* Clamp to whichever is smaller: the full LDT or the caller's
      buffer. */
   size = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
   if (size > bytecount)
      size = bytecount;

   res = VG_(mk_SysRes_Success)( size );
   for (i = 0; i < size; i++)
      ptr[i] = ldt[i];

  out:
   return res;
}
548
549
550 static
write_ldt(ThreadId tid,void * ptr,UInt bytecount,Int oldmode)551 SysRes write_ldt ( ThreadId tid, void* ptr, UInt bytecount, Int oldmode )
552 {
553 SysRes res;
554 VexGuestX86SegDescr* ldt;
555 vki_modify_ldt_t* ldt_info;
556
557 if (0)
558 VG_(printf)("write_ldt: tid = %u, ptr = %p, "
559 "bytecount = %u, oldmode = %d\n",
560 tid, ptr, bytecount, oldmode );
561
562 vg_assert(8 == sizeof(VexGuestX86SegDescr));
563 vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
564
565 ldt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_LDT;
566 ldt_info = (vki_modify_ldt_t*)ptr;
567
568 res = VG_(mk_SysRes_Error)( VKI_EINVAL );
569 if (bytecount != sizeof(vki_modify_ldt_t))
570 goto out;
571
572 res = VG_(mk_SysRes_Error)( VKI_EINVAL );
573 if (ldt_info->entry_number >= VEX_GUEST_X86_LDT_NENT)
574 goto out;
575 if (ldt_info->contents == 3) {
576 if (oldmode)
577 goto out;
578 if (ldt_info->seg_not_present == 0)
579 goto out;
580 }
581
582 /* If this thread doesn't have an LDT, we'd better allocate it
583 now. */
584 if (ldt == NULL) {
585 ldt = alloc_zeroed_x86_LDT();
586 VG_(threads)[tid].arch.vex.guest_LDT = (HWord)ldt;
587 }
588
589 /* Install the new entry ... */
590 translate_to_hw_format ( ldt_info, &ldt[ldt_info->entry_number], oldmode );
591 res = VG_(mk_SysRes_Success)( 0 );
592
593 out:
594 return res;
595 }
596
597
sys_modify_ldt(ThreadId tid,Int func,void * ptr,UInt bytecount)598 static SysRes sys_modify_ldt ( ThreadId tid,
599 Int func, void* ptr, UInt bytecount )
600 {
601 SysRes ret = VG_(mk_SysRes_Error)( VKI_ENOSYS );
602
603 switch (func) {
604 case 0:
605 ret = read_ldt(tid, ptr, bytecount);
606 break;
607 case 1:
608 ret = write_ldt(tid, ptr, bytecount, 1);
609 break;
610 case 2:
611 VG_(unimplemented)("sys_modify_ldt: func == 2");
612 /* god knows what this is about */
613 /* ret = read_default_ldt(ptr, bytecount); */
614 /*UNREACHED*/
615 break;
616 case 0x11:
617 ret = write_ldt(tid, ptr, bytecount, 0);
618 break;
619 }
620 return ret;
621 }
622
623
sys_set_thread_area(ThreadId tid,vki_modify_ldt_t * info)624 static SysRes sys_set_thread_area ( ThreadId tid, vki_modify_ldt_t* info )
625 {
626 Int idx;
627 VexGuestX86SegDescr* gdt;
628
629 vg_assert(8 == sizeof(VexGuestX86SegDescr));
630 vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
631
632 if (info == NULL)
633 return VG_(mk_SysRes_Error)( VKI_EFAULT );
634
635 gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;
636
637 /* If the thread doesn't have a GDT, allocate it now. */
638 if (!gdt) {
639 gdt = alloc_zeroed_x86_GDT();
640 VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
641 }
642
643 idx = info->entry_number;
644
645 if (idx == -1) {
646 /* Find and use the first free entry. Don't allocate entry
647 zero, because the hardware will never do that, and apparently
648 doing so confuses some code (perhaps stuff running on
649 Wine). */
650 for (idx = 1; idx < VEX_GUEST_X86_GDT_NENT; idx++) {
651 if (gdt[idx].LdtEnt.Words.word1 == 0
652 && gdt[idx].LdtEnt.Words.word2 == 0)
653 break;
654 }
655
656 if (idx == VEX_GUEST_X86_GDT_NENT)
657 return VG_(mk_SysRes_Error)( VKI_ESRCH );
658 } else if (idx < 0 || idx == 0 || idx >= VEX_GUEST_X86_GDT_NENT) {
659 /* Similarly, reject attempts to use GDT[0]. */
660 return VG_(mk_SysRes_Error)( VKI_EINVAL );
661 }
662
663 translate_to_hw_format(info, &gdt[idx], 0);
664
665 VG_TRACK( pre_mem_write, Vg_CoreSysCall, tid,
666 "set_thread_area(info->entry)",
667 (Addr) & info->entry_number, sizeof(unsigned int) );
668 info->entry_number = idx;
669 VG_TRACK( post_mem_write, Vg_CoreSysCall, tid,
670 (Addr) & info->entry_number, sizeof(unsigned int) );
671
672 return VG_(mk_SysRes_Success)( 0 );
673 }
674
675
sys_get_thread_area(ThreadId tid,vki_modify_ldt_t * info)676 static SysRes sys_get_thread_area ( ThreadId tid, vki_modify_ldt_t* info )
677 {
678 Int idx;
679 VexGuestX86SegDescr* gdt;
680
681 vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
682 vg_assert(8 == sizeof(VexGuestX86SegDescr));
683
684 if (info == NULL)
685 return VG_(mk_SysRes_Error)( VKI_EFAULT );
686
687 idx = info->entry_number;
688
689 if (idx < 0 || idx >= VEX_GUEST_X86_GDT_NENT)
690 return VG_(mk_SysRes_Error)( VKI_EINVAL );
691
692 gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;
693
694 /* If the thread doesn't have a GDT, allocate it now. */
695 if (!gdt) {
696 gdt = alloc_zeroed_x86_GDT();
697 VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
698 }
699
700 info->base_addr = ( gdt[idx].LdtEnt.Bits.BaseHi << 24 ) |
701 ( gdt[idx].LdtEnt.Bits.BaseMid << 16 ) |
702 gdt[idx].LdtEnt.Bits.BaseLow;
703 info->limit = ( gdt[idx].LdtEnt.Bits.LimitHi << 16 ) |
704 gdt[idx].LdtEnt.Bits.LimitLow;
705 info->seg_32bit = gdt[idx].LdtEnt.Bits.Default_Big;
706 info->contents = ( gdt[idx].LdtEnt.Bits.Type >> 2 ) & 0x3;
707 info->read_exec_only = ( gdt[idx].LdtEnt.Bits.Type & 0x1 ) ^ 0x1;
708 info->limit_in_pages = gdt[idx].LdtEnt.Bits.Granularity;
709 info->seg_not_present = gdt[idx].LdtEnt.Bits.Pres ^ 0x1;
710 info->useable = gdt[idx].LdtEnt.Bits.Sys;
711 info->reserved = 0;
712
713 return VG_(mk_SysRes_Success)( 0 );
714 }
715
716 /* ---------------------------------------------------------------------
717 More thread stuff
718 ------------------------------------------------------------------ */
719
/* Called when a thread's state is being torn down. */
void VG_(cleanup_thread) ( ThreadArchState* arch )
{
   /* Release arch-specific resources held by this thread. */
   /* On x86, we have to dump the LDT and GDT. */
   deallocate_LGDTs_for_thread( &arch->vex );
}
726
727
/* Initialise a child thread's arch state as a clone of its parent's:
   guest registers and shadow state are copied wholesale, the LDT is
   deep-copied if present, and the GDT is deep-copied only when
   inherit_parents_GDT is set (Quadrics Elan3 -style clones). */
static void setup_child ( /*OUT*/ ThreadArchState *child,
                          /*IN*/  ThreadArchState *parent,
                          Bool inherit_parents_GDT )
{
   /* We inherit our parent's guest state. */
   child->vex         = parent->vex;
   child->vex_shadow1 = parent->vex_shadow1;
   child->vex_shadow2 = parent->vex_shadow2;

   /* We inherit our parent's LDT.  No LDT at all is the hoped-for
      common case; otherwise a private copy must be taken, since the
      tables are per-thread. */
   if (parent->vex.guest_LDT != (HWord)NULL) {
      child->vex.guest_LDT = (HWord)alloc_zeroed_x86_LDT();
      copy_LDT_from_to( (VexGuestX86SegDescr*)parent->vex.guest_LDT,
                        (VexGuestX86SegDescr*)child->vex.guest_LDT );
   } else {
      child->vex.guest_LDT = (HWord)NULL;
   }

   /* Either we start with an empty GDT (the usual case) or inherit a
      copy of our parents' one (Quadrics Elan3 driver -style clone
      only). */
   child->vex.guest_GDT = (HWord)NULL;
   if (inherit_parents_GDT && parent->vex.guest_GDT != (HWord)NULL) {
      child->vex.guest_GDT = (HWord)alloc_zeroed_x86_GDT();
      copy_GDT_from_to( (VexGuestX86SegDescr*)parent->vex.guest_GDT,
                        (VexGuestX86SegDescr*)child->vex.guest_GDT );
   }
}
759
760
761 /* ---------------------------------------------------------------------
762 PRE/POST wrappers for x86/Linux-specific syscalls
763 ------------------------------------------------------------------ */
764
/* Shorthands expanding to the x86/Linux pre/post wrapper function
   signatures. */
#define PRE(name)       DEFN_PRE_TEMPLATE(x86_linux, name)
#define POST(name)      DEFN_POST_TEMPLATE(x86_linux, name)

/* Add prototypes for the wrappers declared here, so that gcc doesn't
   harass us for not having prototypes.  Really this is a kludge --
   the right thing to do is to make these wrappers 'static' since they
   aren't visible outside this file, but that requires even more macro
   magic. */
DECL_TEMPLATE(x86_linux, sys_stat64);
DECL_TEMPLATE(x86_linux, sys_fstatat64);
DECL_TEMPLATE(x86_linux, sys_fstat64);
DECL_TEMPLATE(x86_linux, sys_lstat64);
DECL_TEMPLATE(x86_linux, sys_clone);
DECL_TEMPLATE(x86_linux, old_mmap);
DECL_TEMPLATE(x86_linux, sys_mmap2);
DECL_TEMPLATE(x86_linux, sys_sigreturn);
DECL_TEMPLATE(x86_linux, sys_rt_sigreturn);
DECL_TEMPLATE(x86_linux, sys_modify_ldt);
DECL_TEMPLATE(x86_linux, sys_set_thread_area);
DECL_TEMPLATE(x86_linux, sys_get_thread_area);
DECL_TEMPLATE(x86_linux, sys_ptrace);
DECL_TEMPLATE(x86_linux, sys_sigsuspend);
DECL_TEMPLATE(x86_linux, old_select);
DECL_TEMPLATE(x86_linux, sys_vm86old);
DECL_TEMPLATE(x86_linux, sys_vm86);
DECL_TEMPLATE(x86_linux, sys_syscall223);
791
PRE(old_select)
{
   /* struct sel_arg_struct {
      unsigned long n;
      fd_set *inp, *outp, *exp;
      struct timeval *tvp;
      };
   */
   /* The ancient select entry point packs all five arguments into a
      single user-space struct; ARG1 points at it. */
   PRE_REG_READ1(long, "old_select", struct sel_arg_struct *, args);
   PRE_MEM_READ( "old_select(args)", ARG1, 5*sizeof(UWord) );
   *flags |= SfMayBlock;
   {
      /* Unpack the five words so each pointer argument can be
         checked individually. */
      UInt* arg_struct = (UInt*)ARG1;
      UInt a1, a2, a3, a4, a5;

      a1 = arg_struct[0];
      a2 = arg_struct[1];
      a3 = arg_struct[2];
      a4 = arg_struct[3];
      a5 = arg_struct[4];

      PRINT("old_select ( %d, %#x, %#x, %#x, %#x )", (Int)a1,a2,a3,a4,a5);
      if (a2 != (Addr)NULL)
         PRE_MEM_READ( "old_select(readfds)", a2, a1/8 /* __FD_SETSIZE/8 */ );
      if (a3 != (Addr)NULL)
         PRE_MEM_READ( "old_select(writefds)", a3, a1/8 /* __FD_SETSIZE/8 */ );
      if (a4 != (Addr)NULL)
         PRE_MEM_READ( "old_select(exceptfds)", a4, a1/8 /* __FD_SETSIZE/8 */ );
      if (a5 != (Addr)NULL)
         PRE_MEM_READ( "old_select(timeout)", a5, sizeof(struct vki_timeval) );
   }
}
824
PRE(sys_clone)
{
   /* Wrapper for the x86 clone syscall.  x86 argument order:
      ARG1 = flags, ARG2 = child stack, ARG3 = parent_tidptr,
      ARG4 = tlsinfo, ARG5 = child_tidptr. */
   UInt cloneflags;
   Bool badarg = False;

   PRINT("sys_clone ( %lx, %#lx, %#lx, %#lx, %#lx )",ARG1,ARG2,ARG3,ARG4,ARG5);
   PRE_REG_READ2(int, "clone",
                 unsigned long, flags,
                 void *, child_stack);

   /* Only the pointer arguments selected by the flags are actually
      read/written by the kernel, so only those are validated. */
   if (ARG1 & VKI_CLONE_PARENT_SETTID) {
      if (VG_(tdict).track_pre_reg_read) {
         PRA3("clone", int *, parent_tidptr);
      }
      PRE_MEM_WRITE("clone(parent_tidptr)", ARG3, sizeof(Int));
      if (!VG_(am_is_valid_for_client)(ARG3, sizeof(Int),
                                             VKI_PROT_WRITE)) {
         badarg = True;
      }
   }
   if (ARG1 & VKI_CLONE_SETTLS) {
      if (VG_(tdict).track_pre_reg_read) {
         PRA4("clone", vki_modify_ldt_t *, tlsinfo);
      }
      PRE_MEM_READ("clone(tlsinfo)", ARG4, sizeof(vki_modify_ldt_t));
      if (!VG_(am_is_valid_for_client)(ARG4, sizeof(vki_modify_ldt_t),
                                             VKI_PROT_READ)) {
         badarg = True;
      }
   }
   if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID)) {
      if (VG_(tdict).track_pre_reg_read) {
         PRA5("clone", int *, child_tidptr);
      }
      PRE_MEM_WRITE("clone(child_tidptr)", ARG5, sizeof(Int));
      if (!VG_(am_is_valid_for_client)(ARG5, sizeof(Int),
                                             VKI_PROT_WRITE)) {
         badarg = True;
      }
   }

   if (badarg) {
      SET_STATUS_Failure( VKI_EFAULT );
      return;
   }

   cloneflags = ARG1;

   if (!ML_(client_signal_OK)(ARG1 & VKI_CSIGNAL)) {
      SET_STATUS_Failure( VKI_EINVAL );
      return;
   }

   /* Be ultra-paranoid and filter out any clone-variants we don't understand:
      - ??? specifies clone flags of 0x100011
      - ??? specifies clone flags of 0x1200011.
      - NPTL specifies clone flags of 0x7D0F00.
      - The Quadrics Elan3 driver specifies clone flags of 0xF00.
      - Newer Quadrics Elan3 drivers with NTPL support specify 0x410F00.
      Everything else is rejected.
   */
   if (
        1 ||
        /* 11 Nov 05: for the time being, disable this ultra-paranoia.
           The switch below probably does a good enough job. */
          (cloneflags == 0x100011 || cloneflags == 0x1200011
                                  || cloneflags == 0x7D0F00
                                  || cloneflags == 0x790F00
                                  || cloneflags == 0x3D0F00
                                  || cloneflags == 0x410F00
                                  || cloneflags == 0xF00
                                  || cloneflags == 0xF21)) {
     /* OK */
   }
   else {
      /* Nah.  We don't like it.  Go away. */
      goto reject;
   }

   /* Only look at the flags we really care about */
   switch (cloneflags & (VKI_CLONE_VM | VKI_CLONE_FS
                         | VKI_CLONE_FILES | VKI_CLONE_VFORK)) {
   case VKI_CLONE_VM | VKI_CLONE_FS | VKI_CLONE_FILES:
      /* thread creation */
      SET_STATUS_from_SysRes(
         do_clone(tid,
                  ARG1,         /* flags */
                  (Addr)ARG2,   /* child ESP */
                  (Int *)ARG3,  /* parent_tidptr */
                  (Int *)ARG5,  /* child_tidptr */
                  (vki_modify_ldt_t *)ARG4)); /* set_tls */
      break;

   case VKI_CLONE_VFORK | VKI_CLONE_VM: /* vfork */
      /* FALLTHROUGH - assume vfork == fork */
      cloneflags &= ~(VKI_CLONE_VFORK | VKI_CLONE_VM);

   case 0: /* plain fork */
      SET_STATUS_from_SysRes(
         ML_(do_fork_clone)(tid,
                       cloneflags,      /* flags */
                       (Int *)ARG3,     /* parent_tidptr */
                       (Int *)ARG5));   /* child_tidptr */
      break;

   default:
   reject:
      /* should we just ENOSYS? */
      VG_(message)(Vg_UserMsg, "\n");
      VG_(message)(Vg_UserMsg, "Unsupported clone() flags: 0x%lx\n", ARG1);
      VG_(message)(Vg_UserMsg, "\n");
      VG_(message)(Vg_UserMsg, "The only supported clone() uses are:\n");
      VG_(message)(Vg_UserMsg, " - via a threads library (LinuxThreads or NPTL)\n");
      VG_(message)(Vg_UserMsg, " - via the implementation of fork or vfork\n");
      VG_(message)(Vg_UserMsg, " - for the Quadrics Elan3 user-space driver\n");
      VG_(unimplemented)
         ("Valgrind does not support general clone().");
   }

   if (SUCCESS) {
      /* Mirror the kernel's writes to the tid pointers. */
      if (ARG1 & VKI_CLONE_PARENT_SETTID)
         POST_MEM_WRITE(ARG3, sizeof(Int));
      if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID))
         POST_MEM_WRITE(ARG5, sizeof(Int));

      /* Thread creation was successful; let the child have the chance
         to run */
      *flags |= SfYieldAfter;
   }
}
955
/* sigreturn: never forwarded to the kernel.  Valgrind built the signal
   frame itself (see sigframe-x86-linux.c), so it must also tear it down
   itself and restore the guest register state from the frame. */
PRE(sys_sigreturn)
{
   /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
      an explanation of what follows. */

   ThreadState* tst;
   PRINT("sys_sigreturn ( )");

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   /* Adjust esp to point to start of frame; skip back up over
      sigreturn sequence's "popl %eax" and handler ret addr */
   tst = VG_(get_ThreadState)(tid);
   tst->arch.vex.guest_ESP -= sizeof(Addr)+sizeof(Word);
   /* XXX why does ESP change differ from rt_sigreturn case below? */

   /* This is only so that the EIP is (might be) useful to report if
      something goes wrong in the sigreturn */
   ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);

   /* Restore register state from frame and remove it.  False ==
      non-RT frame (old-style sigframe layout). */
   VG_(sigframe_destroy)(tid, False);

   /* Tell the driver not to update the guest state with the "result",
      and set a bogus result to keep it happy. */
   *flags |= SfNoWriteResult;
   SET_STATUS_Success(0);

   /* Check to see if any signals arose as a result of this. */
   *flags |= SfPollAfter;
}
989
/* rt_sigreturn: like sys_sigreturn above, but destroys an RT-style
   signal frame (True flag to VG_(sigframe_destroy)).  Never forwarded
   to the kernel. */
PRE(sys_rt_sigreturn)
{
   /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
      an explanation of what follows. */

   ThreadState* tst;
   PRINT("sys_rt_sigreturn ( )");

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   /* Adjust esp to point to start of frame; skip back up over handler
      ret addr */
   tst = VG_(get_ThreadState)(tid);
   tst->arch.vex.guest_ESP -= sizeof(Addr);
   /* XXX why does ESP change differ from sigreturn case above? */

   /* This is only so that the EIP is (might be) useful to report if
      something goes wrong in the sigreturn */
   ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);

   /* Restore register state from frame and remove it */
   VG_(sigframe_destroy)(tid, True);

   /* Tell the driver not to update the guest state with the "result",
      and set a bogus result to keep it happy. */
   *flags |= SfNoWriteResult;
   SET_STATUS_Success(0);

   /* Check to see if any signals arose as a result of this. */
   *flags |= SfPollAfter;
}
1023
PRE(sys_modify_ldt)1024 PRE(sys_modify_ldt)
1025 {
1026 PRINT("sys_modify_ldt ( %ld, %#lx, %lu )", SARG1, ARG2, ARG3);
1027 PRE_REG_READ3(int, "modify_ldt", int, func, void *, ptr,
1028 unsigned long, bytecount);
1029
1030 if (ARG1 == 0) {
1031 /* read the LDT into ptr */
1032 PRE_MEM_WRITE( "modify_ldt(ptr)", ARG2, ARG3 );
1033 }
1034 if (ARG1 == 1 || ARG1 == 0x11) {
1035 /* write the LDT with the entry pointed at by ptr */
1036 PRE_MEM_READ( "modify_ldt(ptr)", ARG2, sizeof(vki_modify_ldt_t) );
1037 }
1038 /* "do" the syscall ourselves; the kernel never sees it */
1039 SET_STATUS_from_SysRes( sys_modify_ldt( tid, ARG1, (void*)ARG2, ARG3 ) );
1040
1041 if (ARG1 == 0 && SUCCESS && RES > 0) {
1042 POST_MEM_WRITE( ARG2, RES );
1043 }
1044 }
1045
/* set_thread_area: install a TLS entry.  Handled by Valgrind itself
   (sys_set_thread_area helper); the kernel never sees the call. */
PRE(sys_set_thread_area)
{
   PRINT("sys_set_thread_area ( %#lx )", ARG1);
   PRE_REG_READ1(int, "set_thread_area", struct user_desc *, u_info)
   PRE_MEM_READ( "set_thread_area(u_info)", ARG1, sizeof(vki_modify_ldt_t) );

   /* "do" the syscall ourselves; the kernel never sees it */
   SET_STATUS_from_SysRes( sys_set_thread_area( tid, (void *)ARG1 ) );
}
1055
/* get_thread_area: read back a TLS entry.  Handled by Valgrind itself;
   the kernel never sees the call. */
PRE(sys_get_thread_area)
{
   PRINT("sys_get_thread_area ( %#lx )", ARG1);
   PRE_REG_READ1(int, "get_thread_area", struct user_desc *, u_info)
   PRE_MEM_WRITE( "get_thread_area(u_info)", ARG1, sizeof(vki_modify_ldt_t) );

   /* "do" the syscall ourselves; the kernel never sees it */
   SET_STATUS_from_SysRes( sys_get_thread_area( tid, (void *)ARG1 ) );

   if (SUCCESS) {
      /* Mark the returned descriptor as initialised. */
      POST_MEM_WRITE( ARG1, sizeof(vki_modify_ldt_t) );
   }
}
1069
1070 // Parts of this are x86-specific, but the *PEEK* cases are generic.
1071 //
1072 // ARG3 is only used for pointers into the traced process's address
1073 // space and for offsets into the traced process's struct
1074 // user_regs_struct. It is never a pointer into this process's memory
1075 // space, and we should therefore not check anything it points to.
/* ptrace: annotate ARG4 (the 'data' pointer into OUR address space)
   as written-to or read-from depending on the request.  ARG3 ('addr')
   always refers to the tracee's address space and is never checked
   here -- see the comment above. */
PRE(sys_ptrace)
{
   PRINT("sys_ptrace ( %ld, %ld, %#lx, %#lx )", SARG1, SARG2, ARG3, ARG4);
   PRE_REG_READ4(int, "ptrace",
                 long, request, long, pid, unsigned long, addr,
                 unsigned long, data);
   switch (ARG1) {
   /* Requests that write a result through ARG4: */
   case VKI_PTRACE_PEEKTEXT:
   case VKI_PTRACE_PEEKDATA:
   case VKI_PTRACE_PEEKUSR:
      PRE_MEM_WRITE( "ptrace(peek)", ARG4,
		     sizeof (long));
      break;
   case VKI_PTRACE_GETREGS:
      PRE_MEM_WRITE( "ptrace(getregs)", ARG4,
		     sizeof (struct vki_user_regs_struct));
      break;
   case VKI_PTRACE_GETFPREGS:
      PRE_MEM_WRITE( "ptrace(getfpregs)", ARG4,
		     sizeof (struct vki_user_i387_struct));
      break;
   case VKI_PTRACE_GETFPXREGS:
      PRE_MEM_WRITE( "ptrace(getfpxregs)", ARG4,
                     sizeof(struct vki_user_fxsr_struct) );
      break;
   case VKI_PTRACE_GET_THREAD_AREA:
      PRE_MEM_WRITE( "ptrace(get_thread_area)", ARG4,
                     sizeof(struct vki_user_desc) );
      break;
   /* Requests that read their input through ARG4: */
   case VKI_PTRACE_SETREGS:
      PRE_MEM_READ( "ptrace(setregs)", ARG4,
		     sizeof (struct vki_user_regs_struct));
      break;
   case VKI_PTRACE_SETFPREGS:
      PRE_MEM_READ( "ptrace(setfpregs)", ARG4,
		     sizeof (struct vki_user_i387_struct));
      break;
   case VKI_PTRACE_SETFPXREGS:
      PRE_MEM_READ( "ptrace(setfpxregs)", ARG4,
                     sizeof(struct vki_user_fxsr_struct) );
      break;
   case VKI_PTRACE_SET_THREAD_AREA:
      PRE_MEM_READ( "ptrace(set_thread_area)", ARG4,
                     sizeof(struct vki_user_desc) );
      break;
   case VKI_PTRACE_GETEVENTMSG:
      PRE_MEM_WRITE( "ptrace(geteventmsg)", ARG4, sizeof(unsigned long));
      break;
   case VKI_PTRACE_GETSIGINFO:
      PRE_MEM_WRITE( "ptrace(getsiginfo)", ARG4, sizeof(vki_siginfo_t));
      break;
   case VKI_PTRACE_SETSIGINFO:
      PRE_MEM_READ( "ptrace(setsiginfo)", ARG4, sizeof(vki_siginfo_t));
      break;
   /* regset requests: size depends on ARG3 (the NT_* regset type),
      so defer to the generic Linux helpers. */
   case VKI_PTRACE_GETREGSET:
      ML_(linux_PRE_getregset)(tid, ARG3, ARG4);
      break;
   case VKI_PTRACE_SETREGSET:
      ML_(linux_PRE_setregset)(tid, ARG3, ARG4);
      break;
   default:
      break;
   }
}
1140
/* ptrace POST: mark the buffers the kernel filled in as defined.
   Mirrors the PRE_MEM_WRITE cases in PRE(sys_ptrace). */
POST(sys_ptrace)
{
   switch (ARG1) {
   case VKI_PTRACE_PEEKTEXT:
   case VKI_PTRACE_PEEKDATA:
   case VKI_PTRACE_PEEKUSR:
      POST_MEM_WRITE( ARG4, sizeof (long));
      break;
   case VKI_PTRACE_GETREGS:
      POST_MEM_WRITE( ARG4, sizeof (struct vki_user_regs_struct));
      break;
   case VKI_PTRACE_GETFPREGS:
      POST_MEM_WRITE( ARG4, sizeof (struct vki_user_i387_struct));
      break;
   case VKI_PTRACE_GETFPXREGS:
      POST_MEM_WRITE( ARG4, sizeof(struct vki_user_fxsr_struct) );
      break;
   case VKI_PTRACE_GET_THREAD_AREA:
      POST_MEM_WRITE( ARG4, sizeof(struct vki_user_desc) );
      break;
   case VKI_PTRACE_GETEVENTMSG:
      POST_MEM_WRITE( ARG4, sizeof(unsigned long));
      break;
   case VKI_PTRACE_GETSIGINFO:
      /* XXX: This is a simplification. Different parts of the
       * siginfo_t are valid depending on the type of signal.
       */
      POST_MEM_WRITE( ARG4, sizeof(vki_siginfo_t));
      break;
   case VKI_PTRACE_GETREGSET:
      ML_(linux_POST_getregset)(tid, ARG3, ARG4);
      break;
   default:
      break;
   }
}
1177
PRE(old_mmap)1178 PRE(old_mmap)
1179 {
1180 /* struct mmap_arg_struct {
1181 unsigned long addr;
1182 unsigned long len;
1183 unsigned long prot;
1184 unsigned long flags;
1185 unsigned long fd;
1186 unsigned long offset;
1187 }; */
1188 UWord a1, a2, a3, a4, a5, a6;
1189 SysRes r;
1190
1191 UWord* args = (UWord*)ARG1;
1192 PRE_REG_READ1(long, "old_mmap", struct mmap_arg_struct *, args);
1193 PRE_MEM_READ( "old_mmap(args)", (Addr)args, 6*sizeof(UWord) );
1194
1195 a1 = args[1-1];
1196 a2 = args[2-1];
1197 a3 = args[3-1];
1198 a4 = args[4-1];
1199 a5 = args[5-1];
1200 a6 = args[6-1];
1201
1202 PRINT("old_mmap ( %#lx, %lu, %ld, %ld, %ld, %ld )",
1203 a1, a2, (Word)a3, (Word)a4, (Word)a5, (Word)a6 );
1204
1205 r = ML_(generic_PRE_sys_mmap)( tid, a1, a2, a3, a4, a5, (Off64T)a6 );
1206 SET_STATUS_from_SysRes(r);
1207 }
1208
PRE(sys_mmap2)
{
   SysRes r;

   // Exactly like old_mmap() except:
   //  - all 6 args are passed in regs, rather than in a memory-block.
   //  - the file offset is specified in pagesize units rather than bytes,
   //    so that it can be used for files bigger than 2^32 bytes.
   // pagesize or 4K-size units in offset?  For ppc32/64-linux, this is
   // 4K-sized.  Assert that the page size is 4K here for safety.
   vg_assert(VKI_PAGE_SIZE == 4096);
   PRINT("sys_mmap2 ( %#lx, %lu, %lu, %lu, %lu, %lu )",
         ARG1, ARG2, ARG3, ARG4, ARG5, ARG6 );
   PRE_REG_READ6(long, "mmap2",
                 unsigned long, start, unsigned long, length,
                 unsigned long, prot,  unsigned long, flags,
                 unsigned long, fd,    unsigned long, offset);

   /* Convert the page-unit offset to a 64-bit byte offset before
      handing off to the generic mmap handler. */
   r = ML_(generic_PRE_sys_mmap)( tid, ARG1, ARG2, ARG3, ARG4, ARG5,
                                       4096 * (Off64T)ARG6 );
   SET_STATUS_from_SysRes(r);
}
1231
1232 // XXX: lstat64/fstat64/stat64 are generic, but not necessarily
1233 // applicable to every architecture -- I think only to 32-bit archs.
1234 // We're going to need something like linux/core_os32.h for such
1235 // things, eventually, I think. --njn
/* lstat64: check the pathname is a readable NUL-terminated string and
   the stat buffer is addressable before the kernel writes it. */
PRE(sys_lstat64)
{
   PRINT("sys_lstat64 ( %#lx(%s), %#lx )", ARG1, (HChar*)ARG1, ARG2);
   PRE_REG_READ2(long, "lstat64", char *, file_name, struct stat64 *, buf);
   PRE_MEM_RASCIIZ( "lstat64(file_name)", ARG1 );
   PRE_MEM_WRITE( "lstat64(buf)", ARG2, sizeof(struct vki_stat64) );
}
1243
POST(sys_lstat64)
{
   vg_assert(SUCCESS);
   /* On success (RES == 0) the kernel filled in the stat buffer. */
   if (RES == 0) {
      POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
   }
}
1251
PRE(sys_stat64)
{
   /* May block if the path lives on a FUSE filesystem served by this
      same process. */
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_stat64 ( %#lx(%s), %#lx )", ARG1, (HChar*)ARG1, ARG2);
   PRE_REG_READ2(long, "stat64", char *, file_name, struct stat64 *, buf);
   PRE_MEM_RASCIIZ( "stat64(file_name)", ARG1 );
   PRE_MEM_WRITE( "stat64(buf)", ARG2, sizeof(struct vki_stat64) );
}
1260
POST(sys_stat64)
{
   /* Kernel filled in the stat64 buffer. */
   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
}
1265
PRE(sys_fstatat64)
{
   /* May block if the path lives on a FUSE filesystem served by this
      same process. */
   FUSE_COMPATIBLE_MAY_BLOCK();
   // ARG4 = int flags;  Flags are or'ed together, therefore writing them
   // as a hex constant is more meaningful.
   PRINT("sys_fstatat64 ( %ld, %#lx(%s), %#lx, %#lx )",
         SARG1, ARG2, (HChar*)ARG2, ARG3, ARG4);
   PRE_REG_READ4(long, "fstatat64",
                 int, dfd, char *, file_name, struct stat64 *, buf, int, flags);
   PRE_MEM_RASCIIZ( "fstatat64(file_name)", ARG2 );
   PRE_MEM_WRITE( "fstatat64(buf)", ARG3, sizeof(struct vki_stat64) );
}
1278
POST(sys_fstatat64)
{
   /* Kernel filled in the stat64 buffer (ARG3 for this syscall). */
   POST_MEM_WRITE( ARG3, sizeof(struct vki_stat64) );
}
1283
/* fstat64: only the output buffer needs checking; the fd is a plain
   integer argument. */
PRE(sys_fstat64)
{
   PRINT("sys_fstat64 ( %lu, %#lx )", ARG1, ARG2);
   PRE_REG_READ2(long, "fstat64", unsigned long, fd, struct stat64 *, buf);
   PRE_MEM_WRITE( "fstat64(buf)", ARG2, sizeof(struct vki_stat64) );
}
1290
POST(sys_fstat64)
{
   /* Kernel filled in the stat64 buffer. */
   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
}
1295
1296 /* NB: arm-linux has a clone of this one, and ppc32-linux has an almost
1297 identical version. */
PRE(sys_sigsuspend)
{
   /* The C library interface to sigsuspend just takes a pointer to
      a signal mask but this system call has three arguments - the first
      two don't appear to be used by the kernel and are always passed as
      zero by glibc and the third is the first word of the signal mask
      so only 32 signals are supported.
     
      In fact glibc normally uses rt_sigsuspend if it is available as
      that takes a pointer to the signal mask so supports more signals.
    */
   /* Waits for a signal, so may block indefinitely. */
   *flags |= SfMayBlock;
   PRINT("sys_sigsuspend ( %ld, %ld, %lu )", SARG1, SARG2, ARG3 );
   PRE_REG_READ3(int, "sigsuspend",
                 int, history0, int, history1,
                 vki_old_sigset_t, mask);
}
1315
/* vm86old: the kernel both reads and updates the vm86_struct, so mark
   the whole struct writable here and defined in the POST. */
PRE(sys_vm86old)
{
   PRINT("sys_vm86old ( %#lx )", ARG1);
   PRE_REG_READ1(int, "vm86old", struct vm86_struct *, info);
   PRE_MEM_WRITE( "vm86old(info)", ARG1, sizeof(struct vki_vm86_struct));
}
1322
POST(sys_vm86old)
{
   /* Kernel updated the vm86_struct. */
   POST_MEM_WRITE( ARG1, sizeof(struct vki_vm86_struct));
}
1327
/* vm86: ARG2 is only a vm86plus_struct pointer for the two ENTER
   subfunctions; other fn values take a plain argument, so only check
   memory for those. */
PRE(sys_vm86)
{
   PRINT("sys_vm86 ( %lu, %#lx )", ARG1, ARG2);
   PRE_REG_READ2(int, "vm86", unsigned long, fn, struct vm86plus_struct *, v86);
   if (ARG1 == VKI_VM86_ENTER || ARG1 == VKI_VM86_ENTER_NO_BYPASS)
      PRE_MEM_WRITE( "vm86(v86)", ARG2, sizeof(struct vki_vm86plus_struct));
}
1335
POST(sys_vm86)
{
   /* Mirrors the conditional check in the PRE: only the ENTER
      subfunctions pass a vm86plus_struct for the kernel to update. */
   if (ARG1 == VKI_VM86_ENTER || ARG1 == VKI_VM86_ENTER_NO_BYPASS)
      POST_MEM_WRITE( ARG2, sizeof(struct vki_vm86plus_struct));
}
1341
1342
1343 /* ---------------------------------------------------------------
1344 PRE/POST wrappers for x86/Linux-variant specific syscalls
1345 ------------------------------------------------------------ */
1346
/* Syscall number 223: unused on standard kernels, but used by the
   bproc kernel variant.  Fail with ENOSYS unless the user declared
   --kernel-variant=bproc. */
PRE(sys_syscall223)
{
   Int err;

   /* 223 is used by sys_bproc.  If we're not on a declared bproc
      variant, fail in the usual way. */

   if (!KernelVariantiS(KernelVariant_bproc, VG_(clo_kernel_variant))) {
      PRINT("non-existent syscall! (syscall 223)");
      PRE_REG_READ0(long, "ni_syscall(223)");
      SET_STATUS_Failure( VKI_ENOSYS );
      return;
   }

   /* Let the variant-specific handler vet the arguments; a nonzero
      return is an error code to hand back to the guest. */
   err = ML_(linux_variant_PRE_sys_bproc)( ARG1, ARG2, ARG3,
                                           ARG4, ARG5, ARG6 );
   if (err) {
      SET_STATUS_Failure( err );
      return;
   }
   /* Let it go through. */
   *flags |= SfMayBlock; /* who knows?  play safe. */
}
1370
POST(sys_syscall223)
{
   /* Only reachable when the bproc kernel variant is declared (the PRE
      fails with ENOSYS otherwise). */
   ML_(linux_variant_POST_sys_bproc)( ARG1, ARG2, ARG3,
                                      ARG4, ARG5, ARG6 );
}
1376
1377 #undef PRE
1378 #undef POST
1379
1380
1381 /* ---------------------------------------------------------------------
1382 The x86/Linux syscall table
1383 ------------------------------------------------------------------ */
1384
1385 /* Add an x86-linux specific wrapper to a syscall table. */
1386 #define PLAX_(sysno, name) WRAPPER_ENTRY_X_(x86_linux, sysno, name)
1387 #define PLAXY(sysno, name) WRAPPER_ENTRY_XY(x86_linux, sysno, name)
1388
1389
1390 // This table maps from __NR_xxx syscall numbers (from
1391 // linux/include/asm-i386/unistd.h) to the appropriate PRE/POST sys_foo()
1392 // wrappers on x86 (as per sys_call_table in linux/arch/i386/kernel/entry.S).
1393 //
// For those syscalls not handled by Valgrind, the annotation indicates the
// arch/OS combination, eg. */* (generic), */Linux (Linux only), ?/?
// (unknown).
1397
1398 static SyscallTableEntry syscall_table[] = {
1399 //zz // (restart_syscall) // 0
1400 GENX_(__NR_exit, sys_exit), // 1
1401 GENX_(__NR_fork, sys_fork), // 2
1402 GENXY(__NR_read, sys_read), // 3
1403 GENX_(__NR_write, sys_write), // 4
1404
1405 GENXY(__NR_open, sys_open), // 5
1406 GENXY(__NR_close, sys_close), // 6
1407 GENXY(__NR_waitpid, sys_waitpid), // 7
1408 GENXY(__NR_creat, sys_creat), // 8
1409 GENX_(__NR_link, sys_link), // 9
1410
1411 GENX_(__NR_unlink, sys_unlink), // 10
1412 GENX_(__NR_execve, sys_execve), // 11
1413 GENX_(__NR_chdir, sys_chdir), // 12
1414 GENXY(__NR_time, sys_time), // 13
1415 GENX_(__NR_mknod, sys_mknod), // 14
1416
1417 GENX_(__NR_chmod, sys_chmod), // 15
1418 //zz LINX_(__NR_lchown, sys_lchown16), // 16
1419 GENX_(__NR_break, sys_ni_syscall), // 17
1420 //zz // (__NR_oldstat, sys_stat), // 18 (obsolete)
1421 LINX_(__NR_lseek, sys_lseek), // 19
1422
1423 GENX_(__NR_getpid, sys_getpid), // 20
1424 LINX_(__NR_mount, sys_mount), // 21
1425 LINX_(__NR_umount, sys_oldumount), // 22
1426 LINX_(__NR_setuid, sys_setuid16), // 23 ## P
1427 LINX_(__NR_getuid, sys_getuid16), // 24 ## P
1428
1429 LINX_(__NR_stime, sys_stime), // 25 * (SVr4,SVID,X/OPEN)
1430 PLAXY(__NR_ptrace, sys_ptrace), // 26
1431 GENX_(__NR_alarm, sys_alarm), // 27
1432 //zz // (__NR_oldfstat, sys_fstat), // 28 * L -- obsolete
1433 GENX_(__NR_pause, sys_pause), // 29
1434
1435 LINX_(__NR_utime, sys_utime), // 30
1436 GENX_(__NR_stty, sys_ni_syscall), // 31
1437 GENX_(__NR_gtty, sys_ni_syscall), // 32
1438 GENX_(__NR_access, sys_access), // 33
1439 GENX_(__NR_nice, sys_nice), // 34
1440
1441 GENX_(__NR_ftime, sys_ni_syscall), // 35
1442 GENX_(__NR_sync, sys_sync), // 36
1443 GENX_(__NR_kill, sys_kill), // 37
1444 GENX_(__NR_rename, sys_rename), // 38
1445 GENX_(__NR_mkdir, sys_mkdir), // 39
1446
1447 GENX_(__NR_rmdir, sys_rmdir), // 40
1448 GENXY(__NR_dup, sys_dup), // 41
1449 LINXY(__NR_pipe, sys_pipe), // 42
1450 GENXY(__NR_times, sys_times), // 43
1451 GENX_(__NR_prof, sys_ni_syscall), // 44
1452 //zz
1453 GENX_(__NR_brk, sys_brk), // 45
1454 LINX_(__NR_setgid, sys_setgid16), // 46
1455 LINX_(__NR_getgid, sys_getgid16), // 47
1456 //zz // (__NR_signal, sys_signal), // 48 */* (ANSI C)
1457 LINX_(__NR_geteuid, sys_geteuid16), // 49
1458
1459 LINX_(__NR_getegid, sys_getegid16), // 50
1460 GENX_(__NR_acct, sys_acct), // 51
1461 LINX_(__NR_umount2, sys_umount), // 52
1462 GENX_(__NR_lock, sys_ni_syscall), // 53
1463 LINXY(__NR_ioctl, sys_ioctl), // 54
1464
1465 LINXY(__NR_fcntl, sys_fcntl), // 55
1466 GENX_(__NR_mpx, sys_ni_syscall), // 56
1467 GENX_(__NR_setpgid, sys_setpgid), // 57
1468 GENX_(__NR_ulimit, sys_ni_syscall), // 58
1469 //zz // (__NR_oldolduname, sys_olduname), // 59 Linux -- obsolete
1470 //zz
1471 GENX_(__NR_umask, sys_umask), // 60
1472 GENX_(__NR_chroot, sys_chroot), // 61
1473 //zz // (__NR_ustat, sys_ustat) // 62 SVr4 -- deprecated
1474 GENXY(__NR_dup2, sys_dup2), // 63
1475 GENX_(__NR_getppid, sys_getppid), // 64
1476
1477 GENX_(__NR_getpgrp, sys_getpgrp), // 65
1478 GENX_(__NR_setsid, sys_setsid), // 66
1479 LINXY(__NR_sigaction, sys_sigaction), // 67
1480 //zz // (__NR_sgetmask, sys_sgetmask), // 68 */* (ANSI C)
1481 //zz // (__NR_ssetmask, sys_ssetmask), // 69 */* (ANSI C)
1482 //zz
1483 LINX_(__NR_setreuid, sys_setreuid16), // 70
1484 LINX_(__NR_setregid, sys_setregid16), // 71
1485 PLAX_(__NR_sigsuspend, sys_sigsuspend), // 72
1486 LINXY(__NR_sigpending, sys_sigpending), // 73
1487 GENX_(__NR_sethostname, sys_sethostname), // 74
1488 //zz
1489 GENX_(__NR_setrlimit, sys_setrlimit), // 75
1490 GENXY(__NR_getrlimit, sys_old_getrlimit), // 76
1491 GENXY(__NR_getrusage, sys_getrusage), // 77
1492 GENXY(__NR_gettimeofday, sys_gettimeofday), // 78
1493 GENX_(__NR_settimeofday, sys_settimeofday), // 79
1494
1495 LINXY(__NR_getgroups, sys_getgroups16), // 80
1496 LINX_(__NR_setgroups, sys_setgroups16), // 81
1497 PLAX_(__NR_select, old_select), // 82
1498 GENX_(__NR_symlink, sys_symlink), // 83
1499 //zz // (__NR_oldlstat, sys_lstat), // 84 -- obsolete
1500 //zz
1501 GENX_(__NR_readlink, sys_readlink), // 85
1502 //zz // (__NR_uselib, sys_uselib), // 86 */Linux
1503 //zz // (__NR_swapon, sys_swapon), // 87 */Linux
1504 //zz // (__NR_reboot, sys_reboot), // 88 */Linux
1505 //zz // (__NR_readdir, old_readdir), // 89 -- superseded
1506 //zz
1507 PLAX_(__NR_mmap, old_mmap), // 90
1508 GENXY(__NR_munmap, sys_munmap), // 91
1509 GENX_(__NR_truncate, sys_truncate), // 92
1510 GENX_(__NR_ftruncate, sys_ftruncate), // 93
1511 GENX_(__NR_fchmod, sys_fchmod), // 94
1512
1513 LINX_(__NR_fchown, sys_fchown16), // 95
1514 GENX_(__NR_getpriority, sys_getpriority), // 96
1515 GENX_(__NR_setpriority, sys_setpriority), // 97
1516 GENX_(__NR_profil, sys_ni_syscall), // 98
1517 GENXY(__NR_statfs, sys_statfs), // 99
1518
1519 GENXY(__NR_fstatfs, sys_fstatfs), // 100
1520 LINX_(__NR_ioperm, sys_ioperm), // 101
1521 LINXY(__NR_socketcall, sys_socketcall), // 102 x86/Linux-only
1522 LINXY(__NR_syslog, sys_syslog), // 103
1523 GENXY(__NR_setitimer, sys_setitimer), // 104
1524
1525 GENXY(__NR_getitimer, sys_getitimer), // 105
1526 GENXY(__NR_stat, sys_newstat), // 106
1527 GENXY(__NR_lstat, sys_newlstat), // 107
1528 GENXY(__NR_fstat, sys_newfstat), // 108
1529 //zz // (__NR_olduname, sys_uname), // 109 -- obsolete
1530 //zz
1531 GENX_(__NR_iopl, sys_iopl), // 110
1532 LINX_(__NR_vhangup, sys_vhangup), // 111
1533 GENX_(__NR_idle, sys_ni_syscall), // 112
1534 PLAXY(__NR_vm86old, sys_vm86old), // 113 x86/Linux-only
1535 GENXY(__NR_wait4, sys_wait4), // 114
1536 //zz
1537 //zz // (__NR_swapoff, sys_swapoff), // 115 */Linux
1538 LINXY(__NR_sysinfo, sys_sysinfo), // 116
1539 LINXY(__NR_ipc, sys_ipc), // 117
1540 GENX_(__NR_fsync, sys_fsync), // 118
1541 PLAX_(__NR_sigreturn, sys_sigreturn), // 119 ?/Linux
1542
1543 PLAX_(__NR_clone, sys_clone), // 120
1544 //zz // (__NR_setdomainname, sys_setdomainname), // 121 */*(?)
1545 GENXY(__NR_uname, sys_newuname), // 122
1546 PLAX_(__NR_modify_ldt, sys_modify_ldt), // 123
1547 LINXY(__NR_adjtimex, sys_adjtimex), // 124
1548
1549 GENXY(__NR_mprotect, sys_mprotect), // 125
1550 LINXY(__NR_sigprocmask, sys_sigprocmask), // 126
1551 //zz // Nb: create_module() was removed 2.4-->2.6
1552 GENX_(__NR_create_module, sys_ni_syscall), // 127
1553 LINX_(__NR_init_module, sys_init_module), // 128
1554 LINX_(__NR_delete_module, sys_delete_module), // 129
1555 //zz
1556 //zz // Nb: get_kernel_syms() was removed 2.4-->2.6
1557 GENX_(__NR_get_kernel_syms, sys_ni_syscall), // 130
1558 LINX_(__NR_quotactl, sys_quotactl), // 131
1559 GENX_(__NR_getpgid, sys_getpgid), // 132
1560 GENX_(__NR_fchdir, sys_fchdir), // 133
1561 //zz // (__NR_bdflush, sys_bdflush), // 134 */Linux
1562 //zz
1563 //zz // (__NR_sysfs, sys_sysfs), // 135 SVr4
1564 LINX_(__NR_personality, sys_personality), // 136
1565 GENX_(__NR_afs_syscall, sys_ni_syscall), // 137
1566 LINX_(__NR_setfsuid, sys_setfsuid16), // 138
1567 LINX_(__NR_setfsgid, sys_setfsgid16), // 139
1568
1569 LINXY(__NR__llseek, sys_llseek), // 140
1570 GENXY(__NR_getdents, sys_getdents), // 141
1571 GENX_(__NR__newselect, sys_select), // 142
1572 GENX_(__NR_flock, sys_flock), // 143
1573 GENX_(__NR_msync, sys_msync), // 144
1574
1575 GENXY(__NR_readv, sys_readv), // 145
1576 GENX_(__NR_writev, sys_writev), // 146
1577 GENX_(__NR_getsid, sys_getsid), // 147
1578 GENX_(__NR_fdatasync, sys_fdatasync), // 148
1579 LINXY(__NR__sysctl, sys_sysctl), // 149
1580
1581 GENX_(__NR_mlock, sys_mlock), // 150
1582 GENX_(__NR_munlock, sys_munlock), // 151
1583 GENX_(__NR_mlockall, sys_mlockall), // 152
1584 LINX_(__NR_munlockall, sys_munlockall), // 153
1585 LINXY(__NR_sched_setparam, sys_sched_setparam), // 154
1586
1587 LINXY(__NR_sched_getparam, sys_sched_getparam), // 155
1588 LINX_(__NR_sched_setscheduler, sys_sched_setscheduler), // 156
1589 LINX_(__NR_sched_getscheduler, sys_sched_getscheduler), // 157
1590 LINX_(__NR_sched_yield, sys_sched_yield), // 158
1591 LINX_(__NR_sched_get_priority_max, sys_sched_get_priority_max),// 159
1592
1593 LINX_(__NR_sched_get_priority_min, sys_sched_get_priority_min),// 160
1594 LINXY(__NR_sched_rr_get_interval, sys_sched_rr_get_interval), // 161
1595 GENXY(__NR_nanosleep, sys_nanosleep), // 162
1596 GENX_(__NR_mremap, sys_mremap), // 163
1597 LINX_(__NR_setresuid, sys_setresuid16), // 164
1598
1599 LINXY(__NR_getresuid, sys_getresuid16), // 165
1600 PLAXY(__NR_vm86, sys_vm86), // 166 x86/Linux-only
1601 GENX_(__NR_query_module, sys_ni_syscall), // 167
1602 GENXY(__NR_poll, sys_poll), // 168
1603 //zz // (__NR_nfsservctl, sys_nfsservctl), // 169 */Linux
1604 //zz
1605 LINX_(__NR_setresgid, sys_setresgid16), // 170
1606 LINXY(__NR_getresgid, sys_getresgid16), // 171
1607 LINXY(__NR_prctl, sys_prctl), // 172
1608 PLAX_(__NR_rt_sigreturn, sys_rt_sigreturn), // 173 x86/Linux only?
1609 LINXY(__NR_rt_sigaction, sys_rt_sigaction), // 174
1610
1611 LINXY(__NR_rt_sigprocmask, sys_rt_sigprocmask), // 175
1612 LINXY(__NR_rt_sigpending, sys_rt_sigpending), // 176
1613 LINXY(__NR_rt_sigtimedwait, sys_rt_sigtimedwait),// 177
1614 LINXY(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo),// 178
1615 LINX_(__NR_rt_sigsuspend, sys_rt_sigsuspend), // 179
1616
1617 GENXY(__NR_pread64, sys_pread64), // 180
1618 GENX_(__NR_pwrite64, sys_pwrite64), // 181
1619 LINX_(__NR_chown, sys_chown16), // 182
1620 GENXY(__NR_getcwd, sys_getcwd), // 183
1621 LINXY(__NR_capget, sys_capget), // 184
1622
1623 LINX_(__NR_capset, sys_capset), // 185
1624 GENXY(__NR_sigaltstack, sys_sigaltstack), // 186
1625 LINXY(__NR_sendfile, sys_sendfile), // 187
1626 GENXY(__NR_getpmsg, sys_getpmsg), // 188
1627 GENX_(__NR_putpmsg, sys_putpmsg), // 189
1628
1629 // Nb: we treat vfork as fork
1630 GENX_(__NR_vfork, sys_fork), // 190
1631 GENXY(__NR_ugetrlimit, sys_getrlimit), // 191
1632 PLAX_(__NR_mmap2, sys_mmap2), // 192
1633 GENX_(__NR_truncate64, sys_truncate64), // 193
1634 GENX_(__NR_ftruncate64, sys_ftruncate64), // 194
1635
1636 PLAXY(__NR_stat64, sys_stat64), // 195
1637 PLAXY(__NR_lstat64, sys_lstat64), // 196
1638 PLAXY(__NR_fstat64, sys_fstat64), // 197
1639 GENX_(__NR_lchown32, sys_lchown), // 198
1640 GENX_(__NR_getuid32, sys_getuid), // 199
1641
1642 GENX_(__NR_getgid32, sys_getgid), // 200
1643 GENX_(__NR_geteuid32, sys_geteuid), // 201
1644 GENX_(__NR_getegid32, sys_getegid), // 202
1645 GENX_(__NR_setreuid32, sys_setreuid), // 203
1646 GENX_(__NR_setregid32, sys_setregid), // 204
1647
1648 GENXY(__NR_getgroups32, sys_getgroups), // 205
1649 GENX_(__NR_setgroups32, sys_setgroups), // 206
1650 GENX_(__NR_fchown32, sys_fchown), // 207
1651 LINX_(__NR_setresuid32, sys_setresuid), // 208
1652 LINXY(__NR_getresuid32, sys_getresuid), // 209
1653
1654 LINX_(__NR_setresgid32, sys_setresgid), // 210
1655 LINXY(__NR_getresgid32, sys_getresgid), // 211
1656 GENX_(__NR_chown32, sys_chown), // 212
1657 GENX_(__NR_setuid32, sys_setuid), // 213
1658 GENX_(__NR_setgid32, sys_setgid), // 214
1659
1660 LINX_(__NR_setfsuid32, sys_setfsuid), // 215
1661 LINX_(__NR_setfsgid32, sys_setfsgid), // 216
1662 LINX_(__NR_pivot_root, sys_pivot_root), // 217
1663 GENXY(__NR_mincore, sys_mincore), // 218
1664 GENX_(__NR_madvise, sys_madvise), // 219
1665
1666 GENXY(__NR_getdents64, sys_getdents64), // 220
1667 LINXY(__NR_fcntl64, sys_fcntl64), // 221
1668 GENX_(222, sys_ni_syscall), // 222
1669 PLAXY(223, sys_syscall223), // 223 // sys_bproc?
1670 LINX_(__NR_gettid, sys_gettid), // 224
1671
1672 LINX_(__NR_readahead, sys_readahead), // 225 */Linux
1673 LINX_(__NR_setxattr, sys_setxattr), // 226
1674 LINX_(__NR_lsetxattr, sys_lsetxattr), // 227
1675 LINX_(__NR_fsetxattr, sys_fsetxattr), // 228
1676 LINXY(__NR_getxattr, sys_getxattr), // 229
1677
1678 LINXY(__NR_lgetxattr, sys_lgetxattr), // 230
1679 LINXY(__NR_fgetxattr, sys_fgetxattr), // 231
1680 LINXY(__NR_listxattr, sys_listxattr), // 232
1681 LINXY(__NR_llistxattr, sys_llistxattr), // 233
1682 LINXY(__NR_flistxattr, sys_flistxattr), // 234
1683
1684 LINX_(__NR_removexattr, sys_removexattr), // 235
1685 LINX_(__NR_lremovexattr, sys_lremovexattr), // 236
1686 LINX_(__NR_fremovexattr, sys_fremovexattr), // 237
1687 LINXY(__NR_tkill, sys_tkill), // 238 */Linux
1688 LINXY(__NR_sendfile64, sys_sendfile64), // 239
1689
1690 LINXY(__NR_futex, sys_futex), // 240
1691 LINX_(__NR_sched_setaffinity, sys_sched_setaffinity), // 241
1692 LINXY(__NR_sched_getaffinity, sys_sched_getaffinity), // 242
1693 PLAX_(__NR_set_thread_area, sys_set_thread_area), // 243
1694 PLAX_(__NR_get_thread_area, sys_get_thread_area), // 244
1695
1696 LINXY(__NR_io_setup, sys_io_setup), // 245
1697 LINX_(__NR_io_destroy, sys_io_destroy), // 246
1698 LINXY(__NR_io_getevents, sys_io_getevents), // 247
1699 LINX_(__NR_io_submit, sys_io_submit), // 248
1700 LINXY(__NR_io_cancel, sys_io_cancel), // 249
1701
1702 LINX_(__NR_fadvise64, sys_fadvise64), // 250 */(Linux?)
1703 GENX_(251, sys_ni_syscall), // 251
1704 LINX_(__NR_exit_group, sys_exit_group), // 252
1705 LINXY(__NR_lookup_dcookie, sys_lookup_dcookie), // 253
1706 LINXY(__NR_epoll_create, sys_epoll_create), // 254
1707
1708 LINX_(__NR_epoll_ctl, sys_epoll_ctl), // 255
1709 LINXY(__NR_epoll_wait, sys_epoll_wait), // 256
1710 //zz // (__NR_remap_file_pages, sys_remap_file_pages), // 257 */Linux
1711 LINX_(__NR_set_tid_address, sys_set_tid_address), // 258
1712 LINXY(__NR_timer_create, sys_timer_create), // 259
1713
1714 LINXY(__NR_timer_settime, sys_timer_settime), // (timer_create+1)
1715 LINXY(__NR_timer_gettime, sys_timer_gettime), // (timer_create+2)
1716 LINX_(__NR_timer_getoverrun, sys_timer_getoverrun),//(timer_create+3)
1717 LINX_(__NR_timer_delete, sys_timer_delete), // (timer_create+4)
1718 LINX_(__NR_clock_settime, sys_clock_settime), // (timer_create+5)
1719
1720 LINXY(__NR_clock_gettime, sys_clock_gettime), // (timer_create+6)
1721 LINXY(__NR_clock_getres, sys_clock_getres), // (timer_create+7)
1722 LINXY(__NR_clock_nanosleep, sys_clock_nanosleep),// (timer_create+8) */*
1723 GENXY(__NR_statfs64, sys_statfs64), // 268
1724 GENXY(__NR_fstatfs64, sys_fstatfs64), // 269
1725
1726 LINX_(__NR_tgkill, sys_tgkill), // 270 */Linux
1727 GENX_(__NR_utimes, sys_utimes), // 271
1728 LINX_(__NR_fadvise64_64, sys_fadvise64_64), // 272 */(Linux?)
1729 GENX_(__NR_vserver, sys_ni_syscall), // 273
1730 LINX_(__NR_mbind, sys_mbind), // 274 ?/?
1731
1732 LINXY(__NR_get_mempolicy, sys_get_mempolicy), // 275 ?/?
1733 LINX_(__NR_set_mempolicy, sys_set_mempolicy), // 276 ?/?
1734 LINXY(__NR_mq_open, sys_mq_open), // 277
1735 LINX_(__NR_mq_unlink, sys_mq_unlink), // (mq_open+1)
1736 LINX_(__NR_mq_timedsend, sys_mq_timedsend), // (mq_open+2)
1737
1738 LINXY(__NR_mq_timedreceive, sys_mq_timedreceive),// (mq_open+3)
1739 LINX_(__NR_mq_notify, sys_mq_notify), // (mq_open+4)
1740 LINXY(__NR_mq_getsetattr, sys_mq_getsetattr), // (mq_open+5)
1741 GENX_(__NR_sys_kexec_load, sys_ni_syscall), // 283
1742 LINXY(__NR_waitid, sys_waitid), // 284
1743
1744 GENX_(285, sys_ni_syscall), // 285
1745 LINX_(__NR_add_key, sys_add_key), // 286
1746 LINX_(__NR_request_key, sys_request_key), // 287
1747 LINXY(__NR_keyctl, sys_keyctl), // 288
1748 LINX_(__NR_ioprio_set, sys_ioprio_set), // 289
1749
1750 LINX_(__NR_ioprio_get, sys_ioprio_get), // 290
1751 LINX_(__NR_inotify_init, sys_inotify_init), // 291
1752 LINX_(__NR_inotify_add_watch, sys_inotify_add_watch), // 292
1753 LINX_(__NR_inotify_rm_watch, sys_inotify_rm_watch), // 293
1754 // LINX_(__NR_migrate_pages, sys_migrate_pages), // 294
1755
1756 LINXY(__NR_openat, sys_openat), // 295
1757 LINX_(__NR_mkdirat, sys_mkdirat), // 296
1758 LINX_(__NR_mknodat, sys_mknodat), // 297
1759 LINX_(__NR_fchownat, sys_fchownat), // 298
1760 LINX_(__NR_futimesat, sys_futimesat), // 299
1761
1762 PLAXY(__NR_fstatat64, sys_fstatat64), // 300
1763 LINX_(__NR_unlinkat, sys_unlinkat), // 301
1764 LINX_(__NR_renameat, sys_renameat), // 302
1765 LINX_(__NR_linkat, sys_linkat), // 303
1766 LINX_(__NR_symlinkat, sys_symlinkat), // 304
1767
1768 LINX_(__NR_readlinkat, sys_readlinkat), // 305
1769 LINX_(__NR_fchmodat, sys_fchmodat), // 306
1770 LINX_(__NR_faccessat, sys_faccessat), // 307
1771 LINX_(__NR_pselect6, sys_pselect6), // 308
1772 LINXY(__NR_ppoll, sys_ppoll), // 309
1773
1774 LINX_(__NR_unshare, sys_unshare), // 310
1775 LINX_(__NR_set_robust_list, sys_set_robust_list), // 311
1776 LINXY(__NR_get_robust_list, sys_get_robust_list), // 312
1777 LINX_(__NR_splice, sys_splice), // 313
1778 LINX_(__NR_sync_file_range, sys_sync_file_range), // 314
1779
1780 LINX_(__NR_tee, sys_tee), // 315
1781 LINXY(__NR_vmsplice, sys_vmsplice), // 316
1782 LINXY(__NR_move_pages, sys_move_pages), // 317
1783 LINXY(__NR_getcpu, sys_getcpu), // 318
1784 LINXY(__NR_epoll_pwait, sys_epoll_pwait), // 319
1785
1786 LINX_(__NR_utimensat, sys_utimensat), // 320
1787 LINXY(__NR_signalfd, sys_signalfd), // 321
1788 LINXY(__NR_timerfd_create, sys_timerfd_create), // 322
1789 LINXY(__NR_eventfd, sys_eventfd), // 323
1790 LINX_(__NR_fallocate, sys_fallocate), // 324
1791
1792 LINXY(__NR_timerfd_settime, sys_timerfd_settime), // 325
1793 LINXY(__NR_timerfd_gettime, sys_timerfd_gettime), // 326
1794 LINXY(__NR_signalfd4, sys_signalfd4), // 327
1795 LINXY(__NR_eventfd2, sys_eventfd2), // 328
1796 LINXY(__NR_epoll_create1, sys_epoll_create1), // 329
1797
1798 LINXY(__NR_dup3, sys_dup3), // 330
1799 LINXY(__NR_pipe2, sys_pipe2), // 331
1800 LINXY(__NR_inotify_init1, sys_inotify_init1), // 332
1801 LINXY(__NR_preadv, sys_preadv), // 333
1802 LINX_(__NR_pwritev, sys_pwritev), // 334
1803
1804 LINXY(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo),// 335
1805 LINXY(__NR_perf_event_open, sys_perf_event_open), // 336
1806 LINXY(__NR_recvmmsg, sys_recvmmsg), // 337
1807 LINXY(__NR_fanotify_init, sys_fanotify_init), // 338
1808 LINX_(__NR_fanotify_mark, sys_fanotify_mark), // 339
1809
1810 LINXY(__NR_prlimit64, sys_prlimit64), // 340
1811 LINXY(__NR_name_to_handle_at, sys_name_to_handle_at),// 341
1812 LINXY(__NR_open_by_handle_at, sys_open_by_handle_at),// 342
1813 LINXY(__NR_clock_adjtime, sys_clock_adjtime), // 343
1814 LINX_(__NR_syncfs, sys_syncfs), // 344
1815
1816 LINXY(__NR_sendmmsg, sys_sendmmsg), // 345
1817 // LINX_(__NR_setns, sys_ni_syscall), // 346
1818 LINXY(__NR_process_vm_readv, sys_process_vm_readv), // 347
1819 LINX_(__NR_process_vm_writev, sys_process_vm_writev),// 348
1820 LINX_(__NR_kcmp, sys_kcmp), // 349
1821
1822 // LIN__(__NR_finit_module, sys_ni_syscall), // 350
1823 // LIN__(__NR_sched_setattr, sys_ni_syscall), // 351
1824 // LIN__(__NR_sched_getattr, sys_ni_syscall), // 352
1825 // LIN__(__NR_renameat2, sys_ni_syscall), // 353
1826 // LIN__(__NR_seccomp, sys_ni_syscall), // 354
1827
1828 LINXY(__NR_getrandom, sys_getrandom), // 355
1829 LINXY(__NR_memfd_create, sys_memfd_create) // 356
1830 // LIN__(__NR_bpf, sys_ni_syscall) // 357
1831 };
1832
ML_(get_linux_syscall_entry)1833 SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno )
1834 {
1835 const UInt syscall_table_size
1836 = sizeof(syscall_table) / sizeof(syscall_table[0]);
1837
1838 /* Is it in the contiguous initial section of the table? */
1839 if (sysno < syscall_table_size) {
1840 SyscallTableEntry* sys = &syscall_table[sysno];
1841 if (sys->before == NULL)
1842 return NULL; /* no entry */
1843 else
1844 return sys;
1845 }
1846
1847 /* Can't find a wrapper */
1848 return NULL;
1849 }
1850
1851 #endif // defined(VGP_x86_linux)
1852
1853 /*--------------------------------------------------------------------*/
1854 /*--- end ---*/
1855 /*--------------------------------------------------------------------*/
1856