• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mode: C; c-basic-offset: 3; -*- */
2 
3 /*--------------------------------------------------------------------*/
4 /*--- Wrappers for generic Unix system calls                       ---*/
5 /*---                                            syswrap-generic.c ---*/
6 /*--------------------------------------------------------------------*/
7 
8 /*
9    This file is part of Valgrind, a dynamic binary instrumentation
10    framework.
11 
12    Copyright (C) 2000-2017 Julian Seward
13       jseward@acm.org
14 
15    This program is free software; you can redistribute it and/or
16    modify it under the terms of the GNU General Public License as
17    published by the Free Software Foundation; either version 2 of the
18    License, or (at your option) any later version.
19 
20    This program is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received a copy of the GNU General Public License
26    along with this program; if not, write to the Free Software
27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28    02111-1307, USA.
29 
30    The GNU General Public License is contained in the file COPYING.
31 */
32 
33 #if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
34 
35 #include "pub_core_basics.h"
36 #include "pub_core_vki.h"
37 #include "pub_core_vkiscnums.h"
38 #include "pub_core_threadstate.h"
39 #include "pub_core_debuginfo.h"     // VG_(di_notify_*)
40 #include "pub_core_aspacemgr.h"
41 #include "pub_core_transtab.h"      // VG_(discard_translations)
42 #include "pub_core_xarray.h"
43 #include "pub_core_clientstate.h"   // VG_(brk_base), VG_(brk_limit)
44 #include "pub_core_debuglog.h"
45 #include "pub_core_errormgr.h"
46 #include "pub_core_gdbserver.h"     // VG_(gdbserver)
47 #include "pub_core_libcbase.h"
48 #include "pub_core_libcassert.h"
49 #include "pub_core_libcfile.h"
50 #include "pub_core_libcprint.h"
51 #include "pub_core_libcproc.h"
52 #include "pub_core_libcsignal.h"
53 #include "pub_core_machine.h"       // VG_(get_SP)
54 #include "pub_core_mallocfree.h"
55 #include "pub_core_options.h"
56 #include "pub_core_scheduler.h"
57 #include "pub_core_signals.h"
58 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
59 #include "pub_core_syscall.h"
60 #include "pub_core_syswrap.h"
61 #include "pub_core_tooliface.h"
62 #include "pub_core_ume.h"
63 #include "pub_core_stacks.h"
64 
65 #include "priv_types_n_macros.h"
66 #include "priv_syswrap-generic.h"
67 
68 #include "config.h"
69 
70 
ML_(guess_and_register_stack)71 void ML_(guess_and_register_stack) (Addr sp, ThreadState* tst)
72 {
73    Bool debug = False;
74    NSegment const* seg;
75 
76    /* We don't really know where the client stack is, because its
77       allocated by the client.  The best we can do is look at the
78       memory mappings and try to derive some useful information.  We
79       assume that sp starts near its highest possible value, and can
80       only go down to the start of the mmaped segment. */
81    seg = VG_(am_find_nsegment)(sp);
82    if (seg
83        && VG_(am_is_valid_for_client)(sp, 1, VKI_PROT_READ | VKI_PROT_WRITE)) {
84       tst->client_stack_highest_byte = (Addr)VG_PGROUNDUP(sp)-1;
85       tst->client_stack_szB = tst->client_stack_highest_byte - seg->start + 1;
86 
87       tst->os_state.stk_id
88          = VG_(register_stack)(seg->start, tst->client_stack_highest_byte);
89 
90       if (debug)
91 	 VG_(printf)("tid %u: guessed client stack range [%#lx-%#lx]"
92                      " as stk_id %lu\n",
93 		     tst->tid, seg->start, tst->client_stack_highest_byte,
94                      tst->os_state.stk_id);
95    } else {
96       VG_(message)(Vg_UserMsg,
97                    "!? New thread %u starts with SP(%#lx) unmapped\n",
98 		   tst->tid, sp);
99       tst->client_stack_highest_byte = 0;
100       tst->client_stack_szB  = 0;
101    }
102 }
103 
104 /* Returns True iff address range is something the client can
105    plausibly mess with: all of it is either already belongs to the
106    client or is free or a reservation. */
107 
ML_(valid_client_addr)108 Bool ML_(valid_client_addr)(Addr start, SizeT size, ThreadId tid,
109                                    const HChar *syscallname)
110 {
111    Bool ret;
112 
113    if (size == 0)
114       return True;
115 
116    ret = VG_(am_is_valid_for_client_or_free_or_resvn)
117             (start,size,VKI_PROT_NONE);
118 
119    if (0)
120       VG_(printf)("%s: test=%#lx-%#lx ret=%d\n",
121 		  syscallname, start, start+size-1, (Int)ret);
122 
123    if (!ret && syscallname != NULL) {
124       VG_(message)(Vg_UserMsg, "Warning: client syscall %s tried "
125                                "to modify addresses %#lx-%#lx\n",
126                                syscallname, start, start+size-1);
127       if (VG_(clo_verbosity) > 1) {
128          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
129       }
130    }
131 
132    return ret;
133 }
134 
135 
ML_(client_signal_OK)136 Bool ML_(client_signal_OK)(Int sigNo)
137 {
138    /* signal 0 is OK for kill */
139    Bool ret = sigNo >= 0 && sigNo <= VG_SIGVGRTUSERMAX;
140 
141    //VG_(printf)("client_signal_OK(%d) -> %d\n", sigNo, ret);
142 
143    return ret;
144 }
145 
146 
147 /* Handy small function to help stop wrappers from segfaulting when
148    presented with bogus client addresses.  Is not used for generating
149    user-visible errors. */
150 
ML_(safe_to_deref)151 Bool ML_(safe_to_deref) ( const void *start, SizeT size )
152 {
153    return VG_(am_is_valid_for_client)( (Addr)start, size, VKI_PROT_READ );
154 }
155 
156 
157 /* ---------------------------------------------------------------------
158    Doing mmap, mremap
159    ------------------------------------------------------------------ */
160 
161 /* AFAICT from kernel sources (mm/mprotect.c) and general experimentation,
162    munmap, mprotect (and mremap??) work at the page level.  So addresses
163    and lengths must be adjusted for this. */
164 
165 /* Mash around start and length so that the area exactly covers
166    an integral number of pages.  If we don't do that, memcheck's
167    idea of addressible memory diverges from that of the
168    kernel's, which causes the leak detector to crash. */
169 static
page_align_addr_and_len(Addr * a,SizeT * len)170 void page_align_addr_and_len( Addr* a, SizeT* len)
171 {
172    Addr ra;
173 
174    ra = VG_PGROUNDDN(*a);
175    *len = VG_PGROUNDUP(*a + *len) - ra;
176    *a = ra;
177 }
178 
/* Tell the address space manager about a new client mapping at 'a',
   and discard translations for the range when it asks for that. */
static void notify_core_of_mmap(Addr a, SizeT len, UInt prot,
                                UInt flags, Int fd, Off64T offset)
{
   SizeT mapped_len;

   /* 'a' came back from a real kernel mmap, so it must be aligned ... */
   vg_assert(VG_IS_PAGE_ALIGNED(a));
   /* ... but the length is whatever the syscall was given. */
   mapped_len = VG_PGROUNDUP(len);

   if (VG_(am_notify_client_mmap)( a, mapped_len, prot, flags, fd, offset ))
      VG_(discard_translations)( a, (ULong)mapped_len,
                                 "notify_core_of_mmap" );
}
195 
/* Tell the tool about a new client mapping at 'a', passing the
   protection bits as separate read/write/execute flags. */
static void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, ULong di_handle)
{
   const Bool readable   = toBool(prot & VKI_PROT_READ);
   const Bool writable   = toBool(prot & VKI_PROT_WRITE);
   const Bool executable = toBool(prot & VKI_PROT_EXEC);
   SizeT mapped_len;

   /* 'a' came back from a real kernel mmap, so it must be aligned ... */
   vg_assert(VG_IS_PAGE_ALIGNED(a));
   /* ... but the length is whatever the syscall was given. */
   mapped_len = VG_PGROUNDUP(len);

   VG_TRACK( new_mem_mmap, a, mapped_len,
             readable, writable, executable, di_handle );
}
211 
212 
213 /* When a client mmap has been successfully done, this function must
214    be called.  It notifies both aspacem and the tool of the new
215    mapping.
216 
217    JRS 2008-Aug-14: But notice this is *very* obscure.  The only place
218    it is called from is POST(sys_io_setup).  In particular,
219    ML_(generic_PRE_sys_mmap), in m_syswrap, is the "normal case" handler for
220    client mmap.  But it doesn't call this function; instead it does the
221    relevant notifications itself.  Here, we just pass di_handle=0 to
222    notify_tool_of_mmap as we have no better information.  But really this
223    function should be done away with; problem is I don't understand what
224    POST(sys_io_setup) does or how it works.
225 
226    [However, this function is used lots for Darwin, because
227     ML_(generic_PRE_sys_mmap) cannot be used for Darwin.]
228  */
229 void
ML_(notify_core_and_tool_of_mmap)230 ML_(notify_core_and_tool_of_mmap) ( Addr a, SizeT len, UInt prot,
231                                     UInt flags, Int fd, Off64T offset )
232 {
233    // XXX: unlike the other notify_core_and_tool* functions, this one doesn't
234    // do anything with debug info (ie. it doesn't call VG_(di_notify_mmap)).
235    // Should it?  --njn
236    notify_core_of_mmap(a, len, prot, flags, fd, offset);
237    notify_tool_of_mmap(a, len, prot, 0/*di_handle*/);
238 }
239 
240 void
ML_(notify_core_and_tool_of_munmap)241 ML_(notify_core_and_tool_of_munmap) ( Addr a, SizeT len )
242 {
243    Bool d;
244 
245    page_align_addr_and_len(&a, &len);
246    d = VG_(am_notify_munmap)(a, len);
247    VG_TRACK( die_mem_munmap, a, len );
248    VG_(di_notify_munmap)( a, len );
249    if (d)
250       VG_(discard_translations)( a, (ULong)len,
251                                  "ML_(notify_core_and_tool_of_munmap)" );
252 }
253 
254 void
ML_(notify_core_and_tool_of_mprotect)255 ML_(notify_core_and_tool_of_mprotect) ( Addr a, SizeT len, Int prot )
256 {
257    Bool rr = toBool(prot & VKI_PROT_READ);
258    Bool ww = toBool(prot & VKI_PROT_WRITE);
259    Bool xx = toBool(prot & VKI_PROT_EXEC);
260    Bool d;
261 
262    page_align_addr_and_len(&a, &len);
263    d = VG_(am_notify_mprotect)(a, len, prot);
264    VG_TRACK( change_mem_mprotect, a, len, rr, ww, xx );
265    VG_(di_notify_mprotect)( a, len, prot );
266    if (d)
267       VG_(discard_translations)( a, (ULong)len,
268                                  "ML_(notify_core_and_tool_of_mprotect)" );
269 }
270 
271 
272 
273 #if HAVE_MREMAP
274 /* Expand (or shrink) an existing mapping, potentially moving it at
275    the same time (controlled by the MREMAP_MAYMOVE flag).  Nightmare.
276 */
277 static
do_mremap(Addr old_addr,SizeT old_len,Addr new_addr,SizeT new_len,UWord flags,ThreadId tid)278 SysRes do_mremap( Addr old_addr, SizeT old_len,
279                   Addr new_addr, SizeT new_len,
280                   UWord flags, ThreadId tid )
281 {
282 #  define MIN_SIZET(_aa,_bb) (_aa) < (_bb) ? (_aa) : (_bb)
283 
284    Bool      ok, d;
285    NSegment const* old_seg;
286    Addr      advised;
287    Bool      f_fixed   = toBool(flags & VKI_MREMAP_FIXED);
288    Bool      f_maymove = toBool(flags & VKI_MREMAP_MAYMOVE);
289 
290    if (0)
291       VG_(printf)("do_remap (old %#lx %lu) (new %#lx %lu) %s %s\n",
292                   old_addr,old_len,new_addr,new_len,
293                   flags & VKI_MREMAP_MAYMOVE ? "MAYMOVE" : "",
294                   flags & VKI_MREMAP_FIXED ? "FIXED" : "");
295    if (0)
296       VG_(am_show_nsegments)(0, "do_remap: before");
297 
298    if (flags & ~(VKI_MREMAP_FIXED | VKI_MREMAP_MAYMOVE))
299       goto eINVAL;
300 
301    if (!VG_IS_PAGE_ALIGNED(old_addr))
302       goto eINVAL;
303 
304    old_len = VG_PGROUNDUP(old_len);
305    new_len = VG_PGROUNDUP(new_len);
306 
307    if (new_len == 0)
308       goto eINVAL;
309 
310    /* kernel doesn't reject this, but we do. */
311    if (old_len == 0)
312       goto eINVAL;
313 
314    /* reject wraparounds */
315    if (old_addr + old_len < old_addr)
316       goto eINVAL;
317    if (f_fixed == True && new_addr + new_len < new_len)
318       goto eINVAL;
319 
320    /* kernel rejects all fixed, no-move requests (which are
321       meaningless). */
322    if (f_fixed == True && f_maymove == False)
323       goto eINVAL;
324 
325    /* Stay away from non-client areas. */
326    if (!ML_(valid_client_addr)(old_addr, old_len, tid, "mremap(old_addr)"))
327       goto eINVAL;
328 
329    /* In all remaining cases, if the old range does not fall within a
330       single segment, fail. */
331    old_seg = VG_(am_find_nsegment)( old_addr );
332    if (old_addr < old_seg->start || old_addr+old_len-1 > old_seg->end)
333       goto eINVAL;
334    if (old_seg->kind != SkAnonC && old_seg->kind != SkFileC
335        && old_seg->kind != SkShmC)
336       goto eINVAL;
337 
338    vg_assert(old_len > 0);
339    vg_assert(new_len > 0);
340    vg_assert(VG_IS_PAGE_ALIGNED(old_len));
341    vg_assert(VG_IS_PAGE_ALIGNED(new_len));
342    vg_assert(VG_IS_PAGE_ALIGNED(old_addr));
343 
344    /* There are 3 remaining cases:
345 
346       * maymove == False
347 
348         new space has to be at old address, so:
349             - shrink    -> unmap end
350             - same size -> do nothing
351             - grow      -> if can grow in-place, do so, else fail
352 
353       * maymove == True, fixed == False
354 
355         new space can be anywhere, so:
356             - shrink    -> unmap end
357             - same size -> do nothing
358             - grow      -> if can grow in-place, do so, else
359                            move to anywhere large enough, else fail
360 
361       * maymove == True, fixed == True
362 
363         new space must be at new address, so:
364 
365             - if new address is not page aligned, fail
366             - if new address range overlaps old one, fail
367             - if new address range cannot be allocated, fail
368             - else move to new address range with new size
369             - else fail
370    */
371 
372    if (f_maymove == False) {
373       /* new space has to be at old address */
374       if (new_len < old_len)
375          goto shrink_in_place;
376       if (new_len > old_len)
377          goto grow_in_place_or_fail;
378       goto same_in_place;
379    }
380 
381    if (f_maymove == True && f_fixed == False) {
382       /* new space can be anywhere */
383       if (new_len < old_len)
384          goto shrink_in_place;
385       if (new_len > old_len)
386          goto grow_in_place_or_move_anywhere_or_fail;
387       goto same_in_place;
388    }
389 
390    if (f_maymove == True && f_fixed == True) {
391       /* new space can only be at the new address */
392       if (!VG_IS_PAGE_ALIGNED(new_addr))
393          goto eINVAL;
394       if (new_addr+new_len-1 < old_addr || new_addr > old_addr+old_len-1) {
395          /* no overlap */
396       } else {
397          goto eINVAL;
398       }
399       if (new_addr == 0)
400          goto eINVAL;
401          /* VG_(am_get_advisory_client_simple) interprets zero to mean
402             non-fixed, which is not what we want */
403       advised = VG_(am_get_advisory_client_simple)(new_addr, new_len, &ok);
404       if (!ok || advised != new_addr)
405          goto eNOMEM;
406       ok = VG_(am_relocate_nooverlap_client)
407               ( &d, old_addr, old_len, new_addr, new_len );
408       if (ok) {
409          VG_TRACK( copy_mem_remap, old_addr, new_addr,
410                                    MIN_SIZET(old_len,new_len) );
411          if (new_len > old_len)
412             VG_TRACK( new_mem_mmap, new_addr+old_len, new_len-old_len,
413                       old_seg->hasR, old_seg->hasW, old_seg->hasX,
414                       0/*di_handle*/ );
415          VG_TRACK(die_mem_munmap, old_addr, old_len);
416          if (d) {
417             VG_(discard_translations)( old_addr, old_len, "do_remap(1)" );
418             VG_(discard_translations)( new_addr, new_len, "do_remap(2)" );
419          }
420          return VG_(mk_SysRes_Success)( new_addr );
421       }
422       goto eNOMEM;
423    }
424 
425    /* end of the 3 cases */
426    /*NOTREACHED*/ vg_assert(0);
427 
428   grow_in_place_or_move_anywhere_or_fail:
429    {
430    /* try growing it in-place */
431    Addr   needA = old_addr + old_len;
432    SSizeT needL = new_len - old_len;
433 
434    vg_assert(needL > 0);
435    vg_assert(needA > 0);
436 
437    advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
438    if (ok) {
439       /* Fixes bug #129866. */
440       ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
441    }
442    if (ok && advised == needA) {
443       const NSegment *new_seg = VG_(am_extend_map_client)( old_addr, needL );
444       if (new_seg) {
445          VG_TRACK( new_mem_mmap, needA, needL,
446                                  new_seg->hasR,
447                                  new_seg->hasW, new_seg->hasX,
448                                  0/*di_handle*/ );
449          return VG_(mk_SysRes_Success)( old_addr );
450       }
451    }
452 
453    /* that failed.  Look elsewhere. */
454    advised = VG_(am_get_advisory_client_simple)( 0, new_len, &ok );
455    if (ok) {
456       Bool oldR = old_seg->hasR;
457       Bool oldW = old_seg->hasW;
458       Bool oldX = old_seg->hasX;
459       /* assert new area does not overlap old */
460       vg_assert(advised+new_len-1 < old_addr
461                 || advised > old_addr+old_len-1);
462       ok = VG_(am_relocate_nooverlap_client)
463               ( &d, old_addr, old_len, advised, new_len );
464       if (ok) {
465          VG_TRACK( copy_mem_remap, old_addr, advised,
466                                    MIN_SIZET(old_len,new_len) );
467          if (new_len > old_len)
468             VG_TRACK( new_mem_mmap, advised+old_len, new_len-old_len,
469                       oldR, oldW, oldX, 0/*di_handle*/ );
470          VG_TRACK(die_mem_munmap, old_addr, old_len);
471          if (d) {
472             VG_(discard_translations)( old_addr, old_len, "do_remap(4)" );
473             VG_(discard_translations)( advised, new_len, "do_remap(5)" );
474          }
475          return VG_(mk_SysRes_Success)( advised );
476       }
477    }
478    goto eNOMEM;
479    }
480    /*NOTREACHED*/ vg_assert(0);
481 
482   grow_in_place_or_fail:
483    {
484    Addr  needA = old_addr + old_len;
485    SizeT needL = new_len - old_len;
486 
487    vg_assert(needA > 0);
488 
489    advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
490    if (ok) {
491       /* Fixes bug #129866. */
492       ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
493    }
494    if (!ok || advised != needA)
495       goto eNOMEM;
496    const NSegment *new_seg = VG_(am_extend_map_client)( old_addr, needL );
497    if (!new_seg)
498       goto eNOMEM;
499    VG_TRACK( new_mem_mmap, needA, needL,
500                            new_seg->hasR, new_seg->hasW, new_seg->hasX,
501                            0/*di_handle*/ );
502 
503    return VG_(mk_SysRes_Success)( old_addr );
504    }
505    /*NOTREACHED*/ vg_assert(0);
506 
507   shrink_in_place:
508    {
509    SysRes sres = VG_(am_munmap_client)( &d, old_addr+new_len, old_len-new_len );
510    if (sr_isError(sres))
511       return sres;
512    VG_TRACK( die_mem_munmap, old_addr+new_len, old_len-new_len );
513    if (d)
514       VG_(discard_translations)( old_addr+new_len, old_len-new_len,
515                                  "do_remap(7)" );
516    return VG_(mk_SysRes_Success)( old_addr );
517    }
518    /*NOTREACHED*/ vg_assert(0);
519 
520   same_in_place:
521    return VG_(mk_SysRes_Success)( old_addr );
522    /*NOTREACHED*/ vg_assert(0);
523 
524   eINVAL:
525    return VG_(mk_SysRes_Error)( VKI_EINVAL );
526   eNOMEM:
527    return VG_(mk_SysRes_Error)( VKI_ENOMEM );
528 
529 #  undef MIN_SIZET
530 }
531 #endif /* HAVE_MREMAP */
532 
533 
534 /* ---------------------------------------------------------------------
535    File-descriptor tracking
536    ------------------------------------------------------------------ */
537 
538 /* One of these is allocated for each open file descriptor.  */
539 typedef struct OpenFd
540 {
541    Int fd;                        /* The file descriptor */
542    HChar *pathname;               /* NULL if not a regular file or unknown */
543    ExeContext *where;             /* NULL if inherited from parent */
544    struct OpenFd *next, *prev;
545 } OpenFd;
546 
547 /* List of allocated file descriptors. */
548 static OpenFd *allocated_fds = NULL;
549 
550 /* Count of open file descriptors. */
551 static Int fd_count = 0;
552 
553 
554 /* Note the fact that a file descriptor was just closed. */
ML_(record_fd_close)555 void ML_(record_fd_close)(Int fd)
556 {
557    OpenFd *i = allocated_fds;
558 
559    if (fd >= VG_(fd_hard_limit))
560       return;			/* Valgrind internal */
561 
562    while(i) {
563       if(i->fd == fd) {
564          if(i->prev)
565             i->prev->next = i->next;
566          else
567             allocated_fds = i->next;
568          if(i->next)
569             i->next->prev = i->prev;
570          if(i->pathname)
571             VG_(free) (i->pathname);
572          VG_(free) (i);
573          fd_count--;
574          break;
575       }
576       i = i->next;
577    }
578 }
579 
580 /* Note the fact that a file descriptor was just opened.  If the
581    tid is -1, this indicates an inherited fd.  If the pathname is NULL,
582    this either indicates a non-standard file (i.e. a pipe or socket or
583    some such thing) or that we don't know the filename.  If the fd is
584    already open, then we're probably doing a dup2() to an existing fd,
585    so just overwrite the existing one. */
ML_(record_fd_open_with_given_name)586 void ML_(record_fd_open_with_given_name)(ThreadId tid, Int fd,
587                                          const HChar *pathname)
588 {
589    OpenFd *i;
590 
591    if (fd >= VG_(fd_hard_limit))
592       return;			/* Valgrind internal */
593 
594    /* Check to see if this fd is already open. */
595    i = allocated_fds;
596    while (i) {
597       if (i->fd == fd) {
598          if (i->pathname) VG_(free)(i->pathname);
599          break;
600       }
601       i = i->next;
602    }
603 
604    /* Not already one: allocate an OpenFd */
605    if (i == NULL) {
606       i = VG_(malloc)("syswrap.rfdowgn.1", sizeof(OpenFd));
607 
608       i->prev = NULL;
609       i->next = allocated_fds;
610       if(allocated_fds) allocated_fds->prev = i;
611       allocated_fds = i;
612       fd_count++;
613    }
614 
615    i->fd = fd;
616    i->pathname = VG_(strdup)("syswrap.rfdowgn.2", pathname);
617    i->where = (tid == -1) ? NULL : VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
618 }
619 
620 // Record opening of an fd, and find its name.
ML_(record_fd_open_named)621 void ML_(record_fd_open_named)(ThreadId tid, Int fd)
622 {
623    const HChar* buf;
624    const HChar* name;
625    if (VG_(resolve_filename)(fd, &buf))
626       name = buf;
627    else
628       name = NULL;
629 
630    ML_(record_fd_open_with_given_name)(tid, fd, name);
631 }
632 
633 // Record opening of a nameless fd.
ML_(record_fd_open_nameless)634 void ML_(record_fd_open_nameless)(ThreadId tid, Int fd)
635 {
636    ML_(record_fd_open_with_given_name)(tid, fd, NULL);
637 }
638 
639 // Return if a given file descriptor is already recorded.
ML_(fd_recorded)640 Bool ML_(fd_recorded)(Int fd)
641 {
642    OpenFd *i = allocated_fds;
643    while (i) {
644       if (i->fd == fd)
645          return True;
646       i = i->next;
647    }
648    return False;
649 }
650 
651 /* Returned string must not be modified nor free'd. */
ML_(find_fd_recorded_by_fd)652 const HChar *ML_(find_fd_recorded_by_fd)(Int fd)
653 {
654    OpenFd *i = allocated_fds;
655 
656    while (i) {
657       if (i->fd == fd)
658          return i->pathname;
659       i = i->next;
660    }
661 
662    return NULL;
663 }
664 
665 static
unix_to_name(struct vki_sockaddr_un * sa,UInt len,HChar * name)666 HChar *unix_to_name(struct vki_sockaddr_un *sa, UInt len, HChar *name)
667 {
668    if (sa == NULL || len == 0 || sa->sun_path[0] == '\0') {
669       VG_(sprintf)(name, "<unknown>");
670    } else {
671       VG_(sprintf)(name, "%s", sa->sun_path);
672    }
673 
674    return name;
675 }
676 
677 static
inet_to_name(struct vki_sockaddr_in * sa,UInt len,HChar * name)678 HChar *inet_to_name(struct vki_sockaddr_in *sa, UInt len, HChar *name)
679 {
680    if (sa == NULL || len == 0) {
681       VG_(sprintf)(name, "<unknown>");
682    } else if (sa->sin_port == 0) {
683       VG_(sprintf)(name, "<unbound>");
684    } else {
685       UInt addr = VG_(ntohl)(sa->sin_addr.s_addr);
686       VG_(sprintf)(name, "%u.%u.%u.%u:%u",
687                    (addr>>24) & 0xFF, (addr>>16) & 0xFF,
688                    (addr>>8) & 0xFF, addr & 0xFF,
689                    VG_(ntohs)(sa->sin_port));
690    }
691 
692    return name;
693 }
694 
695 static
inet6_format(HChar * s,const UChar ip[16])696 void inet6_format(HChar *s, const UChar ip[16])
697 {
698    static const unsigned char V4mappedprefix[12] = {0,0,0,0,0,0,0,0,0,0,0xff,0xff};
699 
700    if (!VG_(memcmp)(ip, V4mappedprefix, 12)) {
701       const struct vki_in_addr *sin_addr =
702           (const struct vki_in_addr *)(ip + 12);
703       UInt addr = VG_(ntohl)(sin_addr->s_addr);
704 
705       VG_(sprintf)(s, "::ffff:%u.%u.%u.%u",
706                    (addr>>24) & 0xFF, (addr>>16) & 0xFF,
707                    (addr>>8) & 0xFF, addr & 0xFF);
708    } else {
709       Bool compressing = False;
710       Bool compressed = False;
711       Int len = 0;
712       Int i;
713 
714       for (i = 0; i < 16; i += 2) {
715          UInt word = ((UInt)ip[i] << 8) | (UInt)ip[i+1];
716          if (word == 0 && !compressed) {
717             compressing = True;
718          } else {
719             if (compressing) {
720                compressing = False;
721                compressed = True;
722                s[len++] = ':';
723             }
724             if (i > 0) {
725                s[len++] = ':';
726             }
727             len += VG_(sprintf)(s + len, "%x", word);
728          }
729       }
730 
731       if (compressing) {
732          s[len++] = ':';
733          s[len++] = ':';
734       }
735 
736       s[len++] = 0;
737    }
738 
739    return;
740 }
741 
742 static
inet6_to_name(struct vki_sockaddr_in6 * sa,UInt len,HChar * name)743 HChar *inet6_to_name(struct vki_sockaddr_in6 *sa, UInt len, HChar *name)
744 {
745    if (sa == NULL || len == 0) {
746       VG_(sprintf)(name, "<unknown>");
747    } else if (sa->sin6_port == 0) {
748       VG_(sprintf)(name, "<unbound>");
749    } else {
750       HChar addr[100];    // large enough
751       inet6_format(addr, (void *)&(sa->sin6_addr));
752       VG_(sprintf)(name, "[%s]:%u", addr, VG_(ntohs)(sa->sin6_port));
753    }
754 
755    return name;
756 }
757 
758 /*
759  * Try get some details about a socket.
760  */
761 static void
getsockdetails(Int fd)762 getsockdetails(Int fd)
763 {
764    union u {
765       struct vki_sockaddr a;
766       struct vki_sockaddr_in in;
767       struct vki_sockaddr_in6 in6;
768       struct vki_sockaddr_un un;
769    } laddr;
770    Int llen;
771 
772    llen = sizeof(laddr);
773    VG_(memset)(&laddr, 0, llen);
774 
775    if(VG_(getsockname)(fd, (struct vki_sockaddr *)&(laddr.a), &llen) != -1) {
776       switch(laddr.a.sa_family) {
777       case VKI_AF_INET: {
778          HChar lname[32];   // large enough
779          HChar pname[32];   // large enough
780          struct vki_sockaddr_in paddr;
781          Int plen = sizeof(struct vki_sockaddr_in);
782 
783          if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
784             VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> %s\n", fd,
785                          inet_to_name(&(laddr.in), llen, lname),
786                          inet_to_name(&paddr, plen, pname));
787          } else {
788             VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> unbound\n",
789                          fd, inet_to_name(&(laddr.in), llen, lname));
790          }
791          return;
792          }
793       case VKI_AF_INET6: {
794          HChar lname[128];  // large enough
795          HChar pname[128];  // large enough
796          struct vki_sockaddr_in6 paddr;
797          Int plen = sizeof(struct vki_sockaddr_in6);
798 
799          if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
800             VG_(message)(Vg_UserMsg, "Open AF_INET6 socket %d: %s <-> %s\n", fd,
801                          inet6_to_name(&(laddr.in6), llen, lname),
802                          inet6_to_name(&paddr, plen, pname));
803          } else {
804             VG_(message)(Vg_UserMsg, "Open AF_INET6 socket %d: %s <-> unbound\n",
805                          fd, inet6_to_name(&(laddr.in6), llen, lname));
806          }
807          return;
808          }
809       case VKI_AF_UNIX: {
810          static char lname[256];
811          VG_(message)(Vg_UserMsg, "Open AF_UNIX socket %d: %s\n", fd,
812                       unix_to_name(&(laddr.un), llen, lname));
813          return;
814          }
815       default:
816          VG_(message)(Vg_UserMsg, "Open pf-%d socket %d:\n",
817                       laddr.a.sa_family, fd);
818          return;
819       }
820    }
821 
822    VG_(message)(Vg_UserMsg, "Open socket %d:\n", fd);
823 }
824 
825 
826 /* Dump out a summary, and a more detailed list, of open file descriptors. */
VG_(show_open_fds)827 void VG_(show_open_fds) (const HChar* when)
828 {
829    OpenFd *i = allocated_fds;
830 
831    VG_(message)(Vg_UserMsg, "FILE DESCRIPTORS: %d open %s.\n", fd_count, when);
832 
833    while (i) {
834       if (i->pathname) {
835          VG_(message)(Vg_UserMsg, "Open file descriptor %d: %s\n", i->fd,
836                       i->pathname);
837       } else {
838          Int val;
839          Int len = sizeof(val);
840 
841          if (VG_(getsockopt)(i->fd, VKI_SOL_SOCKET, VKI_SO_TYPE, &val, &len)
842              == -1) {
843             VG_(message)(Vg_UserMsg, "Open file descriptor %d:\n", i->fd);
844          } else {
845             getsockdetails(i->fd);
846          }
847       }
848 
849       if(i->where) {
850          VG_(pp_ExeContext)(i->where);
851          VG_(message)(Vg_UserMsg, "\n");
852       } else {
853          VG_(message)(Vg_UserMsg, "   <inherited from parent>\n");
854          VG_(message)(Vg_UserMsg, "\n");
855       }
856 
857       i = i->next;
858    }
859 
860    VG_(message)(Vg_UserMsg, "\n");
861 }
862 
863 /* If /proc/self/fd doesn't exist (e.g. you've got a Linux kernel that doesn't
864    have /proc support compiled in, or a non-Linux kernel), then we need to
865    find out what file descriptors we inherited from our parent process the
866    hard way - by checking each fd in turn. */
867 static
init_preopened_fds_without_proc_self_fd(void)868 void init_preopened_fds_without_proc_self_fd(void)
869 {
870    struct vki_rlimit lim;
871    UInt count;
872    Int i;
873 
874    if (VG_(getrlimit) (VKI_RLIMIT_NOFILE, &lim) == -1) {
875       /* Hmm.  getrlimit() failed.  Now we're screwed, so just choose
876          an arbitrarily high number.  1024 happens to be the limit in
877          the 2.4 Linux kernels. */
878       count = 1024;
879    } else {
880       count = lim.rlim_cur;
881    }
882 
883    for (i = 0; i < count; i++)
884       if (VG_(fcntl)(i, VKI_F_GETFL, 0) != -1)
885          ML_(record_fd_open_named)(-1, i);
886 }
887 
/* Initialize the list of open file descriptors with the file descriptors
   we inherited from our parent process. */
890 
VG_(init_preopened_fds)891 void VG_(init_preopened_fds)(void)
892 {
893 // DDD: should probably use HAVE_PROC here or similar, instead.
894 #if defined(VGO_linux)
895    Int ret;
896    struct vki_dirent64 d;
897    SysRes f;
898 
899    f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
900    if (sr_isError(f)) {
901       init_preopened_fds_without_proc_self_fd();
902       return;
903    }
904 
905    while ((ret = VG_(getdents64)(sr_Res(f), &d, sizeof(d))) != 0) {
906       if (ret == -1)
907          goto out;
908 
909       if (VG_(strcmp)(d.d_name, ".") && VG_(strcmp)(d.d_name, "..")) {
910          HChar* s;
911          Int fno = VG_(strtoll10)(d.d_name, &s);
912          if (*s == '\0') {
913             if (fno != sr_Res(f))
914                if (VG_(clo_track_fds))
915                   ML_(record_fd_open_named)(-1, fno);
916          } else {
917             VG_(message)(Vg_DebugMsg,
918                "Warning: invalid file name in /proc/self/fd: %s\n",
919                d.d_name);
920          }
921       }
922 
923       VG_(lseek)(sr_Res(f), d.d_off, VKI_SEEK_SET);
924    }
925 
926   out:
927    VG_(close)(sr_Res(f));
928 
929 #elif defined(VGO_darwin)
930    init_preopened_fds_without_proc_self_fd();
931 
932 #elif defined(VGO_solaris)
933    Int ret;
934    Char buf[VKI_MAXGETDENTS_SIZE];
935    SysRes f;
936 
937    f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
938    if (sr_isError(f)) {
939       init_preopened_fds_without_proc_self_fd();
940       return;
941    }
942 
943    while ((ret = VG_(getdents64)(sr_Res(f), (struct vki_dirent64 *) buf,
944                                  sizeof(buf))) > 0) {
945       Int i = 0;
946       while (i < ret) {
947          /* Proceed one entry. */
948          struct vki_dirent64 *d = (struct vki_dirent64 *) (buf + i);
949          if (VG_(strcmp)(d->d_name, ".") && VG_(strcmp)(d->d_name, "..")) {
950             HChar *s;
951             Int fno = VG_(strtoll10)(d->d_name, &s);
952             if (*s == '\0') {
953                if (fno != sr_Res(f))
954                   if (VG_(clo_track_fds))
955                      ML_(record_fd_open_named)(-1, fno);
956             } else {
957                VG_(message)(Vg_DebugMsg,
958                      "Warning: invalid file name in /proc/self/fd: %s\n",
959                      d->d_name);
960             }
961          }
962 
963          /* Move on the next entry. */
964          i += d->d_reclen;
965       }
966    }
967 
968    VG_(close)(sr_Res(f));
969 
970 #else
971 #  error Unknown OS
972 #endif
973 }
974 
975 static
pre_mem_read_sendmsg(ThreadId tid,Bool read,const HChar * msg,Addr base,SizeT size)976 void pre_mem_read_sendmsg ( ThreadId tid, Bool read,
977                             const HChar *msg, Addr base, SizeT size )
978 {
979    HChar outmsg[VG_(strlen)(msg) + 10]; // large enough
980    VG_(sprintf)(outmsg, "sendmsg%s", msg);
981    PRE_MEM_READ( outmsg, base, size );
982 }
983 
984 static
pre_mem_write_recvmsg(ThreadId tid,Bool read,const HChar * msg,Addr base,SizeT size)985 void pre_mem_write_recvmsg ( ThreadId tid, Bool read,
986                              const HChar *msg, Addr base, SizeT size )
987 {
988    HChar outmsg[VG_(strlen)(msg) + 10]; // large enough
989    VG_(sprintf)(outmsg, "recvmsg%s", msg);
990    if ( read )
991       PRE_MEM_READ( outmsg, base, size );
992    else
993       PRE_MEM_WRITE( outmsg, base, size );
994 }
995 
996 static
post_mem_write_recvmsg(ThreadId tid,Bool read,const HChar * fieldName,Addr base,SizeT size)997 void post_mem_write_recvmsg ( ThreadId tid, Bool read,
998                               const HChar *fieldName, Addr base, SizeT size )
999 {
1000    if ( !read )
1001       POST_MEM_WRITE( base, size );
1002 }
1003 
1004 static
msghdr_foreachfield(ThreadId tid,const HChar * name,struct vki_msghdr * msg,UInt length,void (* foreach_func)(ThreadId,Bool,const HChar *,Addr,SizeT),Bool rekv)1005 void msghdr_foreachfield (
1006         ThreadId tid,
1007         const HChar *name,
1008         struct vki_msghdr *msg,
1009         UInt length,
1010         void (*foreach_func)( ThreadId, Bool, const HChar *, Addr, SizeT ),
1011         Bool rekv /* "recv" apparently shadows some header decl on OSX108 */
1012      )
1013 {
1014    HChar fieldName[VG_(strlen)(name) + 32]; // large enough.
1015    Addr a;
1016    SizeT s;
1017 
1018    if ( !msg )
1019       return;
1020 
1021    VG_(sprintf) ( fieldName, "(%s)", name );
1022 
1023    /* FIELDPAIR helps the compiler do one call to foreach_func
1024       for consecutive (no holes) fields. */
1025 #define FIELDPAIR(f1,f2) \
1026    if (offsetof(struct vki_msghdr, f1) + sizeof(msg->f1)                \
1027        == offsetof(struct vki_msghdr, f2))                              \
1028       s += sizeof(msg->f2);                                             \
1029    else {                                                               \
1030       foreach_func (tid, True, fieldName, a, s);                        \
1031       a = (Addr)&msg->f2;                                               \
1032       s = sizeof(msg->f2);                                              \
1033    }
1034 
1035    a = (Addr)&msg->msg_name;
1036    s = sizeof(msg->msg_name);
1037    FIELDPAIR(msg_name,    msg_namelen);
1038    FIELDPAIR(msg_namelen, msg_iov);
1039    FIELDPAIR(msg_iov,     msg_iovlen);
1040    FIELDPAIR(msg_iovlen,  msg_control);
1041    FIELDPAIR(msg_control, msg_controllen);
1042    foreach_func ( tid, True, fieldName, a, s);
1043 #undef FIELDPAIR
1044 
1045    /* msg_flags is completely ignored for send_mesg, recv_mesg doesn't read
1046       the field, but does write to it. */
1047    if ( rekv )
1048       foreach_func ( tid, False, fieldName, (Addr)&msg->msg_flags, sizeof( msg->msg_flags ) );
1049 
1050    if ( ML_(safe_to_deref)(&msg->msg_name, sizeof (void *))
1051         && msg->msg_name ) {
1052       VG_(sprintf) ( fieldName, "(%s.msg_name)", name );
1053       foreach_func ( tid, False, fieldName,
1054                      (Addr)msg->msg_name, msg->msg_namelen );
1055    }
1056 
1057    if ( ML_(safe_to_deref)(&msg->msg_iov, sizeof (void *))
1058         && msg->msg_iov ) {
1059       struct vki_iovec *iov = msg->msg_iov;
1060       UInt i;
1061 
1062       if (ML_(safe_to_deref)(&msg->msg_iovlen, sizeof (UInt))) {
1063          VG_(sprintf) ( fieldName, "(%s.msg_iov)", name );
1064          foreach_func ( tid, True, fieldName, (Addr)iov,
1065                         msg->msg_iovlen * sizeof( struct vki_iovec ) );
1066 
1067          for ( i = 0; i < msg->msg_iovlen && length > 0; ++i, ++iov ) {
1068             if (ML_(safe_to_deref)(&iov->iov_len, sizeof (UInt))) {
1069                UInt iov_len = iov->iov_len <= length ? iov->iov_len : length;
1070                VG_(sprintf) ( fieldName, "(%s.msg_iov[%u])", name, i );
1071                foreach_func ( tid, False, fieldName,
1072                               (Addr)iov->iov_base, iov_len );
1073                length = length - iov_len;
1074             }
1075          }
1076       }
1077    }
1078 
1079    if ( ML_(safe_to_deref) (&msg->msg_control, sizeof (void *))
1080         && msg->msg_control ) {
1081       VG_(sprintf) ( fieldName, "(%s.msg_control)", name );
1082       foreach_func ( tid, False, fieldName,
1083                      (Addr)msg->msg_control, msg->msg_controllen );
1084    }
1085 
1086 }
1087 
/* Scan the control messages attached to MSG for SCM_RIGHTS messages at the
   SOL_SOCKET level, i.e. ones carrying file descriptors, and record every
   descriptor found as opened by thread TID (only when VG_(clo_track_fds)
   is set). */
static void check_cmsg_for_fds(ThreadId tid, struct vki_msghdr *msg)
{
   struct vki_cmsghdr *hdr;

   for (hdr = VKI_CMSG_FIRSTHDR(msg); hdr; hdr = VKI_CMSG_NXTHDR(msg, hdr)) {
      Int *fds;
      Int  nfds;
      Int  k;

      if (hdr->cmsg_level != VKI_SOL_SOCKET
          || hdr->cmsg_type != VKI_SCM_RIGHTS)
         continue;

      /* The payload is an array of descriptors; its element count is the
         message length minus the aligned header, in units of int. */
      fds  = (Int *) VKI_CMSG_DATA(hdr);
      nfds = (hdr->cmsg_len - VKI_CMSG_ALIGN(sizeof(struct vki_cmsghdr)))
                / sizeof(int);

      for (k = 0; k < nfds; k++) {
         if (VG_(clo_track_fds)) {
            // XXX: must we check the range on these fds with
            //      ML_(fd_allowed)()?
            ML_(record_fd_open_named)(tid, fds[k]);
         }
      }
   }
}
1110 
1111 /* GrP kernel ignores sa_len (at least on Darwin); this checks the rest */
1112 static
pre_mem_read_sockaddr(ThreadId tid,const HChar * description,struct vki_sockaddr * sa,UInt salen)1113 void pre_mem_read_sockaddr ( ThreadId tid,
1114                              const HChar *description,
1115                              struct vki_sockaddr *sa, UInt salen )
1116 {
1117    HChar outmsg[VG_(strlen)( description ) + 30]; // large enough
1118    struct vki_sockaddr_un*  saun = (struct vki_sockaddr_un *)sa;
1119    struct vki_sockaddr_in*  sin  = (struct vki_sockaddr_in *)sa;
1120    struct vki_sockaddr_in6* sin6 = (struct vki_sockaddr_in6 *)sa;
1121 #  ifdef VKI_AF_BLUETOOTH
1122    struct vki_sockaddr_rc*  rc   = (struct vki_sockaddr_rc *)sa;
1123 #  endif
1124 #  ifdef VKI_AF_NETLINK
1125    struct vki_sockaddr_nl*  nl   = (struct vki_sockaddr_nl *)sa;
1126 #  endif
1127 
1128    /* NULL/zero-length sockaddrs are legal */
1129    if ( sa == NULL || salen == 0 ) return;
1130 
1131    VG_(sprintf) ( outmsg, description, "sa_family" );
1132    PRE_MEM_READ( outmsg, (Addr) &sa->sa_family, sizeof(vki_sa_family_t));
1133 
1134    /* Don't do any extra checking if we cannot determine the sa_family. */
1135    if (! ML_(safe_to_deref) (&sa->sa_family, sizeof(vki_sa_family_t)))
1136       return;
1137 
1138    switch (sa->sa_family) {
1139 
1140       case VKI_AF_UNIX:
1141          if (ML_(safe_to_deref) (&saun->sun_path, sizeof (Addr))) {
1142             VG_(sprintf) ( outmsg, description, "sun_path" );
1143             PRE_MEM_RASCIIZ( outmsg, (Addr) saun->sun_path );
1144             // GrP fixme max of sun_len-2? what about nul char?
1145          }
1146          break;
1147 
1148       case VKI_AF_INET:
1149          VG_(sprintf) ( outmsg, description, "sin_port" );
1150          PRE_MEM_READ( outmsg, (Addr) &sin->sin_port, sizeof (sin->sin_port) );
1151          VG_(sprintf) ( outmsg, description, "sin_addr" );
1152          PRE_MEM_READ( outmsg, (Addr) &sin->sin_addr, sizeof (sin->sin_addr) );
1153          break;
1154 
1155       case VKI_AF_INET6:
1156          VG_(sprintf) ( outmsg, description, "sin6_port" );
1157          PRE_MEM_READ( outmsg,
1158             (Addr) &sin6->sin6_port, sizeof (sin6->sin6_port) );
1159          VG_(sprintf) ( outmsg, description, "sin6_flowinfo" );
1160          PRE_MEM_READ( outmsg,
1161             (Addr) &sin6->sin6_flowinfo, sizeof (sin6->sin6_flowinfo) );
1162          VG_(sprintf) ( outmsg, description, "sin6_addr" );
1163          PRE_MEM_READ( outmsg,
1164             (Addr) &sin6->sin6_addr, sizeof (sin6->sin6_addr) );
1165          VG_(sprintf) ( outmsg, description, "sin6_scope_id" );
1166          PRE_MEM_READ( outmsg,
1167             (Addr) &sin6->sin6_scope_id, sizeof (sin6->sin6_scope_id) );
1168          break;
1169 
1170 #     ifdef VKI_AF_BLUETOOTH
1171       case VKI_AF_BLUETOOTH:
1172          VG_(sprintf) ( outmsg, description, "rc_bdaddr" );
1173          PRE_MEM_READ( outmsg, (Addr) &rc->rc_bdaddr, sizeof (rc->rc_bdaddr) );
1174          VG_(sprintf) ( outmsg, description, "rc_channel" );
1175          PRE_MEM_READ( outmsg, (Addr) &rc->rc_channel, sizeof (rc->rc_channel) );
1176          break;
1177 #     endif
1178 
1179 #     ifdef VKI_AF_NETLINK
1180       case VKI_AF_NETLINK:
1181          VG_(sprintf)(outmsg, description, "nl_pid");
1182          PRE_MEM_READ(outmsg, (Addr)&nl->nl_pid, sizeof(nl->nl_pid));
1183          VG_(sprintf)(outmsg, description, "nl_groups");
1184          PRE_MEM_READ(outmsg, (Addr)&nl->nl_groups, sizeof(nl->nl_groups));
1185          break;
1186 #     endif
1187 
1188 #     ifdef VKI_AF_UNSPEC
1189       case VKI_AF_UNSPEC:
1190          break;
1191 #     endif
1192 
1193       default:
1194          /* No specific information about this address family.
1195             Let's just check the full data following the family.
1196             Note that this can give false positive if this (unknown)
1197             struct sockaddr_???? has padding bytes between its elements. */
1198          VG_(sprintf) ( outmsg, description, "sa_data" );
1199          PRE_MEM_READ( outmsg, (Addr)&sa->sa_family + sizeof(sa->sa_family),
1200                        salen -  sizeof(sa->sa_family));
1201          break;
1202    }
1203 }
1204 
1205 /* Dereference a pointer to a UInt. */
deref_UInt(ThreadId tid,Addr a,const HChar * s)1206 static UInt deref_UInt ( ThreadId tid, Addr a, const HChar* s )
1207 {
1208    UInt* a_p = (UInt*)a;
1209    PRE_MEM_READ( s, (Addr)a_p, sizeof(UInt) );
1210    if (a_p == NULL || ! ML_(safe_to_deref) (a_p, sizeof(UInt)))
1211       return 0;
1212    else
1213       return *a_p;
1214 }
1215 
ML_(buf_and_len_pre_check)1216 void ML_(buf_and_len_pre_check) ( ThreadId tid, Addr buf_p, Addr buflen_p,
1217                                   const HChar* buf_s, const HChar* buflen_s )
1218 {
1219    if (VG_(tdict).track_pre_mem_write) {
1220       UInt buflen_in = deref_UInt( tid, buflen_p, buflen_s);
1221       if (buflen_in > 0) {
1222          VG_(tdict).track_pre_mem_write(
1223             Vg_CoreSysCall, tid, buf_s, buf_p, buflen_in );
1224       }
1225    }
1226 }
1227 
ML_(buf_and_len_post_check)1228 void ML_(buf_and_len_post_check) ( ThreadId tid, SysRes res,
1229                                    Addr buf_p, Addr buflen_p, const HChar* s )
1230 {
1231    if (!sr_isError(res) && VG_(tdict).track_post_mem_write) {
1232       UInt buflen_out = deref_UInt( tid, buflen_p, s);
1233       if (buflen_out > 0 && buf_p != (Addr)NULL) {
1234          VG_(tdict).track_post_mem_write( Vg_CoreSysCall, tid, buf_p, buflen_out );
1235       }
1236    }
1237 }
1238 
1239 /* ---------------------------------------------------------------------
1240    Data seg end, for brk()
1241    ------------------------------------------------------------------ */
1242 
1243 /*   +--------+------------+
1244      | anon   |    resvn   |
1245      +--------+------------+
1246 
1247      ^     ^  ^
1248      |     |  boundary is page aligned
1249      |     VG_(brk_limit) -- no alignment constraint
1250      VG_(brk_base) -- page aligned -- does not move
1251 
1252      Both the anon part and the reservation part are always at least
1253      one page.
1254 */
1255 
1256 /* Set the new data segment end to NEWBRK.  If this succeeds, return
1257    NEWBRK, else return the current data segment end. */
1258 
do_brk(Addr newbrk,ThreadId tid)1259 static Addr do_brk ( Addr newbrk, ThreadId tid )
1260 {
1261    NSegment const* aseg;
1262    Addr newbrkP;
1263    SizeT delta;
1264    Bool debug = False;
1265 
1266    if (debug)
1267       VG_(printf)("\ndo_brk: brk_base=%#lx brk_limit=%#lx newbrk=%#lx\n",
1268 		  VG_(brk_base), VG_(brk_limit), newbrk);
1269 
1270    if (0) VG_(am_show_nsegments)(0, "in_brk");
1271 
1272    if (newbrk < VG_(brk_base))
1273       /* Clearly impossible. */
1274       goto bad;
1275 
1276    if (newbrk < VG_(brk_limit)) {
1277       /* shrinking the data segment.  Be lazy and don't munmap the
1278          excess area. */
1279       NSegment const * seg = VG_(am_find_nsegment)(newbrk);
1280       vg_assert(seg);
1281 
1282       if (seg->hasT)
1283          VG_(discard_translations)( newbrk, VG_(brk_limit) - newbrk,
1284                                     "do_brk(shrink)" );
1285       /* Since we're being lazy and not unmapping pages, we have to
1286          zero out the area, so that if the area later comes back into
1287          circulation, it will be filled with zeroes, as if it really
1288          had been unmapped and later remapped.  Be a bit paranoid and
1289          try hard to ensure we're not going to segfault by doing the
1290          write - check both ends of the range are in the same segment
1291          and that segment is writable. */
1292       NSegment const * seg2;
1293 
1294       seg2 = VG_(am_find_nsegment)( VG_(brk_limit) - 1 );
1295       vg_assert(seg2);
1296 
1297       if (seg == seg2 && seg->hasW)
1298          VG_(memset)( (void*)newbrk, 0, VG_(brk_limit) - newbrk );
1299 
1300       VG_(brk_limit) = newbrk;
1301       return newbrk;
1302    }
1303 
1304    /* otherwise we're expanding the brk segment. */
1305    if (VG_(brk_limit) > VG_(brk_base))
1306       aseg = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
1307    else
1308       aseg = VG_(am_find_nsegment)( VG_(brk_limit) );
1309 
1310    /* These should be assured by setup_client_dataseg in m_main. */
1311    vg_assert(aseg);
1312    vg_assert(aseg->kind == SkAnonC);
1313 
1314    if (newbrk <= aseg->end + 1) {
1315       /* still fits within the anon segment. */
1316       VG_(brk_limit) = newbrk;
1317       return newbrk;
1318    }
1319 
1320    newbrkP = VG_PGROUNDUP(newbrk);
1321    delta = newbrkP - (aseg->end + 1);
1322    vg_assert(delta > 0);
1323    vg_assert(VG_IS_PAGE_ALIGNED(delta));
1324 
1325    Bool overflow = False;
1326    if (! VG_(am_extend_into_adjacent_reservation_client)( aseg->start, delta,
1327                                                           &overflow)) {
1328       if (overflow) {
1329          static Bool alreadyComplained = False;
1330          if (!alreadyComplained) {
1331             alreadyComplained = True;
1332             if (VG_(clo_verbosity) > 0) {
1333                VG_(umsg)("brk segment overflow in thread #%u: "
1334                          "can't grow to %#lx\n",
1335                          tid, newbrkP);
1336                VG_(umsg)("(see section Limitations in user manual)\n");
1337                VG_(umsg)("NOTE: further instances of this message "
1338                          "will not be shown\n");
1339             }
1340          }
1341       } else {
1342          if (VG_(clo_verbosity) > 0) {
1343             VG_(umsg)("Cannot map memory to grow brk segment in thread #%u "
1344                       "to %#lx\n", tid, newbrkP);
1345             VG_(umsg)("(see section Limitations in user manual)\n");
1346          }
1347       }
1348       goto bad;
1349    }
1350 
1351    VG_(brk_limit) = newbrk;
1352    return newbrk;
1353 
1354   bad:
1355    return VG_(brk_limit);
1356 }
1357 
1358 
1359 /* ---------------------------------------------------------------------
1360    Vet file descriptors for sanity
1361    ------------------------------------------------------------------ */
1362 /*
1363 > - what does the "Bool soft" parameter mean?
1364 
1365 (Tom Hughes, 3 Oct 05):
1366 
1367 Whether or not to consider a file descriptor invalid if it is above
1368 the current soft limit.
1369 
1370 Basically if we are testing whether a newly created file descriptor is
1371 valid (in a post handler) then we set soft to true, and if we are
1372 testing whether a file descriptor that is about to be used (in a pre
1373 handler) is valid [viz, an already-existing fd] then we set it to false.
1374 
1375 The point is that if the (virtual) soft limit is lowered then any
1376 existing descriptors can still be read/written/closed etc (so long as
1377 they are below the valgrind reserved descriptors) but no new
1378 descriptors can be created above the new soft limit.
1379 
1380 (jrs 4 Oct 05: in which case, I've renamed it "isNewFd")
1381 */
1382 
1383 /* Return true if we're allowed to use or create this fd */
ML_(fd_allowed)1384 Bool ML_(fd_allowed)(Int fd, const HChar *syscallname, ThreadId tid,
1385                      Bool isNewFd)
1386 {
1387    Bool allowed = True;
1388 
1389    /* hard limits always apply */
1390    if (fd < 0 || fd >= VG_(fd_hard_limit))
1391       allowed = False;
1392 
1393    /* hijacking the output fds is never allowed */
1394    if (fd == VG_(log_output_sink).fd || fd == VG_(xml_output_sink).fd)
1395       allowed = False;
1396 
1397    /* if creating a new fd (rather than using an existing one), the
1398       soft limit must also be observed */
1399    if (isNewFd && fd >= VG_(fd_soft_limit))
1400       allowed = False;
1401 
1402    /* this looks like it ought to be included, but causes problems: */
1403    /*
1404    if (fd == 2 && VG_(debugLog_getLevel)() > 0)
1405       allowed = False;
1406    */
1407    /* The difficulty is as follows: consider a program P which expects
1408       to be able to mess with (redirect) its own stderr (fd 2).
1409       Usually to deal with P we would issue command line flags to send
1410       logging somewhere other than stderr, so as not to disrupt P.
1411       The problem is that -d unilaterally hijacks stderr with no
1412       consultation with P.  And so, if this check is enabled, P will
1413       work OK normally but fail if -d is issued.
1414 
1415       Basically -d is a hack and you take your chances when using it.
1416       It's very useful for low level debugging -- particularly at
1417       startup -- and having its presence change the behaviour of the
1418       client is exactly what we don't want.  */
1419 
1420    /* croak? */
1421    if ((!allowed) && VG_(showing_core_errors)() ) {
1422       VG_(message)(Vg_UserMsg,
1423          "Warning: invalid file descriptor %d in syscall %s()\n",
1424          fd, syscallname);
1425       if (fd == VG_(log_output_sink).fd && VG_(log_output_sink).fd >= 0)
1426 	 VG_(message)(Vg_UserMsg,
1427             "   Use --log-fd=<number> to select an alternative log fd.\n");
1428       if (fd == VG_(xml_output_sink).fd && VG_(xml_output_sink).fd >= 0)
1429 	 VG_(message)(Vg_UserMsg,
1430             "   Use --xml-fd=<number> to select an alternative XML "
1431             "output fd.\n");
1432       // DDD: consider always printing this stack trace, it's useful.
1433       // Also consider also making this a proper core error, ie.
1434       // suppressible and all that.
1435       if (VG_(clo_verbosity) > 1) {
1436          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1437       }
1438    }
1439 
1440    return allowed;
1441 }
1442 
1443 
1444 /* ---------------------------------------------------------------------
1445    Deal with a bunch of socket-related syscalls
1446    ------------------------------------------------------------------ */
1447 
1448 /* ------ */
1449 
1450 void
ML_(generic_PRE_sys_socketpair)1451 ML_(generic_PRE_sys_socketpair) ( ThreadId tid,
1452                                   UWord arg0, UWord arg1,
1453                                   UWord arg2, UWord arg3 )
1454 {
1455    /* int socketpair(int d, int type, int protocol, int sv[2]); */
1456    PRE_MEM_WRITE( "socketcall.socketpair(sv)",
1457                   arg3, 2*sizeof(int) );
1458 }
1459 
1460 SysRes
ML_(generic_POST_sys_socketpair)1461 ML_(generic_POST_sys_socketpair) ( ThreadId tid,
1462                                    SysRes res,
1463                                    UWord arg0, UWord arg1,
1464                                    UWord arg2, UWord arg3 )
1465 {
1466    SysRes r = res;
1467    Int fd1 = ((Int*)arg3)[0];
1468    Int fd2 = ((Int*)arg3)[1];
1469    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1470    POST_MEM_WRITE( arg3, 2*sizeof(int) );
1471    if (!ML_(fd_allowed)(fd1, "socketcall.socketpair", tid, True) ||
1472        !ML_(fd_allowed)(fd2, "socketcall.socketpair", tid, True)) {
1473       VG_(close)(fd1);
1474       VG_(close)(fd2);
1475       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1476    } else {
1477       POST_MEM_WRITE( arg3, 2*sizeof(int) );
1478       if (VG_(clo_track_fds)) {
1479          ML_(record_fd_open_nameless)(tid, fd1);
1480          ML_(record_fd_open_nameless)(tid, fd2);
1481       }
1482    }
1483    return r;
1484 }
1485 
1486 /* ------ */
1487 
1488 SysRes
ML_(generic_POST_sys_socket)1489 ML_(generic_POST_sys_socket) ( ThreadId tid, SysRes res )
1490 {
1491    SysRes r = res;
1492    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1493    if (!ML_(fd_allowed)(sr_Res(res), "socket", tid, True)) {
1494       VG_(close)(sr_Res(res));
1495       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1496    } else {
1497       if (VG_(clo_track_fds))
1498          ML_(record_fd_open_nameless)(tid, sr_Res(res));
1499    }
1500    return r;
1501 }
1502 
1503 /* ------ */
1504 
1505 void
ML_(generic_PRE_sys_bind)1506 ML_(generic_PRE_sys_bind) ( ThreadId tid,
1507                             UWord arg0, UWord arg1, UWord arg2 )
1508 {
1509    /* int bind(int sockfd, struct sockaddr *my_addr,
1510                int addrlen); */
1511    pre_mem_read_sockaddr(
1512       tid, "socketcall.bind(my_addr.%s)",
1513       (struct vki_sockaddr *) arg1, arg2
1514    );
1515 }
1516 
1517 /* ------ */
1518 
1519 void
ML_(generic_PRE_sys_accept)1520 ML_(generic_PRE_sys_accept) ( ThreadId tid,
1521                               UWord arg0, UWord arg1, UWord arg2 )
1522 {
1523    /* int accept(int s, struct sockaddr *addr, int *addrlen); */
1524    Addr addr_p     = arg1;
1525    Addr addrlen_p  = arg2;
1526    if (addr_p != (Addr)NULL)
1527       ML_(buf_and_len_pre_check) ( tid, addr_p, addrlen_p,
1528                                    "socketcall.accept(addr)",
1529                                    "socketcall.accept(addrlen_in)" );
1530 }
1531 
1532 SysRes
ML_(generic_POST_sys_accept)1533 ML_(generic_POST_sys_accept) ( ThreadId tid,
1534                                SysRes res,
1535                                UWord arg0, UWord arg1, UWord arg2 )
1536 {
1537    SysRes r = res;
1538    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1539    if (!ML_(fd_allowed)(sr_Res(res), "accept", tid, True)) {
1540       VG_(close)(sr_Res(res));
1541       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1542    } else {
1543       Addr addr_p     = arg1;
1544       Addr addrlen_p  = arg2;
1545       if (addr_p != (Addr)NULL)
1546          ML_(buf_and_len_post_check) ( tid, res, addr_p, addrlen_p,
1547                                        "socketcall.accept(addrlen_out)" );
1548       if (VG_(clo_track_fds))
1549           ML_(record_fd_open_nameless)(tid, sr_Res(res));
1550    }
1551    return r;
1552 }
1553 
1554 /* ------ */
1555 
1556 void
ML_(generic_PRE_sys_sendto)1557 ML_(generic_PRE_sys_sendto) ( ThreadId tid,
1558                               UWord arg0, UWord arg1, UWord arg2,
1559                               UWord arg3, UWord arg4, UWord arg5 )
1560 {
1561    /* int sendto(int s, const void *msg, int len,
1562                  unsigned int flags,
1563                  const struct sockaddr *to, int tolen); */
1564    PRE_MEM_READ( "socketcall.sendto(msg)",
1565                  arg1, /* msg */
1566                  arg2  /* len */ );
1567    pre_mem_read_sockaddr(
1568       tid, "socketcall.sendto(to.%s)",
1569       (struct vki_sockaddr *) arg4, arg5
1570    );
1571 }
1572 
1573 /* ------ */
1574 
1575 void
ML_(generic_PRE_sys_send)1576 ML_(generic_PRE_sys_send) ( ThreadId tid,
1577                             UWord arg0, UWord arg1, UWord arg2 )
1578 {
1579    /* int send(int s, const void *msg, size_t len, int flags); */
1580    PRE_MEM_READ( "socketcall.send(msg)",
1581                   arg1, /* msg */
1582                   arg2  /* len */ );
1583 
1584 }
1585 
1586 /* ------ */
1587 
1588 void
ML_(generic_PRE_sys_recvfrom)1589 ML_(generic_PRE_sys_recvfrom) ( ThreadId tid,
1590                                 UWord arg0, UWord arg1, UWord arg2,
1591                                 UWord arg3, UWord arg4, UWord arg5 )
1592 {
1593    /* int recvfrom(int s, void *buf, int len, unsigned int flags,
1594                    struct sockaddr *from, int *fromlen); */
1595    Addr buf_p      = arg1;
1596    Int  len        = arg2;
1597    Addr from_p     = arg4;
1598    Addr fromlen_p  = arg5;
1599    PRE_MEM_WRITE( "socketcall.recvfrom(buf)", buf_p, len );
1600    if (from_p != (Addr)NULL)
1601       ML_(buf_and_len_pre_check) ( tid, from_p, fromlen_p,
1602                                    "socketcall.recvfrom(from)",
1603                                    "socketcall.recvfrom(fromlen_in)" );
1604 }
1605 
1606 void
ML_(generic_POST_sys_recvfrom)1607 ML_(generic_POST_sys_recvfrom) ( ThreadId tid,
1608                                  SysRes res,
1609                                  UWord arg0, UWord arg1, UWord arg2,
1610                                  UWord arg3, UWord arg4, UWord arg5 )
1611 {
1612    Addr buf_p      = arg1;
1613    Int  len        = arg2;
1614    Addr from_p     = arg4;
1615    Addr fromlen_p  = arg5;
1616 
1617    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1618    if (from_p != (Addr)NULL)
1619       ML_(buf_and_len_post_check) ( tid, res, from_p, fromlen_p,
1620                                     "socketcall.recvfrom(fromlen_out)" );
1621    POST_MEM_WRITE( buf_p, len );
1622 }
1623 
1624 /* ------ */
1625 
1626 void
ML_(generic_PRE_sys_recv)1627 ML_(generic_PRE_sys_recv) ( ThreadId tid,
1628                             UWord arg0, UWord arg1, UWord arg2 )
1629 {
1630    /* int recv(int s, void *buf, int len, unsigned int flags); */
1631    /* man 2 recv says:
1632       The  recv call is normally used only on a connected socket
1633       (see connect(2)) and is identical to recvfrom with a  NULL
1634       from parameter.
1635    */
1636    PRE_MEM_WRITE( "socketcall.recv(buf)",
1637                   arg1, /* buf */
1638                   arg2  /* len */ );
1639 }
1640 
1641 void
ML_(generic_POST_sys_recv)1642 ML_(generic_POST_sys_recv) ( ThreadId tid,
1643                              UWord res,
1644                              UWord arg0, UWord arg1, UWord arg2 )
1645 {
1646    if (res >= 0 && arg1 != 0) {
1647       POST_MEM_WRITE( arg1, /* buf */
1648                       arg2  /* len */ );
1649    }
1650 }
1651 
1652 /* ------ */
1653 
1654 void
ML_(generic_PRE_sys_connect)1655 ML_(generic_PRE_sys_connect) ( ThreadId tid,
1656                                UWord arg0, UWord arg1, UWord arg2 )
1657 {
1658    /* int connect(int sockfd,
1659                   struct sockaddr *serv_addr, int addrlen ); */
1660    pre_mem_read_sockaddr( tid,
1661                           "socketcall.connect(serv_addr.%s)",
1662                           (struct vki_sockaddr *) arg1, arg2);
1663 }
1664 
1665 /* ------ */
1666 
1667 void
ML_(generic_PRE_sys_setsockopt)1668 ML_(generic_PRE_sys_setsockopt) ( ThreadId tid,
1669                                   UWord arg0, UWord arg1, UWord arg2,
1670                                   UWord arg3, UWord arg4 )
1671 {
1672    /* int setsockopt(int s, int level, int optname,
1673                      const void *optval, int optlen); */
1674    PRE_MEM_READ( "socketcall.setsockopt(optval)",
1675                  arg3, /* optval */
1676                  arg4  /* optlen */ );
1677 }
1678 
1679 /* ------ */
1680 
1681 void
ML_(generic_PRE_sys_getsockname)1682 ML_(generic_PRE_sys_getsockname) ( ThreadId tid,
1683                                    UWord arg0, UWord arg1, UWord arg2 )
1684 {
1685    /* int getsockname(int s, struct sockaddr* name, int* namelen) */
1686    Addr name_p     = arg1;
1687    Addr namelen_p  = arg2;
1688    /* Nb: name_p cannot be NULL */
1689    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1690                                 "socketcall.getsockname(name)",
1691                                 "socketcall.getsockname(namelen_in)" );
1692 }
1693 
1694 void
ML_(generic_POST_sys_getsockname)1695 ML_(generic_POST_sys_getsockname) ( ThreadId tid,
1696                                     SysRes res,
1697                                     UWord arg0, UWord arg1, UWord arg2 )
1698 {
1699    Addr name_p     = arg1;
1700    Addr namelen_p  = arg2;
1701    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1702    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1703                                  "socketcall.getsockname(namelen_out)" );
1704 }
1705 
1706 /* ------ */
1707 
1708 void
ML_(generic_PRE_sys_getpeername)1709 ML_(generic_PRE_sys_getpeername) ( ThreadId tid,
1710                                    UWord arg0, UWord arg1, UWord arg2 )
1711 {
1712    /* int getpeername(int s, struct sockaddr* name, int* namelen) */
1713    Addr name_p     = arg1;
1714    Addr namelen_p  = arg2;
1715    /* Nb: name_p cannot be NULL */
1716    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1717                                 "socketcall.getpeername(name)",
1718                                 "socketcall.getpeername(namelen_in)" );
1719 }
1720 
1721 void
ML_(generic_POST_sys_getpeername)1722 ML_(generic_POST_sys_getpeername) ( ThreadId tid,
1723                                     SysRes res,
1724                                     UWord arg0, UWord arg1, UWord arg2 )
1725 {
1726    Addr name_p     = arg1;
1727    Addr namelen_p  = arg2;
1728    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1729    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1730                                  "socketcall.getpeername(namelen_out)" );
1731 }
1732 
1733 /* ------ */
1734 
1735 void
ML_(generic_PRE_sys_sendmsg)1736 ML_(generic_PRE_sys_sendmsg) ( ThreadId tid, const HChar *name,
1737                                struct vki_msghdr *msg )
1738 {
1739    msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_read_sendmsg, False );
1740 }
1741 
1742 /* ------ */
1743 
1744 void
ML_(generic_PRE_sys_recvmsg)1745 ML_(generic_PRE_sys_recvmsg) ( ThreadId tid, const HChar *name,
1746                                struct vki_msghdr *msg )
1747 {
1748    msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_write_recvmsg, True );
1749 }
1750 
1751 void
ML_(generic_POST_sys_recvmsg)1752 ML_(generic_POST_sys_recvmsg) ( ThreadId tid, const HChar *name,
1753                                 struct vki_msghdr *msg, UInt length )
1754 {
1755    msghdr_foreachfield( tid, name, msg, length, post_mem_write_recvmsg, True );
1756    check_cmsg_for_fds( tid, msg );
1757 }
1758 
1759 
1760 /* ---------------------------------------------------------------------
1761    Deal with a bunch of IPC related syscalls
1762    ------------------------------------------------------------------ */
1763 
1764 /* ------ */
1765 
1766 void
ML_(generic_PRE_sys_semop)1767 ML_(generic_PRE_sys_semop) ( ThreadId tid,
1768                              UWord arg0, UWord arg1, UWord arg2 )
1769 {
1770    /* int semop(int semid, struct sembuf *sops, unsigned nsops); */
1771    PRE_MEM_READ( "semop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
1772 }
1773 
1774 /* ------ */
1775 
1776 void
ML_(generic_PRE_sys_semtimedop)1777 ML_(generic_PRE_sys_semtimedop) ( ThreadId tid,
1778                                   UWord arg0, UWord arg1,
1779                                   UWord arg2, UWord arg3 )
1780 {
1781    /* int semtimedop(int semid, struct sembuf *sops, unsigned nsops,
1782                      struct timespec *timeout); */
1783    PRE_MEM_READ( "semtimedop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
1784    if (arg3 != 0)
1785       PRE_MEM_READ( "semtimedop(timeout)", arg3, sizeof(struct vki_timespec) );
1786 }
1787 
1788 /* ------ */
1789 
1790 static
get_sem_count(Int semid)1791 UInt get_sem_count( Int semid )
1792 {
1793    struct vki_semid_ds buf;
1794    union vki_semun arg;
1795    SysRes res;
1796 
1797    /* Doesn't actually seem to be necessary, but gcc-4.4.0 20081017
1798       (experimental) otherwise complains that the use in the return
1799       statement below is uninitialised. */
1800    buf.sem_nsems = 0;
1801 
1802    arg.buf = &buf;
1803 
1804 #  if defined(__NR_semctl)
1805    res = VG_(do_syscall4)(__NR_semctl, semid, 0, VKI_IPC_STAT, *(UWord *)&arg);
1806 #  elif defined(__NR_semsys) /* Solaris */
1807    res = VG_(do_syscall5)(__NR_semsys, VKI_SEMCTL, semid, 0, VKI_IPC_STAT,
1808                           *(UWord *)&arg);
1809 #  else
1810    res = VG_(do_syscall5)(__NR_ipc, 3 /* IPCOP_semctl */, semid, 0,
1811                           VKI_IPC_STAT, (UWord)&arg);
1812 #  endif
1813    if (sr_isError(res))
1814       return 0;
1815 
1816    return buf.sem_nsems;
1817 }
1818 
1819 void
ML_(generic_PRE_sys_semctl)1820 ML_(generic_PRE_sys_semctl) ( ThreadId tid,
1821                               UWord arg0, UWord arg1,
1822                               UWord arg2, UWord arg3 )
1823 {
1824    /* int semctl(int semid, int semnum, int cmd, ...); */
1825    union vki_semun arg = *(union vki_semun *)&arg3;
1826    UInt nsems;
1827    switch (arg2 /* cmd */) {
1828 #if defined(VKI_IPC_INFO)
1829    case VKI_IPC_INFO:
1830    case VKI_SEM_INFO:
1831    case VKI_IPC_INFO|VKI_IPC_64:
1832    case VKI_SEM_INFO|VKI_IPC_64:
1833       PRE_MEM_WRITE( "semctl(IPC_INFO, arg.buf)",
1834                      (Addr)arg.buf, sizeof(struct vki_seminfo) );
1835       break;
1836 #endif
1837 
1838    case VKI_IPC_STAT:
1839 #if defined(VKI_SEM_STAT)
1840    case VKI_SEM_STAT:
1841 #endif
1842       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
1843                      (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1844       break;
1845 
1846 #if defined(VKI_IPC_64)
1847    case VKI_IPC_STAT|VKI_IPC_64:
1848 #if defined(VKI_SEM_STAT)
1849    case VKI_SEM_STAT|VKI_IPC_64:
1850 #endif
1851 #endif
1852 #if defined(VKI_IPC_STAT64)
1853    case VKI_IPC_STAT64:
1854 #endif
1855 #if defined(VKI_IPC_64) || defined(VKI_IPC_STAT64)
1856       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
1857                      (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1858       break;
1859 #endif
1860 
1861    case VKI_IPC_SET:
1862       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
1863                     (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1864       break;
1865 
1866 #if defined(VKI_IPC_64)
1867    case VKI_IPC_SET|VKI_IPC_64:
1868 #endif
1869 #if defined(VKI_IPC_SET64)
1870    case VKI_IPC_SET64:
1871 #endif
1872 #if defined(VKI_IPC64) || defined(VKI_IPC_SET64)
1873       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
1874                     (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1875       break;
1876 #endif
1877 
1878    case VKI_GETALL:
1879 #if defined(VKI_IPC_64)
1880    case VKI_GETALL|VKI_IPC_64:
1881 #endif
1882       nsems = get_sem_count( arg0 );
1883       PRE_MEM_WRITE( "semctl(IPC_GETALL, arg.array)",
1884                      (Addr)arg.array, sizeof(unsigned short) * nsems );
1885       break;
1886 
1887    case VKI_SETALL:
1888 #if defined(VKI_IPC_64)
1889    case VKI_SETALL|VKI_IPC_64:
1890 #endif
1891       nsems = get_sem_count( arg0 );
1892       PRE_MEM_READ( "semctl(IPC_SETALL, arg.array)",
1893                     (Addr)arg.array, sizeof(unsigned short) * nsems );
1894       break;
1895    }
1896 }
1897 
1898 void
ML_(generic_POST_sys_semctl)1899 ML_(generic_POST_sys_semctl) ( ThreadId tid,
1900                                UWord res,
1901                                UWord arg0, UWord arg1,
1902                                UWord arg2, UWord arg3 )
1903 {
1904    union vki_semun arg = *(union vki_semun *)&arg3;
1905    UInt nsems;
1906    switch (arg2 /* cmd */) {
1907 #if defined(VKI_IPC_INFO)
1908    case VKI_IPC_INFO:
1909    case VKI_SEM_INFO:
1910    case VKI_IPC_INFO|VKI_IPC_64:
1911    case VKI_SEM_INFO|VKI_IPC_64:
1912       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_seminfo) );
1913       break;
1914 #endif
1915 
1916    case VKI_IPC_STAT:
1917 #if defined(VKI_SEM_STAT)
1918    case VKI_SEM_STAT:
1919 #endif
1920       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1921       break;
1922 
1923 #if defined(VKI_IPC_64)
1924    case VKI_IPC_STAT|VKI_IPC_64:
1925    case VKI_SEM_STAT|VKI_IPC_64:
1926 #endif
1927 #if defined(VKI_IPC_STAT64)
1928    case VKI_IPC_STAT64:
1929 #endif
1930 #if defined(VKI_IPC_64) || defined(VKI_IPC_STAT64)
1931       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1932       break;
1933 #endif
1934 
1935    case VKI_GETALL:
1936 #if defined(VKI_IPC_64)
1937    case VKI_GETALL|VKI_IPC_64:
1938 #endif
1939       nsems = get_sem_count( arg0 );
1940       POST_MEM_WRITE( (Addr)arg.array, sizeof(unsigned short) * nsems );
1941       break;
1942    }
1943 }
1944 
1945 /* ------ */
1946 
1947 /* ------ */
1948 
1949 static
get_shm_size(Int shmid)1950 SizeT get_shm_size ( Int shmid )
1951 {
1952 #if defined(__NR_shmctl)
1953 #  ifdef VKI_IPC_64
1954    struct vki_shmid64_ds buf;
1955 #    if defined(VGP_amd64_linux) || defined(VGP_arm64_linux)
1956      /* See bug 222545 comment 7 */
1957      SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
1958                                      VKI_IPC_STAT, (UWord)&buf);
1959 #    else
1960      SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
1961                                      VKI_IPC_STAT|VKI_IPC_64, (UWord)&buf);
1962 #    endif
1963 #  else /* !def VKI_IPC_64 */
1964    struct vki_shmid_ds buf;
1965    SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid, VKI_IPC_STAT, (UWord)&buf);
1966 #  endif /* def VKI_IPC_64 */
1967 #elif defined(__NR_shmsys) /* Solaris */
1968    struct vki_shmid_ds buf;
1969    SysRes __res = VG_(do_syscall4)(__NR_shmsys, VKI_SHMCTL, shmid, VKI_IPC_STAT,
1970                          (UWord)&buf);
1971 #else
1972    struct vki_shmid_ds buf;
1973    SysRes __res = VG_(do_syscall5)(__NR_ipc, 24 /* IPCOP_shmctl */, shmid,
1974                                  VKI_IPC_STAT, 0, (UWord)&buf);
1975 #endif
1976    if (sr_isError(__res))
1977       return 0;
1978 
1979    return (SizeT) buf.shm_segsz;
1980 }
1981 
1982 UWord
ML_(generic_PRE_sys_shmat)1983 ML_(generic_PRE_sys_shmat) ( ThreadId tid,
1984                              UWord arg0, UWord arg1, UWord arg2 )
1985 {
1986    /* void *shmat(int shmid, const void *shmaddr, int shmflg); */
1987    SizeT  segmentSize = get_shm_size ( arg0 );
1988    UWord tmp;
1989    Bool  ok;
1990    if (arg1 == 0) {
1991       /* arm-linux only: work around the fact that
1992          VG_(am_get_advisory_client_simple) produces something that is
1993          VKI_PAGE_SIZE aligned, whereas what we want is something
1994          VKI_SHMLBA aligned, and VKI_SHMLBA >= VKI_PAGE_SIZE.  Hence
1995          increase the request size by VKI_SHMLBA - VKI_PAGE_SIZE and
1996          then round the result up to the next VKI_SHMLBA boundary.
1997          See bug 222545 comment 15.  So far, arm-linux is the only
1998          platform where this is known to be necessary. */
1999       vg_assert(VKI_SHMLBA >= VKI_PAGE_SIZE);
2000       if (VKI_SHMLBA > VKI_PAGE_SIZE) {
2001          segmentSize += VKI_SHMLBA - VKI_PAGE_SIZE;
2002       }
2003       tmp = VG_(am_get_advisory_client_simple)(0, segmentSize, &ok);
2004       if (ok) {
2005          if (VKI_SHMLBA > VKI_PAGE_SIZE) {
2006             arg1 = VG_ROUNDUP(tmp, VKI_SHMLBA);
2007          } else {
2008             arg1 = tmp;
2009          }
2010       }
2011    }
2012    else if (!ML_(valid_client_addr)(arg1, segmentSize, tid, "shmat"))
2013       arg1 = 0;
2014    return arg1;
2015 }
2016 
2017 void
ML_(generic_POST_sys_shmat)2018 ML_(generic_POST_sys_shmat) ( ThreadId tid,
2019                               UWord res,
2020                               UWord arg0, UWord arg1, UWord arg2 )
2021 {
2022    SizeT segmentSize = VG_PGROUNDUP(get_shm_size(arg0));
2023    if ( segmentSize > 0 ) {
2024       UInt prot = VKI_PROT_READ|VKI_PROT_WRITE;
2025       Bool d;
2026 
2027       if (arg2 & VKI_SHM_RDONLY)
2028          prot &= ~VKI_PROT_WRITE;
2029       /* It isn't exactly correct to pass 0 for the fd and offset
2030          here.  The kernel seems to think the corresponding section
2031          does have dev/ino numbers:
2032 
2033          04e52000-04ec8000 rw-s 00000000 00:06 1966090  /SYSV00000000 (deleted)
2034 
2035          However there is no obvious way to find them.  In order to
2036          cope with the discrepancy, aspacem's sync checker omits the
2037          dev/ino correspondence check in cases where V does not know
2038          the dev/ino. */
2039       d = VG_(am_notify_client_shmat)( res, segmentSize, prot );
2040 
2041       /* we don't distinguish whether it's read-only or
2042        * read-write -- it doesn't matter really. */
2043       VG_TRACK( new_mem_mmap, res, segmentSize, True, True, False,
2044                               0/*di_handle*/ );
2045       if (d)
2046          VG_(discard_translations)( (Addr)res,
2047                                     (ULong)VG_PGROUNDUP(segmentSize),
2048                                     "ML_(generic_POST_sys_shmat)" );
2049    }
2050 }
2051 
2052 /* ------ */
2053 
2054 Bool
ML_(generic_PRE_sys_shmdt)2055 ML_(generic_PRE_sys_shmdt) ( ThreadId tid, UWord arg0 )
2056 {
2057    /* int shmdt(const void *shmaddr); */
2058    return ML_(valid_client_addr)(arg0, 1, tid, "shmdt");
2059 }
2060 
2061 void
ML_(generic_POST_sys_shmdt)2062 ML_(generic_POST_sys_shmdt) ( ThreadId tid, UWord res, UWord arg0 )
2063 {
2064    NSegment const* s = VG_(am_find_nsegment)(arg0);
2065 
2066    if (s != NULL) {
2067       Addr  s_start = s->start;
2068       SizeT s_len   = s->end+1 - s->start;
2069       Bool  d;
2070 
2071       vg_assert(s->kind == SkShmC);
2072       vg_assert(s->start == arg0);
2073 
2074       d = VG_(am_notify_munmap)(s_start, s_len);
2075       s = NULL; /* s is now invalid */
2076       VG_TRACK( die_mem_munmap, s_start, s_len );
2077       if (d)
2078          VG_(discard_translations)( s_start,
2079                                     (ULong)s_len,
2080                                     "ML_(generic_POST_sys_shmdt)" );
2081    }
2082 }
2083 /* ------ */
2084 
2085 void
ML_(generic_PRE_sys_shmctl)2086 ML_(generic_PRE_sys_shmctl) ( ThreadId tid,
2087                               UWord arg0, UWord arg1, UWord arg2 )
2088 {
2089    /* int shmctl(int shmid, int cmd, struct shmid_ds *buf); */
2090    switch (arg1 /* cmd */) {
2091 #if defined(VKI_IPC_INFO)
2092    case VKI_IPC_INFO:
2093       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
2094                      arg2, sizeof(struct vki_shminfo) );
2095       break;
2096 #if defined(VKI_IPC_64)
2097    case VKI_IPC_INFO|VKI_IPC_64:
2098       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
2099                      arg2, sizeof(struct vki_shminfo64) );
2100       break;
2101 #endif
2102 #endif
2103 
2104 #if defined(VKI_SHM_INFO)
2105    case VKI_SHM_INFO:
2106 #if defined(VKI_IPC_64)
2107    case VKI_SHM_INFO|VKI_IPC_64:
2108 #endif
2109       PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)",
2110                      arg2, sizeof(struct vki_shm_info) );
2111       break;
2112 #endif
2113 
2114    case VKI_IPC_STAT:
2115 #if defined(VKI_SHM_STAT)
2116    case VKI_SHM_STAT:
2117 #endif
2118       PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)",
2119                      arg2, sizeof(struct vki_shmid_ds) );
2120       break;
2121 
2122 #if defined(VKI_IPC_64)
2123    case VKI_IPC_STAT|VKI_IPC_64:
2124    case VKI_SHM_STAT|VKI_IPC_64:
2125       PRE_MEM_WRITE( "shmctl(IPC_STAT, arg.buf)",
2126                      arg2, sizeof(struct vki_shmid64_ds) );
2127       break;
2128 #endif
2129 
2130    case VKI_IPC_SET:
2131       PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
2132                     arg2, sizeof(struct vki_shmid_ds) );
2133       break;
2134 
2135 #if defined(VKI_IPC_64)
2136    case VKI_IPC_SET|VKI_IPC_64:
2137       PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
2138                     arg2, sizeof(struct vki_shmid64_ds) );
2139       break;
2140 #endif
2141    }
2142 }
2143 
2144 void
ML_(generic_POST_sys_shmctl)2145 ML_(generic_POST_sys_shmctl) ( ThreadId tid,
2146                                UWord res,
2147                                UWord arg0, UWord arg1, UWord arg2 )
2148 {
2149    switch (arg1 /* cmd */) {
2150 #if defined(VKI_IPC_INFO)
2151    case VKI_IPC_INFO:
2152       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo) );
2153       break;
2154    case VKI_IPC_INFO|VKI_IPC_64:
2155       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo64) );
2156       break;
2157 #endif
2158 
2159 #if defined(VKI_SHM_INFO)
2160    case VKI_SHM_INFO:
2161    case VKI_SHM_INFO|VKI_IPC_64:
2162       POST_MEM_WRITE( arg2, sizeof(struct vki_shm_info) );
2163       break;
2164 #endif
2165 
2166    case VKI_IPC_STAT:
2167 #if defined(VKI_SHM_STAT)
2168    case VKI_SHM_STAT:
2169 #endif
2170       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid_ds) );
2171       break;
2172 
2173 #if defined(VKI_IPC_64)
2174    case VKI_IPC_STAT|VKI_IPC_64:
2175    case VKI_SHM_STAT|VKI_IPC_64:
2176       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid64_ds) );
2177       break;
2178 #endif
2179 
2180 
2181    }
2182 }
2183 
2184 /* ---------------------------------------------------------------------
2185    Generic handler for mmap
2186    ------------------------------------------------------------------ */
2187 
2188 /*
 * Although mmap is specified by POSIX and the arguments are generally
2190  * consistent across platforms the precise details of the low level
2191  * argument passing conventions differ. For example:
2192  *
2193  * - On x86-linux there is mmap (aka old_mmap) which takes the
2194  *   arguments in a memory block and the offset in bytes; and
2195  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
2196  *   way and the offset in pages.
2197  *
2198  * - On ppc32-linux there is mmap (aka sys_mmap) which takes the
2199  *   arguments in the normal way and the offset in bytes; and
2200  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
2201  *   way and the offset in pages.
2202  *
2203  * - On amd64-linux everything is simple and there is just the one
2204  *   call, mmap (aka sys_mmap)  which takes the arguments in the
2205  *   normal way and the offset in bytes.
2206  *
2207  * - On s390x-linux there is mmap (aka old_mmap) which takes the
2208  *   arguments in a memory block and the offset in bytes. mmap2
2209  *   is also available (but not exported via unistd.h) with
2210  *   arguments in a memory block and the offset in pages.
2211  *
2212  * To cope with all this we provide a generic handler function here
2213  * and then each platform implements one or more system call handlers
2214  * which call this generic routine after extracting and normalising
2215  * the arguments.
2216  */
2217 
/* Generic mmap handler, called by the per-platform mmap wrappers once
   they have normalised their arguments.

   Parameters (as used below):
      arg1  requested address (must be page aligned)
      arg2  length in bytes (must be non-zero)
      arg3  protection bits
      arg4  mapping flags
      arg5  file descriptor
      arg6  file offset in bytes (must be page aligned)

   Returns the SysRes of the mmap that was finally performed, or an
   error SysRes constructed here:
      VKI_EINVAL  zero length, misaligned address or offset, or aspacem
                  refused to advise an address;
      VKI_ENOMEM  a MAP_32BIT request (without MAP_FIXED) could not be
                  satisfied.

   On success aspacem, the debuginfo reader and the tool are notified of
   the new mapping.  Must not be used on Darwin (it panics there). */
SysRes
ML_(generic_PRE_sys_mmap) ( ThreadId tid,
                            UWord arg1, UWord arg2, UWord arg3,
                            UWord arg4, UWord arg5, Off64T arg6 )
{
   Addr       advised;
   SysRes     sres;
   MapRequest mreq;
   Bool       mreq_ok;

#  if defined(VGO_darwin)
   // Nb: we can't use this on Darwin, it has races:
   // * needs to RETRY if advisory succeeds but map fails
   //   (could have been some other thread in a nonblocking call)
   // * needs to not use fixed-position mmap() on Darwin
   //   (mmap will cheerfully smash whatever's already there, which might
   //   be a new mapping from some other thread in a nonblocking call)
   VG_(core_panic)("can't use ML_(generic_PRE_sys_mmap) on Darwin");
#  endif

   if (arg2 == 0) {
      /* SuSV3 says: If len is zero, mmap() shall fail and no mapping
         shall be established. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   if (!VG_IS_PAGE_ALIGNED(arg1)) {
      /* zap any misaligned addresses. */
      /* SuSV3 says misaligned addresses only cause the MAP_FIXED case
         to fail.   Here, we catch them all. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   if (!VG_IS_PAGE_ALIGNED(arg6)) {
      /* zap any misaligned offsets. */
      /* SuSV3 says: The off argument is constrained to be aligned and
         sized according to the value returned by sysconf() when
         passed _SC_PAGESIZE or _SC_PAGE_SIZE. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   /* Figure out what kind of allocation constraints there are
      (fixed/hint/any), and ask aspacem what we should do. */
   mreq.start = arg1;
   mreq.len   = arg2;
   if (arg4 & VKI_MAP_FIXED) {
      mreq.rkind = MFixed;
   } else
#if defined(VKI_MAP_ALIGN) /* Solaris specific */
   if (arg4 & VKI_MAP_ALIGN) {
      mreq.rkind = MAlign;
      if (mreq.start == 0) {
         mreq.start = VKI_PAGE_SIZE;
      }
      /* VKI_MAP_FIXED and VKI_MAP_ALIGN don't like each other. */
      /* MAP_FIXED is OR-ed into the flags for the real mmap calls
         below, so MAP_ALIGN is dropped from arg4 here. */
      arg4 &= ~VKI_MAP_ALIGN;
   } else
#endif
   if (arg1 != 0) {
      mreq.rkind = MHint;
   } else {
      mreq.rkind = MAny;
   }

   /* Enquire ... */
   advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
   if (!mreq_ok) {
      /* Our request was bounced, so we'd better fail. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

#  if defined(VKI_MAP_32BIT)
   /* MAP_32BIT is royally unportable, so if the client asks for it, try our
      best to make it work (but without complexifying aspacemgr).
      If the user requested MAP_32BIT, the mmap-ed space must be in the
      first 2GB of the address space. So, return ENOMEM if aspacemgr
      advisory is above the first 2GB. If MAP_FIXED is also requested,
      MAP_32BIT has to be ignored.
      Assumption about aspacemgr behaviour: aspacemgr scans the address space
      from low addresses to find a free segment. No special effort is done
      to keep the first 2GB 'free' for this MAP_32BIT. So, this will often
      fail once the program has already allocated significant memory. */
   if ((arg4 & VKI_MAP_32BIT) && !(arg4 & VKI_MAP_FIXED)) {
      if (advised + arg2 >= 0x80000000)
         return VG_(mk_SysRes_Error)( VKI_ENOMEM );
   }
#  endif

   /* Otherwise we're OK (so far).  Install aspacem's choice of
      address, and let the mmap go through.  */
   sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
                                    arg4 | VKI_MAP_FIXED,
                                    arg5, arg6);

#  if defined(VKI_MAP_32BIT)
   /* No recovery trial if the advisory was not accepted. */
   if ((arg4 & VKI_MAP_32BIT) && !(arg4 & VKI_MAP_FIXED)
       && sr_isError(sres)) {
      return VG_(mk_SysRes_Error)( VKI_ENOMEM );
   }
#  endif

   /* A refinement: it may be that the kernel refused aspacem's choice
      of address.  If we were originally asked for a hinted mapping,
      there is still a last chance: try again at any address.
      Hence: */
   if (mreq.rkind == MHint && sr_isError(sres)) {
      mreq.start = 0;
      mreq.len   = arg2;
      mreq.rkind = MAny;
      advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
      if (!mreq_ok) {
         /* Our request was bounced, so we'd better fail. */
         return VG_(mk_SysRes_Error)( VKI_EINVAL );
      }
      /* and try again with the kernel */
      sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
                                       arg4 | VKI_MAP_FIXED,
                                       arg5, arg6);
   }

   /* Yet another refinement : sometimes valgrind chooses an address
      which is not acceptable by the kernel. This at least happens
      when mmap-ing huge pages, using the flag MAP_HUGETLB.
      valgrind aspacem does not know about huge pages, and modifying
      it to handle huge pages is not straightforward (e.g. need
      to understand special file system mount options).
      So, let's just redo an mmap, without giving any constraint to
      the kernel. If that succeeds, check with aspacem that the returned
      address is acceptable.
      This will give a similar effect as if the user would have
      hinted that address.
      The aspacem state will be correctly updated afterwards.
      We however cannot do this last refinement when the user asked
      for a fixed mapping, as the user asked a specific address. */
   if (sr_isError(sres) && !(arg4 & VKI_MAP_FIXED)) {
      advised = 0;
      /* try mmap with NULL address and without VKI_MAP_FIXED
         to let the kernel decide. */
      sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
                                       arg4,
                                       arg5, arg6);
      if (!sr_isError(sres)) {
         /* The kernel is supposed to know what it is doing, but let's
            do a last sanity check anyway, as if the chosen address had
            been initially hinted by the client. The whole point of this
            last try was to allow mmap of huge pages to succeed without
            making aspacem understand them, on the other hand the kernel
            does not know about valgrind reservations, so this mapping
            can end up in free space and reservations. */
         mreq.start = (Addr)sr_Res(sres);
         mreq.len   = arg2;
         mreq.rkind = MHint;
         advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
         /* The kernel-chosen address must be one aspacem would also
            have accepted; anything else is treated as fatal. */
         vg_assert(mreq_ok && advised == mreq.start);
      }
   }

   if (!sr_isError(sres)) {
      ULong di_handle;
      /* Notify aspacem. */
      /* Note: if the VKI_MAP_ALIGN branch above was taken, arg4 has had
         VKI_MAP_ALIGN cleared by this point. */
      notify_core_of_mmap(
         (Addr)sr_Res(sres), /* addr kernel actually assigned */
         arg2, /* length */
         arg3, /* prot */
         arg4, /* the original flags value */
         arg5, /* fd */
         arg6  /* offset */
      );
      /* Load symbols? */
      di_handle = VG_(di_notify_mmap)( (Addr)sr_Res(sres),
                                       False/*allow_SkFileV*/, (Int)arg5 );
      /* Notify the tool. */
      notify_tool_of_mmap(
         (Addr)sr_Res(sres), /* addr kernel actually assigned */
         arg2, /* length */
         arg3, /* prot */
         di_handle /* so the tool can refer to the read debuginfo later,
                      if it wants. */
      );
   }

   /* Stay sane */
   /* A successful fixed mapping must land exactly where requested. */
   if (!sr_isError(sres) && (arg4 & VKI_MAP_FIXED))
      vg_assert(sr_Res(sres) == arg1);

   return sres;
}
2406 
2407 
2408 /* ---------------------------------------------------------------------
2409    The Main Entertainment ... syscall wrappers
2410    ------------------------------------------------------------------ */
2411 
2412 /* Note: the PRE() and POST() wrappers are for the actual functions
2413    implementing the system calls in the OS kernel.  These mostly have
2414    names like sys_write();  a few have names like old_mmap().  See the
2415    comment for ML_(syscall_table)[] for important info about the __NR_foo
2416    constants and their relationship to the sys_foo() functions.
2417 
2418    Some notes about names used for syscalls and args:
2419    - For the --trace-syscalls=yes output, we use the sys_foo() name to avoid
2420      ambiguity.
2421 
2422    - For error messages, we generally use a somewhat generic name
2423      for the syscall (eg. "write" rather than "sys_write").  This should be
2424      good enough for the average user to understand what is happening,
2425      without confusing them with names like "sys_write".
2426 
2427    - Also, for error messages the arg names are mostly taken from the man
2428      pages (even though many of those man pages are really for glibc
2429      functions of the same name), rather than from the OS kernel source,
2430      for the same reason -- a user presented with a "bogus foo(bar)" arg
2431      will most likely look at the "foo" man page to see which is the "bar"
2432      arg.
2433 
2434    Note that we use our own vki_* types.  The one exception is in
2435    PRE_REG_READn calls, where pointer types haven't been changed, because
2436    they don't need to be -- eg. for "foo*" to be used, the type foo need not
2437    be visible.
2438 
2439    XXX: some of these are arch-specific, and should be factored out.
2440 */
2441 
/* Shorthand used by every wrapper below: PRE(name) and POST(name)
   expand to DEFN_PRE_TEMPLATE / DEFN_POST_TEMPLATE with "generic" as
   the first argument. */
#define PRE(name)      DEFN_PRE_TEMPLATE(generic, name)
#define POST(name)     DEFN_POST_TEMPLATE(generic, name)
2444 
PRE(sys_exit)2445 PRE(sys_exit)
2446 {
2447    ThreadState* tst;
2448    /* simple; just make this thread exit */
2449    PRINT("exit( %ld )", SARG1);
2450    PRE_REG_READ1(void, "exit", int, status);
2451    tst = VG_(get_ThreadState)(tid);
2452    /* Set the thread's status to be exiting, then claim that the
2453       syscall succeeded. */
2454    tst->exitreason = VgSrc_ExitThread;
2455    tst->os_state.exitcode = ARG1;
2456    SET_STATUS_Success(0);
2457 }
2458 
PRE(sys_ni_syscall)2459 PRE(sys_ni_syscall)
2460 {
2461    PRINT("unimplemented (by the kernel) syscall: %s! (ni_syscall)\n",
2462       VG_SYSNUM_STRING(SYSNO));
2463    PRE_REG_READ0(long, "ni_syscall");
2464    SET_STATUS_Failure( VKI_ENOSYS );
2465 }
2466 
PRE(sys_iopl)2467 PRE(sys_iopl)
2468 {
2469    PRINT("sys_iopl ( %lu )", ARG1);
2470    PRE_REG_READ1(long, "iopl", unsigned long, level);
2471 }
2472 
PRE(sys_fsync)2473 PRE(sys_fsync)
2474 {
2475    *flags |= SfMayBlock;
2476    PRINT("sys_fsync ( %lu )", ARG1);
2477    PRE_REG_READ1(long, "fsync", unsigned int, fd);
2478 }
2479 
PRE(sys_fdatasync)2480 PRE(sys_fdatasync)
2481 {
2482    *flags |= SfMayBlock;
2483    PRINT("sys_fdatasync ( %lu )", ARG1);
2484    PRE_REG_READ1(long, "fdatasync", unsigned int, fd);
2485 }
2486 
PRE(sys_msync)2487 PRE(sys_msync)
2488 {
2489    *flags |= SfMayBlock;
2490    PRINT("sys_msync ( %#lx, %lu, %#lx )", ARG1, ARG2, ARG3);
2491    PRE_REG_READ3(long, "msync",
2492                  unsigned long, start, vki_size_t, length, int, flags);
2493    PRE_MEM_READ( "msync(start)", ARG1, ARG2 );
2494 }
2495 
// Nb: getpmsg() and putpmsg() are special additional syscalls used in early
// versions of LiS (Linux Streams).  They are not part of the kernel.
// Therefore, we have to provide this type ourselves, rather than getting it
// from the kernel sources.
//
// The getpmsg/putpmsg wrappers below cast their ctrl (ARG2) and data (ARG3)
// arguments to pointers to this structure.
struct vki_pmsg_strbuf {
   int     maxlen;         /* no. of bytes in buffer */
   int     len;            /* no. of bytes returned */
   vki_caddr_t buf;        /* pointer to data */
};
PRE(sys_getpmsg)2505 PRE(sys_getpmsg)
2506 {
2507    /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
2508    struct vki_pmsg_strbuf *ctrl;
2509    struct vki_pmsg_strbuf *data;
2510    *flags |= SfMayBlock;
2511    PRINT("sys_getpmsg ( %ld, %#lx, %#lx, %#lx, %#lx )", SARG1, ARG2, ARG3,
2512          ARG4, ARG5);
2513    PRE_REG_READ5(int, "getpmsg",
2514                  int, fd, struct strbuf *, ctrl, struct strbuf *, data,
2515                  int *, bandp, int *, flagsp);
2516    ctrl = (struct vki_pmsg_strbuf *)ARG2;
2517    data = (struct vki_pmsg_strbuf *)ARG3;
2518    if (ctrl && ctrl->maxlen > 0)
2519       PRE_MEM_WRITE( "getpmsg(ctrl)", (Addr)ctrl->buf, ctrl->maxlen);
2520    if (data && data->maxlen > 0)
2521       PRE_MEM_WRITE( "getpmsg(data)", (Addr)data->buf, data->maxlen);
2522    if (ARG4)
2523       PRE_MEM_WRITE( "getpmsg(bandp)", (Addr)ARG4, sizeof(int));
2524    if (ARG5)
2525       PRE_MEM_WRITE( "getpmsg(flagsp)", (Addr)ARG5, sizeof(int));
2526 }
POST(sys_getpmsg)2527 POST(sys_getpmsg)
2528 {
2529    struct vki_pmsg_strbuf *ctrl;
2530    struct vki_pmsg_strbuf *data;
2531    vg_assert(SUCCESS);
2532    ctrl = (struct vki_pmsg_strbuf *)ARG2;
2533    data = (struct vki_pmsg_strbuf *)ARG3;
2534    if (RES == 0 && ctrl && ctrl->len > 0) {
2535       POST_MEM_WRITE( (Addr)ctrl->buf, ctrl->len);
2536    }
2537    if (RES == 0 && data && data->len > 0) {
2538       POST_MEM_WRITE( (Addr)data->buf, data->len);
2539    }
2540 }
2541 
PRE(sys_putpmsg)2542 PRE(sys_putpmsg)
2543 {
2544    /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
2545    struct vki_pmsg_strbuf *ctrl;
2546    struct vki_pmsg_strbuf *data;
2547    *flags |= SfMayBlock;
2548    PRINT("sys_putpmsg ( %ld, %#lx, %#lx, %ld, %ld )", SARG1, ARG2, ARG3,
2549          SARG4, SARG5);
2550    PRE_REG_READ5(int, "putpmsg",
2551                  int, fd, struct strbuf *, ctrl, struct strbuf *, data,
2552                  int, band, int, flags);
2553    ctrl = (struct vki_pmsg_strbuf *)ARG2;
2554    data = (struct vki_pmsg_strbuf *)ARG3;
2555    if (ctrl && ctrl->len > 0)
2556       PRE_MEM_READ( "putpmsg(ctrl)", (Addr)ctrl->buf, ctrl->len);
2557    if (data && data->len > 0)
2558       PRE_MEM_READ( "putpmsg(data)", (Addr)data->buf, data->len);
2559 }
2560 
PRE(sys_getitimer)2561 PRE(sys_getitimer)
2562 {
2563    struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
2564    PRINT("sys_getitimer ( %ld, %#lx )", SARG1, ARG2);
2565    PRE_REG_READ2(long, "getitimer", int, which, struct itimerval *, value);
2566 
2567    PRE_timeval_WRITE( "getitimer(&value->it_interval)", &(value->it_interval));
2568    PRE_timeval_WRITE( "getitimer(&value->it_value)",    &(value->it_value));
2569 }
2570 
POST(sys_getitimer)2571 POST(sys_getitimer)
2572 {
2573    if (ARG2 != (Addr)NULL) {
2574       struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
2575       POST_timeval_WRITE( &(value->it_interval) );
2576       POST_timeval_WRITE( &(value->it_value) );
2577    }
2578 }
2579 
PRE(sys_setitimer)2580 PRE(sys_setitimer)
2581 {
2582    PRINT("sys_setitimer ( %ld, %#lx, %#lx )", SARG1, ARG2, ARG3);
2583    PRE_REG_READ3(long, "setitimer",
2584                  int, which,
2585                  struct itimerval *, value, struct itimerval *, ovalue);
2586    if (ARG2 != (Addr)NULL) {
2587       struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
2588       PRE_timeval_READ( "setitimer(&value->it_interval)",
2589                          &(value->it_interval));
2590       PRE_timeval_READ( "setitimer(&value->it_value)",
2591                          &(value->it_value));
2592    }
2593    if (ARG3 != (Addr)NULL) {
2594       struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
2595       PRE_timeval_WRITE( "setitimer(&ovalue->it_interval)",
2596                          &(ovalue->it_interval));
2597       PRE_timeval_WRITE( "setitimer(&ovalue->it_value)",
2598                          &(ovalue->it_value));
2599    }
2600 }
2601 
POST(sys_setitimer)2602 POST(sys_setitimer)
2603 {
2604    if (ARG3 != (Addr)NULL) {
2605       struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
2606       POST_timeval_WRITE( &(ovalue->it_interval) );
2607       POST_timeval_WRITE( &(ovalue->it_value) );
2608    }
2609 }
2610 
PRE(sys_chroot)2611 PRE(sys_chroot)
2612 {
2613    PRINT("sys_chroot ( %#lx )", ARG1);
2614    PRE_REG_READ1(long, "chroot", const char *, path);
2615    PRE_MEM_RASCIIZ( "chroot(path)", ARG1 );
2616 }
2617 
PRE(sys_madvise)2618 PRE(sys_madvise)
2619 {
2620    *flags |= SfMayBlock;
2621    PRINT("sys_madvise ( %#lx, %lu, %ld )", ARG1, ARG2, SARG3);
2622    PRE_REG_READ3(long, "madvise",
2623                  unsigned long, start, vki_size_t, length, int, advice);
2624 }
2625 
#if HAVE_MREMAP
/* Pre-handler for mremap.  Declares four register arguments, or five
   when VKI_MREMAP_FIXED is set in the flags (only then is 'new_addr'
   meaningful), and then performs the remap itself through do_mremap(),
   setting the syscall status from its result. */
PRE(sys_mremap)
{
   // Nb: this is different to the glibc version described in the man pages,
   // which lacks the fifth 'new_address' argument.
   const Bool fixed = (ARG4 & VKI_MREMAP_FIXED) ? True : False;

   if (!fixed) {
      PRINT("sys_mremap ( %#lx, %lu, %lu, 0x%lx )",
            ARG1, ARG2, ARG3, ARG4);
      PRE_REG_READ4(unsigned long, "mremap",
                    unsigned long, old_addr, unsigned long, old_size,
                    unsigned long, new_size, unsigned long, flags);
   } else {
      PRINT("sys_mremap ( %#lx, %lu, %lu, %#lx, %#lx )",
            ARG1, ARG2, ARG3, ARG4, ARG5);
      PRE_REG_READ5(unsigned long, "mremap",
                    unsigned long, old_addr, unsigned long, old_size,
                    unsigned long, new_size, unsigned long, flags,
                    unsigned long, new_addr);
   }

   /* Note the argument order: (old_addr, old_len, new_addr, new_len,
      flags, tid). */
   SET_STATUS_from_SysRes(
      do_mremap((Addr)ARG1, ARG2, (Addr)ARG5, ARG3, ARG4, tid)
   );
}
#endif /* HAVE_MREMAP */
2650 
PRE(sys_nice)2651 PRE(sys_nice)
2652 {
2653    PRINT("sys_nice ( %ld )", SARG1);
2654    PRE_REG_READ1(long, "nice", int, inc);
2655 }
2656 
PRE(sys_mlock)2657 PRE(sys_mlock)
2658 {
2659    *flags |= SfMayBlock;
2660    PRINT("sys_mlock ( %#lx, %lu )", ARG1, ARG2);
2661    PRE_REG_READ2(long, "mlock", unsigned long, addr, vki_size_t, len);
2662 }
2663 
PRE(sys_munlock)2664 PRE(sys_munlock)
2665 {
2666    *flags |= SfMayBlock;
2667    PRINT("sys_munlock ( %#lx, %lu )", ARG1, ARG2);
2668    PRE_REG_READ2(long, "munlock", unsigned long, addr, vki_size_t, len);
2669 }
2670 
PRE(sys_mlockall)2671 PRE(sys_mlockall)
2672 {
2673    *flags |= SfMayBlock;
2674    PRINT("sys_mlockall ( %lx )", ARG1);
2675    PRE_REG_READ1(long, "mlockall", int, flags);
2676 }
2677 
PRE(sys_setpriority)2678 PRE(sys_setpriority)
2679 {
2680    PRINT("sys_setpriority ( %ld, %ld, %ld )", SARG1, SARG2, SARG3);
2681    PRE_REG_READ3(long, "setpriority", int, which, int, who, int, prio);
2682 }
2683 
PRE(sys_getpriority)2684 PRE(sys_getpriority)
2685 {
2686    PRINT("sys_getpriority ( %ld, %ld )", SARG1, SARG2);
2687    PRE_REG_READ2(long, "getpriority", int, which, int, who);
2688 }
2689 
PRE(sys_pwrite64)2690 PRE(sys_pwrite64)
2691 {
2692    *flags |= SfMayBlock;
2693 #if VG_WORDSIZE == 4
2694    PRINT("sys_pwrite64 ( %lu, %#lx, %lu, %lld )",
2695          ARG1, ARG2, ARG3, (Long)MERGE64(ARG4,ARG5));
2696    PRE_REG_READ5(ssize_t, "pwrite64",
2697                  unsigned int, fd, const char *, buf, vki_size_t, count,
2698                  vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
2699 #elif VG_WORDSIZE == 8
2700    PRINT("sys_pwrite64 ( %lu, %#lx, %lu, %ld )",
2701          ARG1, ARG2, ARG3, SARG4);
2702    PRE_REG_READ4(ssize_t, "pwrite64",
2703                  unsigned int, fd, const char *, buf, vki_size_t, count,
2704                  Word, offset);
2705 #else
2706 #  error Unexpected word size
2707 #endif
2708    PRE_MEM_READ( "pwrite64(buf)", ARG2, ARG3 );
2709 }
2710 
PRE(sys_sync)2711 PRE(sys_sync)
2712 {
2713    *flags |= SfMayBlock;
2714    PRINT("sys_sync ( )");
2715    PRE_REG_READ0(long, "sync");
2716 }
2717 
PRE(sys_fstatfs)2718 PRE(sys_fstatfs)
2719 {
2720    FUSE_COMPATIBLE_MAY_BLOCK();
2721    PRINT("sys_fstatfs ( %lu, %#lx )", ARG1, ARG2);
2722    PRE_REG_READ2(long, "fstatfs",
2723                  unsigned int, fd, struct statfs *, buf);
2724    PRE_MEM_WRITE( "fstatfs(buf)", ARG2, sizeof(struct vki_statfs) );
2725 }
2726 
POST(sys_fstatfs)2727 POST(sys_fstatfs)
2728 {
2729    POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
2730 }
2731 
PRE(sys_fstatfs64)2732 PRE(sys_fstatfs64)
2733 {
2734    FUSE_COMPATIBLE_MAY_BLOCK();
2735    PRINT("sys_fstatfs64 ( %lu, %lu, %#lx )", ARG1, ARG2, ARG3);
2736    PRE_REG_READ3(long, "fstatfs64",
2737                  unsigned int, fd, vki_size_t, size, struct statfs64 *, buf);
2738    PRE_MEM_WRITE( "fstatfs64(buf)", ARG3, ARG2 );
2739 }
POST(sys_fstatfs64)2740 POST(sys_fstatfs64)
2741 {
2742    POST_MEM_WRITE( ARG3, ARG2 );
2743 }
2744 
PRE(sys_getsid)2745 PRE(sys_getsid)
2746 {
2747    PRINT("sys_getsid ( %ld )", SARG1);
2748    PRE_REG_READ1(long, "getsid", vki_pid_t, pid);
2749 }
2750 
PRE(sys_pread64)2751 PRE(sys_pread64)
2752 {
2753    *flags |= SfMayBlock;
2754 #if VG_WORDSIZE == 4
2755    PRINT("sys_pread64 ( %lu, %#lx, %lu, %lld )",
2756          ARG1, ARG2, ARG3, (Long)MERGE64(ARG4,ARG5));
2757    PRE_REG_READ5(ssize_t, "pread64",
2758                  unsigned int, fd, char *, buf, vki_size_t, count,
2759                  vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
2760 #elif VG_WORDSIZE == 8
2761    PRINT("sys_pread64 ( %lu, %#lx, %lu, %ld )",
2762          ARG1, ARG2, ARG3, SARG4);
2763    PRE_REG_READ4(ssize_t, "pread64",
2764                  unsigned int, fd, char *, buf, vki_size_t, count,
2765                  Word, offset);
2766 #else
2767 #  error Unexpected word size
2768 #endif
2769    PRE_MEM_WRITE( "pread64(buf)", ARG2, ARG3 );
2770 }
POST(sys_pread64)2771 POST(sys_pread64)
2772 {
2773    vg_assert(SUCCESS);
2774    if (RES > 0) {
2775       POST_MEM_WRITE( ARG2, RES );
2776    }
2777 }
2778 
PRE(sys_mknod)2779 PRE(sys_mknod)
2780 {
2781    FUSE_COMPATIBLE_MAY_BLOCK();
2782    PRINT("sys_mknod ( %#lx(%s), %#lx, %#lx )", ARG1, (HChar*)ARG1, ARG2, ARG3 );
2783    PRE_REG_READ3(long, "mknod",
2784                  const char *, pathname, int, mode, unsigned, dev);
2785    PRE_MEM_RASCIIZ( "mknod(pathname)", ARG1 );
2786 }
2787 
PRE(sys_flock)2788 PRE(sys_flock)
2789 {
2790    *flags |= SfMayBlock;
2791    PRINT("sys_flock ( %lu, %lu )", ARG1, ARG2 );
2792    PRE_REG_READ2(long, "flock", unsigned int, fd, unsigned int, operation);
2793 }
2794 
2795 // Pre_read a char** argument.
ML_(pre_argv_envp)2796 void ML_(pre_argv_envp)(Addr a, ThreadId tid, const HChar *s1, const HChar *s2)
2797 {
2798    while (True) {
2799       Addr a_deref;
2800       Addr* a_p = (Addr*)a;
2801       PRE_MEM_READ( s1, (Addr)a_p, sizeof(Addr) );
2802       a_deref = *a_p;
2803       if (0 == a_deref)
2804          break;
2805       PRE_MEM_RASCIIZ( s2, a_deref );
2806       a += sizeof(char*);
2807    }
2808 }
2809 
i_am_the_only_thread(void)2810 static Bool i_am_the_only_thread ( void )
2811 {
2812    Int c = VG_(count_living_threads)();
2813    vg_assert(c >= 1); /* stay sane */
2814    return c == 1;
2815 }
2816 
2817 /* Wait until all other threads disappear. */
VG_(reap_threads)2818 void VG_(reap_threads)(ThreadId self)
2819 {
2820    while (!i_am_the_only_thread()) {
2821       /* Let other thread(s) run */
2822       VG_(vg_yield)();
2823       VG_(poll_signals)(self);
2824    }
2825    vg_assert(i_am_the_only_thread());
2826 }
2827 
2828 // XXX: prototype here seemingly doesn't match the prototype for i386-linux,
2829 // but it seems to work nonetheless...
PRE(sys_execve)2830 PRE(sys_execve)
2831 {
2832    HChar*       path = NULL;       /* path to executable */
2833    HChar**      envp = NULL;
2834    HChar**      argv = NULL;
2835    HChar**      arg2copy;
2836    HChar*       launcher_basename = NULL;
2837    ThreadState* tst;
2838    Int          i, j, tot_args;
2839    SysRes       res;
2840    Bool         setuid_allowed, trace_this_child;
2841 
2842    PRINT("sys_execve ( %#lx(%s), %#lx, %#lx )", ARG1, (HChar*)ARG1, ARG2, ARG3);
2843    PRE_REG_READ3(vki_off_t, "execve",
2844                  char *, filename, char **, argv, char **, envp);
2845    PRE_MEM_RASCIIZ( "execve(filename)", ARG1 );
2846    if (ARG2 != 0) {
2847       /* At least the terminating NULL must be addressable. */
2848       if (!ML_(safe_to_deref)((HChar **) ARG2, sizeof(HChar *))) {
2849          SET_STATUS_Failure(VKI_EFAULT);
2850          return;
2851       }
2852       ML_(pre_argv_envp)( ARG2, tid, "execve(argv)", "execve(argv[i])" );
2853    }
2854    if (ARG3 != 0) {
2855       /* At least the terminating NULL must be addressable. */
2856       if (!ML_(safe_to_deref)((HChar **) ARG3, sizeof(HChar *))) {
2857          SET_STATUS_Failure(VKI_EFAULT);
2858          return;
2859       }
2860       ML_(pre_argv_envp)( ARG3, tid, "execve(envp)", "execve(envp[i])" );
2861    }
2862 
2863    vg_assert(VG_(is_valid_tid)(tid));
2864    tst = VG_(get_ThreadState)(tid);
2865 
2866    /* Erk.  If the exec fails, then the following will have made a
2867       mess of things which makes it hard for us to continue.  The
2868       right thing to do is piece everything together again in
2869       POST(execve), but that's close to impossible.  Instead, we make
2870       an effort to check that the execve will work before actually
2871       doing it. */
2872 
2873    /* Check that the name at least begins in client-accessible storage. */
2874    if (ARG1 == 0 /* obviously bogus */
2875        || !VG_(am_is_valid_for_client)( ARG1, 1, VKI_PROT_READ )) {
2876       SET_STATUS_Failure( VKI_EFAULT );
2877       return;
2878    }
2879 
2880    // debug-only printing
2881    if (0) {
2882       VG_(printf)("ARG1 = %p(%s)\n", (void*)ARG1, (HChar*)ARG1);
2883       if (ARG2) {
2884          VG_(printf)("ARG2 = ");
2885          Int q;
2886          HChar** vec = (HChar**)ARG2;
2887          for (q = 0; vec[q]; q++)
2888             VG_(printf)("%p(%s) ", vec[q], vec[q]);
2889          VG_(printf)("\n");
2890       } else {
2891          VG_(printf)("ARG2 = null\n");
2892       }
2893    }
2894 
2895    // Decide whether or not we want to follow along
2896    { // Make 'child_argv' be a pointer to the child's arg vector
2897      // (skipping the exe name)
2898      const HChar** child_argv = (const HChar**)ARG2;
2899      if (child_argv && child_argv[0] == NULL)
2900         child_argv = NULL;
2901      trace_this_child = VG_(should_we_trace_this_child)( (HChar*)ARG1, child_argv );
2902    }
2903 
2904    // Do the important checks:  it is a file, is executable, permissions are
2905    // ok, etc.  We allow setuid executables to run only in the case when
2906    // we are not simulating them, that is, they to be run natively.
2907    setuid_allowed = trace_this_child  ? False  : True;
2908    res = VG_(pre_exec_check)((const HChar *)ARG1, NULL, setuid_allowed);
2909    if (sr_isError(res)) {
2910       SET_STATUS_Failure( sr_Err(res) );
2911       return;
2912    }
2913 
2914    /* If we're tracing the child, and the launcher name looks bogus
2915       (possibly because launcher.c couldn't figure it out, see
2916       comments therein) then we have no option but to fail. */
2917    if (trace_this_child
2918        && (VG_(name_of_launcher) == NULL
2919            || VG_(name_of_launcher)[0] != '/')) {
2920       SET_STATUS_Failure( VKI_ECHILD ); /* "No child processes" */
2921       return;
2922    }
2923 
2924    /* After this point, we can't recover if the execve fails. */
2925    VG_(debugLog)(1, "syswrap", "Exec of %s\n", (HChar*)ARG1);
2926 
2927 
2928    // Terminate gdbserver if it is active.
2929    if (VG_(clo_vgdb)  != Vg_VgdbNo) {
2930       // If the child will not be traced, we need to terminate gdbserver
2931       // to cleanup the gdbserver resources (e.g. the FIFO files).
2932       // If child will be traced, we also terminate gdbserver: the new
2933       // Valgrind will start a fresh gdbserver after exec.
2934       VG_(gdbserver) (0);
2935    }
2936 
2937    /* Resistance is futile.  Nuke all other threads.  POSIX mandates
2938       this. (Really, nuke them all, since the new process will make
2939       its own new thread.) */
2940    VG_(nuke_all_threads_except)( tid, VgSrc_ExitThread );
2941    VG_(reap_threads)(tid);
2942 
2943    // Set up the child's exe path.
2944    //
2945    if (trace_this_child) {
2946 
2947       // We want to exec the launcher.  Get its pre-remembered path.
2948       path = VG_(name_of_launcher);
2949       // VG_(name_of_launcher) should have been acquired by m_main at
2950       // startup.
2951       vg_assert(path);
2952 
2953       launcher_basename = VG_(strrchr)(path, '/');
2954       if (launcher_basename == NULL || launcher_basename[1] == 0) {
2955          launcher_basename = path;  // hmm, tres dubious
2956       } else {
2957          launcher_basename++;
2958       }
2959 
2960    } else {
2961       path = (HChar*)ARG1;
2962    }
2963 
2964    // Set up the child's environment.
2965    //
2966    // Remove the valgrind-specific stuff from the environment so the
2967    // child doesn't get vgpreload_core.so, vgpreload_<tool>.so, etc.
2968    // This is done unconditionally, since if we are tracing the child,
2969    // the child valgrind will set up the appropriate client environment.
2970    // Nb: we make a copy of the environment before trying to mangle it
2971    // as it might be in read-only memory (this was bug #101881).
2972    //
2973    // Then, if tracing the child, set VALGRIND_LIB for it.
2974    //
2975    if (ARG3 == 0) {
2976       envp = NULL;
2977    } else {
2978       envp = VG_(env_clone)( (HChar**)ARG3 );
2979       if (envp == NULL) goto hosed;
2980       VG_(env_remove_valgrind_env_stuff)( envp, True /*ro_strings*/, NULL );
2981    }
2982 
2983    if (trace_this_child) {
2984       // Set VALGRIND_LIB in ARG3 (the environment)
2985       VG_(env_setenv)( &envp, VALGRIND_LIB, VG_(libdir));
2986    }
2987 
2988    // Set up the child's args.  If not tracing it, they are
2989    // simply ARG2.  Otherwise, they are
2990    //
2991    // [launcher_basename] ++ VG_(args_for_valgrind) ++ [ARG1] ++ ARG2[1..]
2992    //
2993    // except that the first VG_(args_for_valgrind_noexecpass) args
2994    // are omitted.
2995    //
2996    if (!trace_this_child) {
2997       argv = (HChar**)ARG2;
2998    } else {
2999       vg_assert( VG_(args_for_valgrind) );
3000       vg_assert( VG_(args_for_valgrind_noexecpass) >= 0 );
3001       vg_assert( VG_(args_for_valgrind_noexecpass)
3002                    <= VG_(sizeXA)( VG_(args_for_valgrind) ) );
3003       /* how many args in total will there be? */
3004       // launcher basename
3005       tot_args = 1;
3006       // V's args
3007       tot_args += VG_(sizeXA)( VG_(args_for_valgrind) );
3008       tot_args -= VG_(args_for_valgrind_noexecpass);
3009       // name of client exe
3010       tot_args++;
3011       // args for client exe, skipping [0]
3012       arg2copy = (HChar**)ARG2;
3013       if (arg2copy && arg2copy[0]) {
3014          for (i = 1; arg2copy[i]; i++)
3015             tot_args++;
3016       }
3017       // allocate
3018       argv = VG_(malloc)( "di.syswrap.pre_sys_execve.1",
3019                           (tot_args+1) * sizeof(HChar*) );
3020       // copy
3021       j = 0;
3022       argv[j++] = launcher_basename;
3023       for (i = 0; i < VG_(sizeXA)( VG_(args_for_valgrind) ); i++) {
3024          if (i < VG_(args_for_valgrind_noexecpass))
3025             continue;
3026          argv[j++] = * (HChar**) VG_(indexXA)( VG_(args_for_valgrind), i );
3027       }
3028       argv[j++] = (HChar*)ARG1;
3029       if (arg2copy && arg2copy[0])
3030          for (i = 1; arg2copy[i]; i++)
3031             argv[j++] = arg2copy[i];
3032       argv[j++] = NULL;
3033       // check
3034       vg_assert(j == tot_args+1);
3035    }
3036 
3037    /*
3038       Set the signal state up for exec.
3039 
3040       We need to set the real signal state to make sure the exec'd
3041       process gets SIG_IGN properly.
3042 
3043       Also set our real sigmask to match the client's sigmask so that
3044       the exec'd child will get the right mask.  First we need to
3045       clear out any pending signals so they they don't get delivered,
3046       which would confuse things.
3047 
3048       XXX This is a bug - the signals should remain pending, and be
3049       delivered to the new process after exec.  There's also a
3050       race-condition, since if someone delivers us a signal between
3051       the sigprocmask and the execve, we'll still get the signal. Oh
3052       well.
3053    */
3054    {
3055       vki_sigset_t allsigs;
3056       vki_siginfo_t info;
3057 
3058       /* What this loop does: it queries SCSS (the signal state that
3059          the client _thinks_ the kernel is in) by calling
3060          VG_(do_sys_sigaction), and modifies the real kernel signal
3061          state accordingly. */
3062       for (i = 1; i < VG_(max_signal); i++) {
3063          vki_sigaction_fromK_t sa_f;
3064          vki_sigaction_toK_t   sa_t;
3065          VG_(do_sys_sigaction)(i, NULL, &sa_f);
3066          VG_(convert_sigaction_fromK_to_toK)(&sa_f, &sa_t);
3067          if (sa_t.ksa_handler == VKI_SIG_IGN)
3068             VG_(sigaction)(i, &sa_t, NULL);
3069          else {
3070             sa_t.ksa_handler = VKI_SIG_DFL;
3071             VG_(sigaction)(i, &sa_t, NULL);
3072          }
3073       }
3074 
3075       VG_(sigfillset)(&allsigs);
3076       while(VG_(sigtimedwait_zero)(&allsigs, &info) > 0)
3077          ;
3078 
3079       VG_(sigprocmask)(VKI_SIG_SETMASK, &tst->sig_mask, NULL);
3080    }
3081 
3082    if (0) {
3083       HChar **cpp;
3084       VG_(printf)("exec: %s\n", path);
3085       for (cpp = argv; cpp && *cpp; cpp++)
3086          VG_(printf)("argv: %s\n", *cpp);
3087       if (0)
3088          for (cpp = envp; cpp && *cpp; cpp++)
3089             VG_(printf)("env: %s\n", *cpp);
3090    }
3091 
3092    SET_STATUS_from_SysRes(
3093       VG_(do_syscall3)(__NR_execve, (UWord)path, (UWord)argv, (UWord)envp)
3094    );
3095 
3096    /* If we got here, then the execve failed.  We've already made way
3097       too much of a mess to continue, so we have to abort. */
3098   hosed:
3099    vg_assert(FAILURE);
3100    VG_(message)(Vg_UserMsg, "execve(%#lx(%s), %#lx, %#lx) failed, errno %lu\n",
3101                 ARG1, (HChar*)ARG1, ARG2, ARG3, ERR);
3102    VG_(message)(Vg_UserMsg, "EXEC FAILED: I can't recover from "
3103                             "execve() failing, so I'm dying.\n");
3104    VG_(message)(Vg_UserMsg, "Add more stringent tests in PRE(sys_execve), "
3105                             "or work out how to recover.\n");
3106    VG_(exit)(101);
3107 }
3108 
PRE(sys_access)3109 PRE(sys_access)
3110 {
3111    PRINT("sys_access ( %#lx(%s), %ld )", ARG1, (HChar*)ARG1, SARG2);
3112    PRE_REG_READ2(long, "access", const char *, pathname, int, mode);
3113    PRE_MEM_RASCIIZ( "access(pathname)", ARG1 );
3114 }
3115 
PRE(sys_alarm)3116 PRE(sys_alarm)
3117 {
3118    PRINT("sys_alarm ( %lu )", ARG1);
3119    PRE_REG_READ1(unsigned long, "alarm", unsigned int, seconds);
3120 }
3121 
PRE(sys_brk)3122 PRE(sys_brk)
3123 {
3124    Addr brk_limit = VG_(brk_limit);
3125    Addr brk_new;
3126 
3127    /* libc   says: int   brk(void *end_data_segment);
3128       kernel says: void* brk(void* end_data_segment);  (more or less)
3129 
3130       libc returns 0 on success, and -1 (and sets errno) on failure.
3131       Nb: if you ask to shrink the dataseg end below what it
3132       currently is, that always succeeds, even if the dataseg end
3133       doesn't actually change (eg. brk(0)).  Unless it seg faults.
3134 
3135       Kernel returns the new dataseg end.  If the brk() failed, this
3136       will be unchanged from the old one.  That's why calling (kernel)
3137       brk(0) gives the current dataseg end (libc brk() just returns
3138       zero in that case).
3139 
3140       Both will seg fault if you shrink it back into a text segment.
3141    */
3142    PRINT("sys_brk ( %#lx )", ARG1);
3143    PRE_REG_READ1(unsigned long, "brk", unsigned long, end_data_segment);
3144 
3145    brk_new = do_brk(ARG1, tid);
3146    SET_STATUS_Success( brk_new );
3147 
3148    if (brk_new == ARG1) {
3149       /* brk() succeeded */
3150       if (brk_new < brk_limit) {
3151          /* successfully shrunk the data segment. */
3152          VG_TRACK( die_mem_brk, (Addr)ARG1,
3153 		   brk_limit-ARG1 );
3154       } else
3155       if (brk_new > brk_limit) {
3156          /* successfully grew the data segment */
3157          VG_TRACK( new_mem_brk, brk_limit,
3158                    ARG1-brk_limit, tid );
3159       }
3160    } else {
3161       /* brk() failed */
3162       vg_assert(brk_limit == brk_new);
3163    }
3164 }
3165 
PRE(sys_chdir)3166 PRE(sys_chdir)
3167 {
3168    FUSE_COMPATIBLE_MAY_BLOCK();
3169    PRINT("sys_chdir ( %#lx(%s) )", ARG1,(char*)ARG1);
3170    PRE_REG_READ1(long, "chdir", const char *, path);
3171    PRE_MEM_RASCIIZ( "chdir(path)", ARG1 );
3172 }
3173 
PRE(sys_chmod)3174 PRE(sys_chmod)
3175 {
3176    FUSE_COMPATIBLE_MAY_BLOCK();
3177    PRINT("sys_chmod ( %#lx(%s), %lu )", ARG1, (HChar*)ARG1, ARG2);
3178    PRE_REG_READ2(long, "chmod", const char *, path, vki_mode_t, mode);
3179    PRE_MEM_RASCIIZ( "chmod(path)", ARG1 );
3180 }
3181 
PRE(sys_chown)3182 PRE(sys_chown)
3183 {
3184    FUSE_COMPATIBLE_MAY_BLOCK();
3185    PRINT("sys_chown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
3186    PRE_REG_READ3(long, "chown",
3187                  const char *, path, vki_uid_t, owner, vki_gid_t, group);
3188    PRE_MEM_RASCIIZ( "chown(path)", ARG1 );
3189 }
3190 
PRE(sys_lchown)3191 PRE(sys_lchown)
3192 {
3193    FUSE_COMPATIBLE_MAY_BLOCK();
3194    PRINT("sys_lchown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
3195    PRE_REG_READ3(long, "lchown",
3196                  const char *, path, vki_uid_t, owner, vki_gid_t, group);
3197    PRE_MEM_RASCIIZ( "lchown(path)", ARG1 );
3198 }
3199 
PRE(sys_close)3200 PRE(sys_close)
3201 {
3202    FUSE_COMPATIBLE_MAY_BLOCK();
3203    PRINT("sys_close ( %lu )", ARG1);
3204    PRE_REG_READ1(long, "close", unsigned int, fd);
3205 
3206    /* Detect and negate attempts by the client to close Valgrind's log fd */
3207    if ( (!ML_(fd_allowed)(ARG1, "close", tid, False))
3208         /* If doing -d style logging (which is to fd=2), don't
3209            allow that to be closed either. */
3210         || (ARG1 == 2/*stderr*/ && VG_(debugLog_getLevel)() > 0) )
3211       SET_STATUS_Failure( VKI_EBADF );
3212 }
3213 
POST(sys_close)3214 POST(sys_close)
3215 {
3216    if (VG_(clo_track_fds)) ML_(record_fd_close)(ARG1);
3217 }
3218 
PRE(sys_dup)3219 PRE(sys_dup)
3220 {
3221    PRINT("sys_dup ( %lu )", ARG1);
3222    PRE_REG_READ1(long, "dup", unsigned int, oldfd);
3223 }
3224 
POST(sys_dup)3225 POST(sys_dup)
3226 {
3227    vg_assert(SUCCESS);
3228    if (!ML_(fd_allowed)(RES, "dup", tid, True)) {
3229       VG_(close)(RES);
3230       SET_STATUS_Failure( VKI_EMFILE );
3231    } else {
3232       if (VG_(clo_track_fds))
3233          ML_(record_fd_open_named)(tid, RES);
3234    }
3235 }
3236 
PRE(sys_dup2)3237 PRE(sys_dup2)
3238 {
3239    PRINT("sys_dup2 ( %lu, %lu )", ARG1, ARG2);
3240    PRE_REG_READ2(long, "dup2", unsigned int, oldfd, unsigned int, newfd);
3241    if (!ML_(fd_allowed)(ARG2, "dup2", tid, True))
3242       SET_STATUS_Failure( VKI_EBADF );
3243 }
3244 
POST(sys_dup2)3245 POST(sys_dup2)
3246 {
3247    vg_assert(SUCCESS);
3248    if (VG_(clo_track_fds))
3249       ML_(record_fd_open_named)(tid, RES);
3250 }
3251 
PRE(sys_fchdir)3252 PRE(sys_fchdir)
3253 {
3254    FUSE_COMPATIBLE_MAY_BLOCK();
3255    PRINT("sys_fchdir ( %lu )", ARG1);
3256    PRE_REG_READ1(long, "fchdir", unsigned int, fd);
3257 }
3258 
PRE(sys_fchown)3259 PRE(sys_fchown)
3260 {
3261    FUSE_COMPATIBLE_MAY_BLOCK();
3262    PRINT("sys_fchown ( %lu, %lu, %lu )", ARG1, ARG2, ARG3);
3263    PRE_REG_READ3(long, "fchown",
3264                  unsigned int, fd, vki_uid_t, owner, vki_gid_t, group);
3265 }
3266 
PRE(sys_fchmod)3267 PRE(sys_fchmod)
3268 {
3269    FUSE_COMPATIBLE_MAY_BLOCK();
3270    PRINT("sys_fchmod ( %lu, %lu )", ARG1, ARG2);
3271    PRE_REG_READ2(long, "fchmod", unsigned int, fildes, vki_mode_t, mode);
3272 }
3273 
PRE(sys_newfstat)3274 PRE(sys_newfstat)
3275 {
3276    FUSE_COMPATIBLE_MAY_BLOCK();
3277    PRINT("sys_newfstat ( %lu, %#lx )", ARG1, ARG2);
3278    PRE_REG_READ2(long, "fstat", unsigned int, fd, struct stat *, buf);
3279    PRE_MEM_WRITE( "fstat(buf)", ARG2, sizeof(struct vki_stat) );
3280 }
3281 
POST(sys_newfstat)3282 POST(sys_newfstat)
3283 {
3284    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
3285 }
3286 
3287 #if !defined(VGO_solaris) && !defined(VGP_arm64_linux)
3288 static vki_sigset_t fork_saved_mask;
3289 
3290 // In Linux, the sys_fork() function varies across architectures, but we
3291 // ignore the various args it gets, and so it looks arch-neutral.  Hmm.
PRE(sys_fork)3292 PRE(sys_fork)
3293 {
3294    Bool is_child;
3295    Int child_pid;
3296    vki_sigset_t mask;
3297 
3298    PRINT("sys_fork ( )");
3299    PRE_REG_READ0(long, "fork");
3300 
3301    /* Block all signals during fork, so that we can fix things up in
3302       the child without being interrupted. */
3303    VG_(sigfillset)(&mask);
3304    VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, &fork_saved_mask);
3305 
3306    VG_(do_atfork_pre)(tid);
3307 
3308    SET_STATUS_from_SysRes( VG_(do_syscall0)(__NR_fork) );
3309 
3310    if (!SUCCESS) return;
3311 
3312 #if defined(VGO_linux)
3313    // RES is 0 for child, non-0 (the child's PID) for parent.
3314    is_child = ( RES == 0 ? True : False );
3315    child_pid = ( is_child ? -1 : RES );
3316 #elif defined(VGO_darwin)
3317    // RES is the child's pid.  RESHI is 1 for child, 0 for parent.
3318    is_child = RESHI;
3319    child_pid = RES;
3320 #else
3321 #  error Unknown OS
3322 #endif
3323 
3324    if (is_child) {
3325       VG_(do_atfork_child)(tid);
3326 
3327       /* restore signal mask */
3328       VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
3329    } else {
3330       VG_(do_atfork_parent)(tid);
3331 
3332       PRINT("   fork: process %d created child %d\n", VG_(getpid)(), child_pid);
3333 
3334       /* restore signal mask */
3335       VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
3336    }
3337 }
3338 #endif // !defined(VGO_solaris) && !defined(VGP_arm64_linux)
3339 
PRE(sys_ftruncate)3340 PRE(sys_ftruncate)
3341 {
3342    *flags |= SfMayBlock;
3343    PRINT("sys_ftruncate ( %lu, %lu )", ARG1, ARG2);
3344    PRE_REG_READ2(long, "ftruncate", unsigned int, fd, unsigned long, length);
3345 }
3346 
PRE(sys_truncate)3347 PRE(sys_truncate)
3348 {
3349    *flags |= SfMayBlock;
3350    PRINT("sys_truncate ( %#lx(%s), %lu )", ARG1, (HChar*)ARG1, ARG2);
3351    PRE_REG_READ2(long, "truncate",
3352                  const char *, path, unsigned long, length);
3353    PRE_MEM_RASCIIZ( "truncate(path)", ARG1 );
3354 }
3355 
PRE(sys_ftruncate64)3356 PRE(sys_ftruncate64)
3357 {
3358    *flags |= SfMayBlock;
3359 #if VG_WORDSIZE == 4
3360    PRINT("sys_ftruncate64 ( %lu, %llu )", ARG1, MERGE64(ARG2,ARG3));
3361    PRE_REG_READ3(long, "ftruncate64",
3362                  unsigned int, fd,
3363                  UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
3364 #else
3365    PRINT("sys_ftruncate64 ( %lu, %lu )", ARG1, ARG2);
3366    PRE_REG_READ2(long, "ftruncate64",
3367                  unsigned int,fd, UWord,length);
3368 #endif
3369 }
3370 
PRE(sys_truncate64)3371 PRE(sys_truncate64)
3372 {
3373    *flags |= SfMayBlock;
3374 #if VG_WORDSIZE == 4
3375    PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)MERGE64(ARG2, ARG3));
3376    PRE_REG_READ3(long, "truncate64",
3377                  const char *, path,
3378                  UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
3379 #else
3380    PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)ARG2);
3381    PRE_REG_READ2(long, "truncate64",
3382                  const char *,path, UWord,length);
3383 #endif
3384    PRE_MEM_RASCIIZ( "truncate64(path)", ARG1 );
3385 }
3386 
PRE(sys_getdents)3387 PRE(sys_getdents)
3388 {
3389    *flags |= SfMayBlock;
3390    PRINT("sys_getdents ( %lu, %#lx, %lu )", ARG1, ARG2, ARG3);
3391    PRE_REG_READ3(long, "getdents",
3392                  unsigned int, fd, struct vki_dirent *, dirp,
3393                  unsigned int, count);
3394    PRE_MEM_WRITE( "getdents(dirp)", ARG2, ARG3 );
3395 }
3396 
POST(sys_getdents)3397 POST(sys_getdents)
3398 {
3399    vg_assert(SUCCESS);
3400    if (RES > 0)
3401       POST_MEM_WRITE( ARG2, RES );
3402 }
3403 
PRE(sys_getdents64)3404 PRE(sys_getdents64)
3405 {
3406    *flags |= SfMayBlock;
3407    PRINT("sys_getdents64 ( %lu, %#lx, %lu )",ARG1, ARG2, ARG3);
3408    PRE_REG_READ3(long, "getdents64",
3409                  unsigned int, fd, struct vki_dirent64 *, dirp,
3410                  unsigned int, count);
3411    PRE_MEM_WRITE( "getdents64(dirp)", ARG2, ARG3 );
3412 }
3413 
POST(sys_getdents64)3414 POST(sys_getdents64)
3415 {
3416    vg_assert(SUCCESS);
3417    if (RES > 0)
3418       POST_MEM_WRITE( ARG2, RES );
3419 }
3420 
PRE(sys_getgroups)3421 PRE(sys_getgroups)
3422 {
3423    PRINT("sys_getgroups ( %ld, %#lx )", SARG1, ARG2);
3424    PRE_REG_READ2(long, "getgroups", int, size, vki_gid_t *, list);
3425    if (ARG1 > 0)
3426       PRE_MEM_WRITE( "getgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
3427 }
3428 
POST(sys_getgroups)3429 POST(sys_getgroups)
3430 {
3431    vg_assert(SUCCESS);
3432    if (ARG1 > 0 && RES > 0)
3433       POST_MEM_WRITE( ARG2, RES * sizeof(vki_gid_t) );
3434 }
3435 
PRE(sys_getcwd)3436 PRE(sys_getcwd)
3437 {
3438    // Comment from linux/fs/dcache.c:
3439    //   NOTE! The user-level library version returns a character pointer.
3440    //   The kernel system call just returns the length of the buffer filled
3441    //   (which includes the ending '\0' character), or a negative error
3442    //   value.
3443    // Is this Linux-specific?  If so it should be moved to syswrap-linux.c.
3444    PRINT("sys_getcwd ( %#lx, %llu )", ARG1,(ULong)ARG2);
3445    PRE_REG_READ2(long, "getcwd", char *, buf, unsigned long, size);
3446    PRE_MEM_WRITE( "getcwd(buf)", ARG1, ARG2 );
3447 }
3448 
POST(sys_getcwd)3449 POST(sys_getcwd)
3450 {
3451    vg_assert(SUCCESS);
3452    if (RES != (Addr)NULL)
3453       POST_MEM_WRITE( ARG1, RES );
3454 }
3455 
PRE(sys_geteuid)3456 PRE(sys_geteuid)
3457 {
3458    PRINT("sys_geteuid ( )");
3459    PRE_REG_READ0(long, "geteuid");
3460 }
3461 
PRE(sys_getegid)3462 PRE(sys_getegid)
3463 {
3464    PRINT("sys_getegid ( )");
3465    PRE_REG_READ0(long, "getegid");
3466 }
3467 
PRE(sys_getgid)3468 PRE(sys_getgid)
3469 {
3470    PRINT("sys_getgid ( )");
3471    PRE_REG_READ0(long, "getgid");
3472 }
3473 
PRE(sys_getpid)3474 PRE(sys_getpid)
3475 {
3476    PRINT("sys_getpid ()");
3477    PRE_REG_READ0(long, "getpid");
3478 }
3479 
PRE(sys_getpgid)3480 PRE(sys_getpgid)
3481 {
3482    PRINT("sys_getpgid ( %ld )", SARG1);
3483    PRE_REG_READ1(long, "getpgid", vki_pid_t, pid);
3484 }
3485 
PRE(sys_getpgrp)3486 PRE(sys_getpgrp)
3487 {
3488    PRINT("sys_getpgrp ()");
3489    PRE_REG_READ0(long, "getpgrp");
3490 }
3491 
PRE(sys_getppid)3492 PRE(sys_getppid)
3493 {
3494    PRINT("sys_getppid ()");
3495    PRE_REG_READ0(long, "getppid");
3496 }
3497 
/* Shared POST logic for the getrlimit variants.

   a1 is the resource number and a2 the address of the client's
   struct vki_rlimit.  The structure is marked as written, and for the
   NOFILE, DATA and STACK resources its contents are then overwritten
   with the values Valgrind keeps for the client (VG_(fd_soft_limit) /
   VG_(fd_hard_limit), VG_(client_rlimit_data), VG_(client_rlimit_stack)).
   Any other resource is left exactly as the kernel filled it in. */
static void common_post_getrlimit(ThreadId tid, UWord a1, UWord a2)
{
   struct vki_rlimit *client_rlim = (struct vki_rlimit *)a2;

   POST_MEM_WRITE( a2, sizeof(struct vki_rlimit) );

#ifdef _RLIMIT_POSIX_FLAG
   // Darwin will sometimes set _RLIMIT_POSIX_FLAG on getrlimit calls.
   // Strip it so the resource comparisons below work correctly.
   a1 &= ~_RLIMIT_POSIX_FLAG;
#endif

   if (a1 == VKI_RLIMIT_NOFILE) {
      client_rlim->rlim_cur = VG_(fd_soft_limit);
      client_rlim->rlim_max = VG_(fd_hard_limit);
   } else if (a1 == VKI_RLIMIT_DATA) {
      *client_rlim = VG_(client_rlimit_data);
   } else if (a1 == VKI_RLIMIT_STACK) {
      *client_rlim = VG_(client_rlimit_stack);
   }
}
3523 
PRE(sys_old_getrlimit)3524 PRE(sys_old_getrlimit)
3525 {
3526    PRINT("sys_old_getrlimit ( %lu, %#lx )", ARG1, ARG2);
3527    PRE_REG_READ2(long, "old_getrlimit",
3528                  unsigned int, resource, struct rlimit *, rlim);
3529    PRE_MEM_WRITE( "old_getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
3530 }
3531 
POST(sys_old_getrlimit)3532 POST(sys_old_getrlimit)
3533 {
3534    common_post_getrlimit(tid, ARG1, ARG2);
3535 }
3536 
PRE(sys_getrlimit)3537 PRE(sys_getrlimit)
3538 {
3539    PRINT("sys_getrlimit ( %lu, %#lx )", ARG1, ARG2);
3540    PRE_REG_READ2(long, "getrlimit",
3541                  unsigned int, resource, struct rlimit *, rlim);
3542    PRE_MEM_WRITE( "getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
3543 }
3544 
POST(sys_getrlimit)3545 POST(sys_getrlimit)
3546 {
3547    common_post_getrlimit(tid, ARG1, ARG2);
3548 }
3549 
PRE(sys_getrusage)3550 PRE(sys_getrusage)
3551 {
3552    PRINT("sys_getrusage ( %ld, %#lx )", SARG1, ARG2);
3553    PRE_REG_READ2(long, "getrusage", int, who, struct rusage *, usage);
3554    PRE_MEM_WRITE( "getrusage(usage)", ARG2, sizeof(struct vki_rusage) );
3555 }
3556 
POST(sys_getrusage)3557 POST(sys_getrusage)
3558 {
3559    vg_assert(SUCCESS);
3560    if (RES == 0)
3561       POST_MEM_WRITE( ARG2, sizeof(struct vki_rusage) );
3562 }
3563 
PRE(sys_gettimeofday)3564 PRE(sys_gettimeofday)
3565 {
3566    PRINT("sys_gettimeofday ( %#lx, %#lx )", ARG1,ARG2);
3567    PRE_REG_READ2(long, "gettimeofday",
3568                  struct timeval *, tv, struct timezone *, tz);
3569    // GrP fixme does darwin write to *tz anymore?
3570    if (ARG1 != 0)
3571       PRE_timeval_WRITE( "gettimeofday(tv)", ARG1 );
3572    if (ARG2 != 0)
3573       PRE_MEM_WRITE( "gettimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
3574 }
3575 
POST(sys_gettimeofday)3576 POST(sys_gettimeofday)
3577 {
3578    vg_assert(SUCCESS);
3579    if (RES == 0) {
3580       if (ARG1 != 0)
3581          POST_timeval_WRITE( ARG1 );
3582       if (ARG2 != 0)
3583 	 POST_MEM_WRITE( ARG2, sizeof(struct vki_timezone) );
3584    }
3585 }
3586 
PRE(sys_settimeofday)3587 PRE(sys_settimeofday)
3588 {
3589    PRINT("sys_settimeofday ( %#lx, %#lx )", ARG1,ARG2);
3590    PRE_REG_READ2(long, "settimeofday",
3591                  struct timeval *, tv, struct timezone *, tz);
3592    if (ARG1 != 0)
3593       PRE_timeval_READ( "settimeofday(tv)", ARG1 );
3594    if (ARG2 != 0) {
3595       PRE_MEM_READ( "settimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
3596       /* maybe should warn if tz->tz_dsttime is non-zero? */
3597    }
3598 }
3599 
PRE(sys_getuid)3600 PRE(sys_getuid)
3601 {
3602    PRINT("sys_getuid ( )");
3603    PRE_REG_READ0(long, "getuid");
3604 }
3605 
ML_(PRE_unknown_ioctl)3606 void ML_(PRE_unknown_ioctl)(ThreadId tid, UWord request, UWord arg)
3607 {
3608    /* We don't have any specific information on it, so
3609       try to do something reasonable based on direction and
3610       size bits.  The encoding scheme is described in
3611       /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
3612 
3613       According to Simon Hausmann, _IOC_READ means the kernel
3614       writes a value to the ioctl value passed from the user
3615       space and the other way around with _IOC_WRITE. */
3616 
3617 #if defined(VGO_solaris)
3618    /* Majority of Solaris ioctl requests does not honour direction hints. */
3619    UInt dir  = _VKI_IOC_NONE;
3620 #else
3621    UInt dir  = _VKI_IOC_DIR(request);
3622 #endif
3623    UInt size = _VKI_IOC_SIZE(request);
3624 
3625    if (SimHintiS(SimHint_lax_ioctls, VG_(clo_sim_hints))) {
3626       /*
3627        * Be very lax about ioctl handling; the only
3628        * assumption is that the size is correct. Doesn't
3629        * require the full buffer to be initialized when
3630        * writing.  Without this, using some device
3631        * drivers with a large number of strange ioctl
3632        * commands becomes very tiresome.
3633        */
3634    } else if (/* size == 0 || */ dir == _VKI_IOC_NONE) {
3635       static UWord unknown_ioctl[10];
3636       static Int moans = sizeof(unknown_ioctl) / sizeof(unknown_ioctl[0]);
3637 
3638       if (moans > 0 && !VG_(clo_xml)) {
3639          /* Check if have not already moaned for this request. */
3640          UInt i;
3641          for (i = 0; i < sizeof(unknown_ioctl)/sizeof(unknown_ioctl[0]); i++) {
3642             if (unknown_ioctl[i] == request)
3643                break;
3644             if (unknown_ioctl[i] == 0) {
3645                unknown_ioctl[i] = request;
3646                moans--;
3647                VG_(umsg)("Warning: noted but unhandled ioctl 0x%lx"
3648                          " with no size/direction hints.\n", request);
3649                VG_(umsg)("   This could cause spurious value errors to appear.\n");
3650                VG_(umsg)("   See README_MISSING_SYSCALL_OR_IOCTL for "
3651                          "guidance on writing a proper wrapper.\n" );
3652                //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
3653                return;
3654             }
3655          }
3656       }
3657    } else {
3658       //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
3659       //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
3660       if ((dir & _VKI_IOC_WRITE) && size > 0)
3661          PRE_MEM_READ( "ioctl(generic)", arg, size);
3662       if ((dir & _VKI_IOC_READ) && size > 0)
3663          PRE_MEM_WRITE( "ioctl(generic)", arg, size);
3664    }
3665 }
3666 
ML_(POST_unknown_ioctl)3667 void ML_(POST_unknown_ioctl)(ThreadId tid, UInt res, UWord request, UWord arg)
3668 {
3669    /* We don't have any specific information on it, so
3670       try to do something reasonable based on direction and
3671       size bits.  The encoding scheme is described in
3672       /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
3673 
3674       According to Simon Hausmann, _IOC_READ means the kernel
3675       writes a value to the ioctl value passed from the user
3676       space and the other way around with _IOC_WRITE. */
3677 
3678    UInt dir  = _VKI_IOC_DIR(request);
3679    UInt size = _VKI_IOC_SIZE(request);
3680    if (size > 0 && (dir & _VKI_IOC_READ)
3681        && res == 0
3682        && arg != (Addr)NULL) {
3683       POST_MEM_WRITE(arg, size);
3684    }
3685 }
3686 
3687 /*
3688    If we're sending a SIGKILL to one of our own threads, then simulate
3689    it rather than really sending the signal, so that the target thread
3690    gets a chance to clean up.  Returns True if we did the killing (or
3691    no killing is necessary), and False if the caller should use the
3692    normal kill syscall.
3693 
3694    "pid" is any pid argument which can be passed to kill; group kills
3695    (< -1, 0), and owner kills (-1) are ignored, on the grounds that
3696    they'll most likely hit all the threads and we won't need to worry
3697    about cleanup.  In truth, we can't fully emulate these multicast
3698    kills.
3699 
3700    "tgid" is a thread group id.  If it is not -1, then the target
3701    thread must be in that thread group.
3702  */
ML_(do_sigkill)3703 Bool ML_(do_sigkill)(Int pid, Int tgid)
3704 {
3705    ThreadState *tst;
3706    ThreadId tid;
3707 
3708    if (pid <= 0)
3709       return False;
3710 
3711    tid = VG_(lwpid_to_vgtid)(pid);
3712    if (tid == VG_INVALID_THREADID)
3713       return False;		/* none of our threads */
3714 
3715    tst = VG_(get_ThreadState)(tid);
3716    if (tst == NULL || tst->status == VgTs_Empty)
3717       return False;		/* hm, shouldn't happen */
3718 
3719    if (tgid != -1 && tst->os_state.threadgroup != tgid)
3720       return False;		/* not the right thread group */
3721 
3722    /* Check to see that the target isn't already exiting. */
3723    if (!VG_(is_exiting)(tid)) {
3724       if (VG_(clo_trace_signals))
3725 	 VG_(message)(Vg_DebugMsg,
3726                       "Thread %u being killed with SIGKILL\n",
3727                       tst->tid);
3728 
3729       tst->exitreason = VgSrc_FatalSig;
3730       tst->os_state.fatalsig = VKI_SIGKILL;
3731 
3732       if (!VG_(is_running_thread)(tid))
3733 	 VG_(get_thread_out_of_syscall)(tid);
3734    }
3735 
3736    return True;
3737 }
3738 
PRE(sys_kill)3739 PRE(sys_kill)
3740 {
3741    PRINT("sys_kill ( %ld, %ld )", SARG1, SARG2);
3742    PRE_REG_READ2(long, "kill", int, pid, int, signal);
3743    if (!ML_(client_signal_OK)(ARG2)) {
3744       SET_STATUS_Failure( VKI_EINVAL );
3745       return;
3746    }
3747 
3748    /* If we're sending SIGKILL, check to see if the target is one of
3749       our threads and handle it specially. */
3750    if (ARG2 == VKI_SIGKILL && ML_(do_sigkill)(ARG1, -1))
3751       SET_STATUS_Success(0);
3752    else
3753       /* re syscall3: Darwin has a 3rd arg, which is a flag (boolean)
3754          affecting how posix-compliant the call is.  I guess it is
3755          harmless to pass the 3rd arg on other platforms; hence pass
3756          it on all. */
3757       SET_STATUS_from_SysRes( VG_(do_syscall3)(SYSNO, ARG1, ARG2, ARG3) );
3758 
3759    if (VG_(clo_trace_signals))
3760       VG_(message)(Vg_DebugMsg, "kill: sent signal %ld to pid %ld\n",
3761 		   SARG2, SARG1);
3762 
3763    /* This kill might have given us a pending signal.  Ask for a check once
3764       the syscall is done. */
3765    *flags |= SfPollAfter;
3766 }
3767 
PRE(sys_link)3768 PRE(sys_link)
3769 {
3770    *flags |= SfMayBlock;
3771    PRINT("sys_link ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
3772    PRE_REG_READ2(long, "link", const char *, oldpath, const char *, newpath);
3773    PRE_MEM_RASCIIZ( "link(oldpath)", ARG1);
3774    PRE_MEM_RASCIIZ( "link(newpath)", ARG2);
3775 }
3776 
PRE(sys_newlstat)3777 PRE(sys_newlstat)
3778 {
3779    PRINT("sys_newlstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
3780    PRE_REG_READ2(long, "lstat", char *, file_name, struct stat *, buf);
3781    PRE_MEM_RASCIIZ( "lstat(file_name)", ARG1 );
3782    PRE_MEM_WRITE( "lstat(buf)", ARG2, sizeof(struct vki_stat) );
3783 }
3784 
POST(sys_newlstat)3785 POST(sys_newlstat)
3786 {
3787    vg_assert(SUCCESS);
3788    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
3789 }
3790 
PRE(sys_mkdir)3791 PRE(sys_mkdir)
3792 {
3793    *flags |= SfMayBlock;
3794    PRINT("sys_mkdir ( %#lx(%s), %ld )", ARG1, (HChar*)ARG1, SARG2);
3795    PRE_REG_READ2(long, "mkdir", const char *, pathname, int, mode);
3796    PRE_MEM_RASCIIZ( "mkdir(pathname)", ARG1 );
3797 }
3798 
PRE(sys_mprotect)3799 PRE(sys_mprotect)
3800 {
3801    PRINT("sys_mprotect ( %#lx, %lu, %lu )", ARG1, ARG2, ARG3);
3802    PRE_REG_READ3(long, "mprotect",
3803                  unsigned long, addr, vki_size_t, len, unsigned long, prot);
3804 
3805    if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "mprotect")) {
3806       SET_STATUS_Failure( VKI_ENOMEM );
3807    }
3808 #if defined(VKI_PROT_GROWSDOWN)
3809    else
3810    if (ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP)) {
3811       /* Deal with mprotects on growable stack areas.
3812 
3813          The critical files to understand all this are mm/mprotect.c
3814          in the kernel and sysdeps/unix/sysv/linux/dl-execstack.c in
3815          glibc.
3816 
3817          The kernel provides PROT_GROWSDOWN and PROT_GROWSUP which
3818          round the start/end address of mprotect to the start/end of
3819          the underlying vma and glibc uses that as an easy way to
3820          change the protection of the stack by calling mprotect on the
3821          last page of the stack with PROT_GROWSDOWN set.
3822 
3823          The sanity check provided by the kernel is that the vma must
3824          have the VM_GROWSDOWN/VM_GROWSUP flag set as appropriate.  */
3825       UInt grows = ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP);
3826       NSegment const *aseg = VG_(am_find_nsegment)(ARG1);
3827       NSegment const *rseg;
3828 
3829       vg_assert(aseg);
3830 
3831       if (grows == VKI_PROT_GROWSDOWN) {
3832          rseg = VG_(am_next_nsegment)( aseg, False/*backwards*/ );
3833          if (rseg
3834              && rseg->kind == SkResvn
3835              && rseg->smode == SmUpper
3836              && rseg->end+1 == aseg->start) {
3837             Addr end = ARG1 + ARG2;
3838             ARG1 = aseg->start;
3839             ARG2 = end - aseg->start;
3840             ARG3 &= ~VKI_PROT_GROWSDOWN;
3841          } else {
3842             SET_STATUS_Failure( VKI_EINVAL );
3843          }
3844       } else if (grows == VKI_PROT_GROWSUP) {
3845          rseg = VG_(am_next_nsegment)( aseg, True/*forwards*/ );
3846          if (rseg
3847              && rseg->kind == SkResvn
3848              && rseg->smode == SmLower
3849              && aseg->end+1 == rseg->start) {
3850             ARG2 = aseg->end - ARG1 + 1;
3851             ARG3 &= ~VKI_PROT_GROWSUP;
3852          } else {
3853             SET_STATUS_Failure( VKI_EINVAL );
3854          }
3855       } else {
3856          /* both GROWSUP and GROWSDOWN */
3857          SET_STATUS_Failure( VKI_EINVAL );
3858       }
3859    }
3860 #endif   // defined(VKI_PROT_GROWSDOWN)
3861 }
3862 
POST(sys_mprotect)3863 POST(sys_mprotect)
3864 {
3865    Addr a    = ARG1;
3866    SizeT len = ARG2;
3867    Int  prot = ARG3;
3868 
3869    ML_(notify_core_and_tool_of_mprotect)(a, len, prot);
3870 }
3871 
PRE(sys_munmap)3872 PRE(sys_munmap)
3873 {
3874    if (0) VG_(printf)("  munmap( %#lx )\n", ARG1);
3875    PRINT("sys_munmap ( %#lx, %llu )", ARG1,(ULong)ARG2);
3876    PRE_REG_READ2(long, "munmap", unsigned long, start, vki_size_t, length);
3877 
3878    if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "munmap"))
3879       SET_STATUS_Failure( VKI_EINVAL );
3880 }
3881 
POST(sys_munmap)3882 POST(sys_munmap)
3883 {
3884    Addr  a   = ARG1;
3885    SizeT len = ARG2;
3886 
3887    ML_(notify_core_and_tool_of_munmap)( a, len );
3888 }
3889 
PRE(sys_mincore)3890 PRE(sys_mincore)
3891 {
3892    PRINT("sys_mincore ( %#lx, %llu, %#lx )", ARG1,(ULong)ARG2,ARG3);
3893    PRE_REG_READ3(long, "mincore",
3894                  unsigned long, start, vki_size_t, length,
3895                  unsigned char *, vec);
3896    PRE_MEM_WRITE( "mincore(vec)", ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
3897 }
POST(sys_mincore)3898 POST(sys_mincore)
3899 {
3900    POST_MEM_WRITE( ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
3901 }
3902 
PRE(sys_nanosleep)3903 PRE(sys_nanosleep)
3904 {
3905    *flags |= SfMayBlock|SfPostOnFail;
3906    PRINT("sys_nanosleep ( %#lx, %#lx )", ARG1,ARG2);
3907    PRE_REG_READ2(long, "nanosleep",
3908                  struct timespec *, req, struct timespec *, rem);
3909    PRE_MEM_READ( "nanosleep(req)", ARG1, sizeof(struct vki_timespec) );
3910    if (ARG2 != 0)
3911       PRE_MEM_WRITE( "nanosleep(rem)", ARG2, sizeof(struct vki_timespec) );
3912 }
3913 
POST(sys_nanosleep)3914 POST(sys_nanosleep)
3915 {
3916    vg_assert(SUCCESS || FAILURE);
3917    if (ARG2 != 0 && FAILURE && ERR == VKI_EINTR)
3918       POST_MEM_WRITE( ARG2, sizeof(struct vki_timespec) );
3919 }
3920 
#if defined(VGO_linux) || defined(VGO_solaris)
/* Handles the case where the open is of /proc/self/auxv or
   /proc/<pid>/auxv, and just gives out a copy of the fd for the
   fake file we cooked up at startup (in m_main).  Also, seeks the
   cloned fd back to the start.

   status:   syscall status to fill in when the open is handled here.
   filename: client-supplied path; it is only compared if its first
             byte is safe to dereference.
   flags:    open flags; any write access is refused with EACCES.

   Returns True if auxv open was handled (status is set), False if the
   path is not an auxv path and the caller should proceed normally. */
Bool ML_(handle_auxv_open)(SyscallStatus *status, const HChar *filename,
                           int flags)
{
   HChar  name[30];   // large enough

   if (!ML_(safe_to_deref)((const void *) filename, 1))
      return False;

   /* Opening /proc/<pid>/auxv or /proc/self/auxv? */
   VG_(sprintf)(name, "/proc/%d/auxv", VG_(getpid)());
   if (!VG_STREQ(filename, name) && !VG_STREQ(filename, "/proc/self/auxv"))
      return False;

   /* Allow to open the file only for reading. */
   if (flags & (VKI_O_WRONLY | VKI_O_RDWR)) {
      SET_STATUS_Failure(VKI_EACCES);
      return True;
   }

#  if defined(VGO_solaris)
   VG_(sprintf)(name, "/proc/self/fd/%d", VG_(cl_auxv_fd));
   SysRes sres = VG_(open)(name, flags, 0);
   SET_STATUS_from_SysRes(sres);
#  else
   SysRes sres = VG_(dup)(VG_(cl_auxv_fd));
   SET_STATUS_from_SysRes(sres);
   if (!sr_isError(sres)) {
      OffT off = VG_(lseek)(sr_Res(sres), 0, VKI_SEEK_SET);
      if (off < 0) {
         /* The client is told the open failed, so it will never close
            the duplicated descriptor: release it here rather than
            leaking it. */
         VG_(close)(sr_Res(sres));
         SET_STATUS_Failure(VKI_EMFILE);
      }
   }
#  endif

   return True;
}
#endif // defined(VGO_linux) || defined(VGO_solaris)
3963 
PRE(sys_open)3964 PRE(sys_open)
3965 {
3966    if (ARG2 & VKI_O_CREAT) {
3967       // 3-arg version
3968       PRINT("sys_open ( %#lx(%s), %ld, %ld )",ARG1, (HChar*)ARG1, SARG2, SARG3);
3969       PRE_REG_READ3(long, "open",
3970                     const char *, filename, int, flags, int, mode);
3971    } else {
3972       // 2-arg version
3973       PRINT("sys_open ( %#lx(%s), %ld )",ARG1, (HChar*)ARG1, SARG2);
3974       PRE_REG_READ2(long, "open",
3975                     const char *, filename, int, flags);
3976    }
3977    PRE_MEM_RASCIIZ( "open(filename)", ARG1 );
3978 
3979 #if defined(VGO_linux)
3980    /* Handle the case where the open is of /proc/self/cmdline or
3981       /proc/<pid>/cmdline, and just give it a copy of the fd for the
3982       fake file we cooked up at startup (in m_main).  Also, seek the
3983       cloned fd back to the start. */
3984    {
3985       HChar  name[30];   // large enough
3986       HChar* arg1s = (HChar*) ARG1;
3987       SysRes sres;
3988 
3989       VG_(sprintf)(name, "/proc/%d/cmdline", VG_(getpid)());
3990       if (ML_(safe_to_deref)( arg1s, 1 )
3991           && (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/cmdline"))) {
3992          sres = VG_(dup)( VG_(cl_cmdline_fd) );
3993          SET_STATUS_from_SysRes( sres );
3994          if (!sr_isError(sres)) {
3995             OffT off = VG_(lseek)( sr_Res(sres), 0, VKI_SEEK_SET );
3996             if (off < 0)
3997                SET_STATUS_Failure( VKI_EMFILE );
3998          }
3999          return;
4000       }
4001    }
4002 
4003    /* Handle also the case of /proc/self/auxv or /proc/<pid>/auxv. */
4004    if (ML_(handle_auxv_open)(status, (const HChar *)ARG1, ARG2))
4005       return;
4006 #endif // defined(VGO_linux)
4007 
4008    /* Otherwise handle normally */
4009    *flags |= SfMayBlock;
4010 }
4011 
POST(sys_open)4012 POST(sys_open)
4013 {
4014    vg_assert(SUCCESS);
4015    if (!ML_(fd_allowed)(RES, "open", tid, True)) {
4016       VG_(close)(RES);
4017       SET_STATUS_Failure( VKI_EMFILE );
4018    } else {
4019       if (VG_(clo_track_fds))
4020          ML_(record_fd_open_with_given_name)(tid, RES, (HChar*)ARG1);
4021    }
4022 }
4023 
PRE(sys_read)4024 PRE(sys_read)
4025 {
4026    *flags |= SfMayBlock;
4027    PRINT("sys_read ( %lu, %#lx, %lu )", ARG1, ARG2, ARG3);
4028    PRE_REG_READ3(ssize_t, "read",
4029                  unsigned int, fd, char *, buf, vki_size_t, count);
4030 
4031    if (!ML_(fd_allowed)(ARG1, "read", tid, False))
4032       SET_STATUS_Failure( VKI_EBADF );
4033    else
4034       PRE_MEM_WRITE( "read(buf)", ARG2, ARG3 );
4035 }
4036 
POST(sys_read)4037 POST(sys_read)
4038 {
4039    vg_assert(SUCCESS);
4040    POST_MEM_WRITE( ARG2, RES );
4041 }
4042 
PRE(sys_write)4043 PRE(sys_write)
4044 {
4045    Bool ok;
4046    *flags |= SfMayBlock;
4047    PRINT("sys_write ( %lu, %#lx, %lu )", ARG1, ARG2, ARG3);
4048    PRE_REG_READ3(ssize_t, "write",
4049                  unsigned int, fd, const char *, buf, vki_size_t, count);
4050    /* check to see if it is allowed.  If not, try for an exemption from
4051       --sim-hints=enable-outer (used for self hosting). */
4052    ok = ML_(fd_allowed)(ARG1, "write", tid, False);
4053    if (!ok && ARG1 == 2/*stderr*/
4054            && SimHintiS(SimHint_enable_outer, VG_(clo_sim_hints)))
4055       ok = True;
4056 #if defined(VGO_solaris)
4057    if (!ok && VG_(vfork_fildes_addr) != NULL
4058        && *VG_(vfork_fildes_addr) >= 0 && *VG_(vfork_fildes_addr) == ARG1)
4059       ok = True;
4060 #endif
4061    if (!ok)
4062       SET_STATUS_Failure( VKI_EBADF );
4063    else
4064       PRE_MEM_READ( "write(buf)", ARG2, ARG3 );
4065 }
4066 
PRE(sys_creat)4067 PRE(sys_creat)
4068 {
4069    *flags |= SfMayBlock;
4070    PRINT("sys_creat ( %#lx(%s), %ld )", ARG1, (HChar*)ARG1, SARG2);
4071    PRE_REG_READ2(long, "creat", const char *, pathname, int, mode);
4072    PRE_MEM_RASCIIZ( "creat(pathname)", ARG1 );
4073 }
4074 
POST(sys_creat)4075 POST(sys_creat)
4076 {
4077    vg_assert(SUCCESS);
4078    if (!ML_(fd_allowed)(RES, "creat", tid, True)) {
4079       VG_(close)(RES);
4080       SET_STATUS_Failure( VKI_EMFILE );
4081    } else {
4082       if (VG_(clo_track_fds))
4083          ML_(record_fd_open_with_given_name)(tid, RES, (HChar*)ARG1);
4084    }
4085 }
4086 
PRE(sys_poll)4087 PRE(sys_poll)
4088 {
4089    /* struct pollfd {
4090         int fd;           -- file descriptor
4091         short events;     -- requested events
4092         short revents;    -- returned events
4093       };
4094       int poll(struct pollfd *ufds, unsigned int nfds, int timeout)
4095    */
4096    UInt i;
4097    struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
4098    *flags |= SfMayBlock;
4099    PRINT("sys_poll ( %#lx, %lu, %ld )\n", ARG1, ARG2, SARG3);
4100    PRE_REG_READ3(long, "poll",
4101                  struct vki_pollfd *, ufds, unsigned int, nfds, long, timeout);
4102 
4103    for (i = 0; i < ARG2; i++) {
4104       PRE_MEM_READ( "poll(ufds.fd)",
4105                     (Addr)(&ufds[i].fd), sizeof(ufds[i].fd) );
4106       PRE_MEM_READ( "poll(ufds.events)",
4107                     (Addr)(&ufds[i].events), sizeof(ufds[i].events) );
4108       PRE_MEM_WRITE( "poll(ufds.revents)",
4109                      (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
4110    }
4111 }
4112 
POST(sys_poll)4113 POST(sys_poll)
4114 {
4115    if (RES >= 0) {
4116       UInt i;
4117       struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
4118       for (i = 0; i < ARG2; i++)
4119 	 POST_MEM_WRITE( (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
4120    }
4121 }
4122 
PRE(sys_readlink)4123 PRE(sys_readlink)
4124 {
4125    FUSE_COMPATIBLE_MAY_BLOCK();
4126    Word saved = SYSNO;
4127 
4128    PRINT("sys_readlink ( %#lx(%s), %#lx, %llu )", ARG1,(char*)ARG1,ARG2,(ULong)ARG3);
4129    PRE_REG_READ3(long, "readlink",
4130                  const char *, path, char *, buf, int, bufsiz);
4131    PRE_MEM_RASCIIZ( "readlink(path)", ARG1 );
4132    PRE_MEM_WRITE( "readlink(buf)", ARG2,ARG3 );
4133 
4134 
4135    {
4136 #if defined(VGO_linux) || defined(VGO_solaris)
4137 #if defined(VGO_linux)
4138 #define PID_EXEPATH  "/proc/%d/exe"
4139 #define SELF_EXEPATH "/proc/self/exe"
4140 #define SELF_EXEFD   "/proc/self/fd/%d"
4141 #elif defined(VGO_solaris)
4142 #define PID_EXEPATH  "/proc/%d/path/a.out"
4143 #define SELF_EXEPATH "/proc/self/path/a.out"
4144 #define SELF_EXEFD   "/proc/self/path/%d"
4145 #endif
4146       /*
4147        * Handle the case where readlink is looking at /proc/self/exe or
4148        * /proc/<pid>/exe, or equivalent on Solaris.
4149        */
4150       HChar  name[30];   // large enough
4151       HChar* arg1s = (HChar*) ARG1;
4152       VG_(sprintf)(name, PID_EXEPATH, VG_(getpid)());
4153       if (ML_(safe_to_deref)(arg1s, 1)
4154           && (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, SELF_EXEPATH))) {
4155          VG_(sprintf)(name, SELF_EXEFD, VG_(cl_exec_fd));
4156          SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, (UWord)name,
4157                                                          ARG2, ARG3));
4158       } else
4159 #endif
4160       {
4161          /* Normal case */
4162          SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, ARG1, ARG2, ARG3));
4163       }
4164    }
4165 
4166    if (SUCCESS && RES > 0)
4167       POST_MEM_WRITE( ARG2, RES );
4168 }
4169 
PRE(sys_readv)4170 PRE(sys_readv)
4171 {
4172    Int i;
4173    struct vki_iovec * vec;
4174    *flags |= SfMayBlock;
4175    PRINT("sys_readv ( %lu, %#lx, %lu )", ARG1, ARG2, ARG3);
4176    PRE_REG_READ3(ssize_t, "readv",
4177                  unsigned long, fd, const struct iovec *, vector,
4178                  unsigned long, count);
4179    if (!ML_(fd_allowed)(ARG1, "readv", tid, False)) {
4180       SET_STATUS_Failure( VKI_EBADF );
4181    } else {
4182       if ((Int)ARG3 >= 0)
4183          PRE_MEM_READ( "readv(vector)", ARG2, ARG3 * sizeof(struct vki_iovec) );
4184 
4185       if (ARG2 != 0) {
4186          /* ToDo: don't do any of the following if the vector is invalid */
4187          vec = (struct vki_iovec *)ARG2;
4188          for (i = 0; i < (Int)ARG3; i++)
4189             PRE_MEM_WRITE( "readv(vector[...])",
4190                            (Addr)vec[i].iov_base, vec[i].iov_len );
4191       }
4192    }
4193 }
4194 
POST(sys_readv)4195 POST(sys_readv)
4196 {
4197    vg_assert(SUCCESS);
4198    if (RES > 0) {
4199       Int i;
4200       struct vki_iovec * vec = (struct vki_iovec *)ARG2;
4201       Int remains = RES;
4202 
4203       /* RES holds the number of bytes read. */
4204       for (i = 0; i < (Int)ARG3; i++) {
4205 	 Int nReadThisBuf = vec[i].iov_len;
4206 	 if (nReadThisBuf > remains) nReadThisBuf = remains;
4207 	 POST_MEM_WRITE( (Addr)vec[i].iov_base, nReadThisBuf );
4208 	 remains -= nReadThisBuf;
4209 	 if (remains < 0) VG_(core_panic)("readv: remains < 0");
4210       }
4211    }
4212 }
4213 
PRE(sys_rename)4214 PRE(sys_rename)
4215 {
4216    FUSE_COMPATIBLE_MAY_BLOCK();
4217    PRINT("sys_rename ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
4218    PRE_REG_READ2(long, "rename", const char *, oldpath, const char *, newpath);
4219    PRE_MEM_RASCIIZ( "rename(oldpath)", ARG1 );
4220    PRE_MEM_RASCIIZ( "rename(newpath)", ARG2 );
4221 }
4222 
PRE(sys_rmdir)4223 PRE(sys_rmdir)
4224 {
4225    *flags |= SfMayBlock;
4226    PRINT("sys_rmdir ( %#lx(%s) )", ARG1,(char*)ARG1);
4227    PRE_REG_READ1(long, "rmdir", const char *, pathname);
4228    PRE_MEM_RASCIIZ( "rmdir(pathname)", ARG1 );
4229 }
4230 
PRE(sys_select)4231 PRE(sys_select)
4232 {
4233    *flags |= SfMayBlock;
4234    PRINT("sys_select ( %ld, %#lx, %#lx, %#lx, %#lx )", SARG1, ARG2, ARG3,
4235          ARG4, ARG5);
4236    PRE_REG_READ5(long, "select",
4237                  int, n, vki_fd_set *, readfds, vki_fd_set *, writefds,
4238                  vki_fd_set *, exceptfds, struct vki_timeval *, timeout);
4239    // XXX: this possibly understates how much memory is read.
4240    if (ARG2 != 0)
4241       PRE_MEM_READ( "select(readfds)",
4242 		     ARG2, ARG1/8 /* __FD_SETSIZE/8 */ );
4243    if (ARG3 != 0)
4244       PRE_MEM_READ( "select(writefds)",
4245 		     ARG3, ARG1/8 /* __FD_SETSIZE/8 */ );
4246    if (ARG4 != 0)
4247       PRE_MEM_READ( "select(exceptfds)",
4248 		     ARG4, ARG1/8 /* __FD_SETSIZE/8 */ );
4249    if (ARG5 != 0)
4250       PRE_timeval_READ( "select(timeout)", ARG5 );
4251 }
4252 
PRE(sys_setgid)4253 PRE(sys_setgid)
4254 {
4255    PRINT("sys_setgid ( %lu )", ARG1);
4256    PRE_REG_READ1(long, "setgid", vki_gid_t, gid);
4257 }
4258 
PRE(sys_setsid)4259 PRE(sys_setsid)
4260 {
4261    PRINT("sys_setsid ( )");
4262    PRE_REG_READ0(long, "setsid");
4263 }
4264 
PRE(sys_setgroups)4265 PRE(sys_setgroups)
4266 {
4267    PRINT("setgroups ( %llu, %#lx )", (ULong)ARG1, ARG2);
4268    PRE_REG_READ2(long, "setgroups", int, size, vki_gid_t *, list);
4269    if (ARG1 > 0)
4270       PRE_MEM_READ( "setgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
4271 }
4272 
PRE(sys_setpgid)4273 PRE(sys_setpgid)
4274 {
4275    PRINT("setpgid ( %ld, %ld )", SARG1, SARG2);
4276    PRE_REG_READ2(long, "setpgid", vki_pid_t, pid, vki_pid_t, pgid);
4277 }
4278 
PRE(sys_setregid)4279 PRE(sys_setregid)
4280 {
4281    PRINT("sys_setregid ( %lu, %lu )", ARG1, ARG2);
4282    PRE_REG_READ2(long, "setregid", vki_gid_t, rgid, vki_gid_t, egid);
4283 }
4284 
PRE(sys_setreuid)4285 PRE(sys_setreuid)
4286 {
4287    PRINT("sys_setreuid ( 0x%lx, 0x%lx )", ARG1, ARG2);
4288    PRE_REG_READ2(long, "setreuid", vki_uid_t, ruid, vki_uid_t, euid);
4289 }
4290 
PRE(sys_setrlimit)4291 PRE(sys_setrlimit)
4292 {
4293    UWord arg1 = ARG1;
4294    PRINT("sys_setrlimit ( %lu, %#lx )", ARG1, ARG2);
4295    PRE_REG_READ2(long, "setrlimit",
4296                  unsigned int, resource, struct rlimit *, rlim);
4297    PRE_MEM_READ( "setrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
4298 
4299 #ifdef _RLIMIT_POSIX_FLAG
4300    // Darwin will sometimes set _RLIMIT_POSIX_FLAG on setrlimit calls.
4301    // Unset it here to make the if statements below work correctly.
4302    arg1 &= ~_RLIMIT_POSIX_FLAG;
4303 #endif
4304 
4305    if (!VG_(am_is_valid_for_client)(ARG2, sizeof(struct vki_rlimit),
4306                                     VKI_PROT_READ)) {
4307       SET_STATUS_Failure( VKI_EFAULT );
4308    }
4309    else if (((struct vki_rlimit *)ARG2)->rlim_cur
4310             > ((struct vki_rlimit *)ARG2)->rlim_max) {
4311       SET_STATUS_Failure( VKI_EINVAL );
4312    }
4313    else if (arg1 == VKI_RLIMIT_NOFILE) {
4314       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(fd_hard_limit) ||
4315           ((struct vki_rlimit *)ARG2)->rlim_max != VG_(fd_hard_limit)) {
4316          SET_STATUS_Failure( VKI_EPERM );
4317       }
4318       else {
4319          VG_(fd_soft_limit) = ((struct vki_rlimit *)ARG2)->rlim_cur;
4320          SET_STATUS_Success( 0 );
4321       }
4322    }
4323    else if (arg1 == VKI_RLIMIT_DATA) {
4324       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_data).rlim_max ||
4325           ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_data).rlim_max) {
4326          SET_STATUS_Failure( VKI_EPERM );
4327       }
4328       else {
4329          VG_(client_rlimit_data) = *(struct vki_rlimit *)ARG2;
4330          SET_STATUS_Success( 0 );
4331       }
4332    }
4333    else if (arg1 == VKI_RLIMIT_STACK && tid == 1) {
4334       if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_stack).rlim_max ||
4335           ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_stack).rlim_max) {
4336          SET_STATUS_Failure( VKI_EPERM );
4337       }
4338       else {
4339          /* Change the value of client_stack_szB to the rlim_cur value but
4340             only if it is smaller than the size of the allocated stack for the
4341             client.
4342             TODO: All platforms should set VG_(clstk_max_size) as part of their
4343                   setup_client_stack(). */
4344          if ((VG_(clstk_max_size) == 0)
4345              || (((struct vki_rlimit *) ARG2)->rlim_cur <= VG_(clstk_max_size)))
4346             VG_(threads)[tid].client_stack_szB = ((struct vki_rlimit *)ARG2)->rlim_cur;
4347 
4348          VG_(client_rlimit_stack) = *(struct vki_rlimit *)ARG2;
4349          SET_STATUS_Success( 0 );
4350       }
4351    }
4352 }
4353 
PRE(sys_setuid)4354 PRE(sys_setuid)
4355 {
4356    PRINT("sys_setuid ( %lu )", ARG1);
4357    PRE_REG_READ1(long, "setuid", vki_uid_t, uid);
4358 }
4359 
PRE(sys_newstat)4360 PRE(sys_newstat)
4361 {
4362    FUSE_COMPATIBLE_MAY_BLOCK();
4363    PRINT("sys_newstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
4364    PRE_REG_READ2(long, "stat", char *, file_name, struct stat *, buf);
4365    PRE_MEM_RASCIIZ( "stat(file_name)", ARG1 );
4366    PRE_MEM_WRITE( "stat(buf)", ARG2, sizeof(struct vki_stat) );
4367 }
4368 
POST(sys_newstat)4369 POST(sys_newstat)
4370 {
4371    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
4372 }
4373 
PRE(sys_statfs)4374 PRE(sys_statfs)
4375 {
4376    FUSE_COMPATIBLE_MAY_BLOCK();
4377    PRINT("sys_statfs ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
4378    PRE_REG_READ2(long, "statfs", const char *, path, struct statfs *, buf);
4379    PRE_MEM_RASCIIZ( "statfs(path)", ARG1 );
4380    PRE_MEM_WRITE( "statfs(buf)", ARG2, sizeof(struct vki_statfs) );
4381 }
POST(sys_statfs)4382 POST(sys_statfs)
4383 {
4384    POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
4385 }
4386 
PRE(sys_statfs64)4387 PRE(sys_statfs64)
4388 {
4389    PRINT("sys_statfs64 ( %#lx(%s), %llu, %#lx )",ARG1,(char*)ARG1,(ULong)ARG2,ARG3);
4390    PRE_REG_READ3(long, "statfs64",
4391                  const char *, path, vki_size_t, size, struct statfs64 *, buf);
4392    PRE_MEM_RASCIIZ( "statfs64(path)", ARG1 );
4393    PRE_MEM_WRITE( "statfs64(buf)", ARG3, ARG2 );
4394 }
POST(sys_statfs64)4395 POST(sys_statfs64)
4396 {
4397    POST_MEM_WRITE( ARG3, ARG2 );
4398 }
4399 
PRE(sys_symlink)4400 PRE(sys_symlink)
4401 {
4402    *flags |= SfMayBlock;
4403    PRINT("sys_symlink ( %#lx(%s), %#lx(%s) )",ARG1,(char*)ARG1,ARG2,(char*)ARG2);
4404    PRE_REG_READ2(long, "symlink", const char *, oldpath, const char *, newpath);
4405    PRE_MEM_RASCIIZ( "symlink(oldpath)", ARG1 );
4406    PRE_MEM_RASCIIZ( "symlink(newpath)", ARG2 );
4407 }
4408 
PRE(sys_time)4409 PRE(sys_time)
4410 {
4411    /* time_t time(time_t *t); */
4412    PRINT("sys_time ( %#lx )",ARG1);
4413    PRE_REG_READ1(long, "time", int *, t);
4414    if (ARG1 != 0) {
4415       PRE_MEM_WRITE( "time(t)", ARG1, sizeof(vki_time_t) );
4416    }
4417 }
4418 
POST(sys_time)4419 POST(sys_time)
4420 {
4421    if (ARG1 != 0) {
4422       POST_MEM_WRITE( ARG1, sizeof(vki_time_t) );
4423    }
4424 }
4425 
PRE(sys_times)4426 PRE(sys_times)
4427 {
4428    PRINT("sys_times ( %#lx )", ARG1);
4429    PRE_REG_READ1(long, "times", struct tms *, buf);
4430    if (ARG1 != 0) {
4431       PRE_MEM_WRITE( "times(buf)", ARG1, sizeof(struct vki_tms) );
4432    }
4433 }
4434 
POST(sys_times)4435 POST(sys_times)
4436 {
4437    if (ARG1 != 0) {
4438       POST_MEM_WRITE( ARG1, sizeof(struct vki_tms) );
4439    }
4440 }
4441 
PRE(sys_umask)4442 PRE(sys_umask)
4443 {
4444    PRINT("sys_umask ( %ld )", SARG1);
4445    PRE_REG_READ1(long, "umask", int, mask);
4446 }
4447 
PRE(sys_unlink)4448 PRE(sys_unlink)
4449 {
4450    *flags |= SfMayBlock;
4451    PRINT("sys_unlink ( %#lx(%s) )", ARG1,(char*)ARG1);
4452    PRE_REG_READ1(long, "unlink", const char *, pathname);
4453    PRE_MEM_RASCIIZ( "unlink(pathname)", ARG1 );
4454 }
4455 
PRE(sys_newuname)4456 PRE(sys_newuname)
4457 {
4458    PRINT("sys_newuname ( %#lx )", ARG1);
4459    PRE_REG_READ1(long, "uname", struct new_utsname *, buf);
4460    PRE_MEM_WRITE( "uname(buf)", ARG1, sizeof(struct vki_new_utsname) );
4461 }
4462 
POST(sys_newuname)4463 POST(sys_newuname)
4464 {
4465    if (ARG1 != 0) {
4466       POST_MEM_WRITE( ARG1, sizeof(struct vki_new_utsname) );
4467    }
4468 }
4469 
PRE(sys_waitpid)4470 PRE(sys_waitpid)
4471 {
4472    *flags |= SfMayBlock;
4473    PRINT("sys_waitpid ( %ld, %#lx, %ld )", SARG1, ARG2, SARG3);
4474    PRE_REG_READ3(long, "waitpid",
4475                  vki_pid_t, pid, unsigned int *, status, int, options);
4476 
4477    if (ARG2 != (Addr)NULL)
4478       PRE_MEM_WRITE( "waitpid(status)", ARG2, sizeof(int) );
4479 }
4480 
POST(sys_waitpid)4481 POST(sys_waitpid)
4482 {
4483    if (ARG2 != (Addr)NULL)
4484       POST_MEM_WRITE( ARG2, sizeof(int) );
4485 }
4486 
PRE(sys_wait4)4487 PRE(sys_wait4)
4488 {
4489    *flags |= SfMayBlock;
4490    PRINT("sys_wait4 ( %ld, %#lx, %ld, %#lx )", SARG1, ARG2, SARG3, ARG4);
4491 
4492    PRE_REG_READ4(long, "wait4",
4493                  vki_pid_t, pid, unsigned int *, status, int, options,
4494                  struct rusage *, rusage);
4495    if (ARG2 != (Addr)NULL)
4496       PRE_MEM_WRITE( "wait4(status)", ARG2, sizeof(int) );
4497    if (ARG4 != (Addr)NULL)
4498       PRE_MEM_WRITE( "wait4(rusage)", ARG4, sizeof(struct vki_rusage) );
4499 }
4500 
POST(sys_wait4)4501 POST(sys_wait4)
4502 {
4503    if (ARG2 != (Addr)NULL)
4504       POST_MEM_WRITE( ARG2, sizeof(int) );
4505    if (ARG4 != (Addr)NULL)
4506       POST_MEM_WRITE( ARG4, sizeof(struct vki_rusage) );
4507 }
4508 
PRE(sys_writev)4509 PRE(sys_writev)
4510 {
4511    Int i;
4512    struct vki_iovec * vec;
4513    *flags |= SfMayBlock;
4514    PRINT("sys_writev ( %lu, %#lx, %lu )", ARG1, ARG2, ARG3);
4515    PRE_REG_READ3(ssize_t, "writev",
4516                  unsigned long, fd, const struct iovec *, vector,
4517                  unsigned long, count);
4518    if (!ML_(fd_allowed)(ARG1, "writev", tid, False)) {
4519       SET_STATUS_Failure( VKI_EBADF );
4520    } else {
4521       if ((Int)ARG3 >= 0)
4522          PRE_MEM_READ( "writev(vector)",
4523                        ARG2, ARG3 * sizeof(struct vki_iovec) );
4524       if (ARG2 != 0) {
4525          /* ToDo: don't do any of the following if the vector is invalid */
4526          vec = (struct vki_iovec *)ARG2;
4527          for (i = 0; i < (Int)ARG3; i++)
4528             PRE_MEM_READ( "writev(vector[...])",
4529                            (Addr)vec[i].iov_base, vec[i].iov_len );
4530       }
4531    }
4532 }
4533 
PRE(sys_utimes)4534 PRE(sys_utimes)
4535 {
4536    FUSE_COMPATIBLE_MAY_BLOCK();
4537    PRINT("sys_utimes ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
4538    PRE_REG_READ2(long, "utimes", char *, filename, struct timeval *, tvp);
4539    PRE_MEM_RASCIIZ( "utimes(filename)", ARG1 );
4540    if (ARG2 != 0) {
4541       PRE_timeval_READ( "utimes(tvp[0])", ARG2 );
4542       PRE_timeval_READ( "utimes(tvp[1])", ARG2+sizeof(struct vki_timeval) );
4543    }
4544 }
4545 
PRE(sys_acct)4546 PRE(sys_acct)
4547 {
4548    PRINT("sys_acct ( %#lx(%s) )", ARG1,(char*)ARG1);
4549    PRE_REG_READ1(long, "acct", const char *, filename);
4550    PRE_MEM_RASCIIZ( "acct(filename)", ARG1 );
4551 }
4552 
PRE(sys_pause)4553 PRE(sys_pause)
4554 {
4555    *flags |= SfMayBlock;
4556    PRINT("sys_pause ( )");
4557    PRE_REG_READ0(long, "pause");
4558 }
4559 
PRE(sys_sigaltstack)4560 PRE(sys_sigaltstack)
4561 {
4562    PRINT("sigaltstack ( %#lx, %#lx )",ARG1,ARG2);
4563    PRE_REG_READ2(int, "sigaltstack",
4564                  const vki_stack_t *, ss, vki_stack_t *, oss);
4565    if (ARG1 != 0) {
4566       const vki_stack_t *ss = (vki_stack_t *)ARG1;
4567       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_sp, sizeof(ss->ss_sp) );
4568       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_flags, sizeof(ss->ss_flags) );
4569       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_size, sizeof(ss->ss_size) );
4570    }
4571    if (ARG2 != 0) {
4572       PRE_MEM_WRITE( "sigaltstack(oss)", ARG2, sizeof(vki_stack_t) );
4573    }
4574 
4575    /* Be safe. */
4576    if (ARG1 && !ML_(safe_to_deref((void*)ARG1, sizeof(vki_stack_t)))) {
4577       SET_STATUS_Failure(VKI_EFAULT);
4578       return;
4579    }
4580    if (ARG2 && !ML_(safe_to_deref((void*)ARG2, sizeof(vki_stack_t)))) {
4581       SET_STATUS_Failure(VKI_EFAULT);
4582       return;
4583    }
4584 
4585    SET_STATUS_from_SysRes(
4586       VG_(do_sys_sigaltstack) (tid, (vki_stack_t*)ARG1,
4587                               (vki_stack_t*)ARG2)
4588    );
4589 }
POST(sys_sigaltstack)4590 POST(sys_sigaltstack)
4591 {
4592    vg_assert(SUCCESS);
4593    if (RES == 0 && ARG2 != 0)
4594       POST_MEM_WRITE( ARG2, sizeof(vki_stack_t));
4595 }
4596 
PRE(sys_sethostname)4597 PRE(sys_sethostname)
4598 {
4599    PRINT("sys_sethostname ( %#lx, %ld )", ARG1, SARG2);
4600    PRE_REG_READ2(long, "sethostname", char *, name, int, len);
4601    PRE_MEM_READ( "sethostname(name)", ARG1, ARG2 );
4602 }
4603 
4604 #undef PRE
4605 #undef POST
4606 
4607 #endif // defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
4608 
4609 /*--------------------------------------------------------------------*/
4610 /*--- end                                                          ---*/
4611 /*--------------------------------------------------------------------*/
4612