• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- Wrappers for generic Unix system calls                       ---*/
4 /*---                                            syswrap-generic.c ---*/
5 /*--------------------------------------------------------------------*/
6 
7 /*
8    This file is part of Valgrind, a dynamic binary instrumentation
9    framework.
10 
11    Copyright (C) 2000-2012 Julian Seward
12       jseward@acm.org
13 
14    This program is free software; you can redistribute it and/or
15    modify it under the terms of the GNU General Public License as
16    published by the Free Software Foundation; either version 2 of the
17    License, or (at your option) any later version.
18 
19    This program is distributed in the hope that it will be useful, but
20    WITHOUT ANY WARRANTY; without even the implied warranty of
21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22    General Public License for more details.
23 
24    You should have received a copy of the GNU General Public License
25    along with this program; if not, write to the Free Software
26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27    02111-1307, USA.
28 
29    The GNU General Public License is contained in the file COPYING.
30 */
31 
32 #if defined(VGO_linux) || defined(VGO_darwin)
33 
34 #include "pub_core_basics.h"
35 #include "pub_core_vki.h"
36 #include "pub_core_vkiscnums.h"
37 #include "pub_core_libcsetjmp.h"    // to keep _threadstate.h happy
38 #include "pub_core_threadstate.h"
39 #include "pub_core_debuginfo.h"     // VG_(di_notify_*)
40 #include "pub_core_aspacemgr.h"
41 #include "pub_core_transtab.h"      // VG_(discard_translations)
42 #include "pub_core_xarray.h"
43 #include "pub_core_clientstate.h"   // VG_(brk_base), VG_(brk_limit)
44 #include "pub_core_debuglog.h"
45 #include "pub_core_errormgr.h"
46 #include "pub_tool_gdbserver.h"     // VG_(gdbserver)
47 #include "pub_core_libcbase.h"
48 #include "pub_core_libcassert.h"
49 #include "pub_core_libcfile.h"
50 #include "pub_core_libcprint.h"
51 #include "pub_core_libcproc.h"
52 #include "pub_core_libcsignal.h"
53 #include "pub_core_machine.h"       // VG_(get_SP)
54 #include "pub_core_mallocfree.h"
55 #include "pub_core_options.h"
56 #include "pub_core_scheduler.h"
57 #include "pub_core_signals.h"
58 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
59 #include "pub_core_syscall.h"
60 #include "pub_core_syswrap.h"
61 #include "pub_core_tooliface.h"
62 #include "pub_core_ume.h"
63 
64 #include "priv_types_n_macros.h"
65 #include "priv_syswrap-generic.h"
66 
67 #include "config.h"
68 
69 
70 /* Returns True iff address range is something the client can
71    plausibly mess with: all of it is either already belongs to the
72    client or is free or a reservation. */
73 
ML_(valid_client_addr)74 Bool ML_(valid_client_addr)(Addr start, SizeT size, ThreadId tid,
75                                    const Char *syscallname)
76 {
77    Bool ret;
78 
79    if (size == 0)
80       return True;
81 
82    ret = VG_(am_is_valid_for_client_or_free_or_resvn)
83             (start,size,VKI_PROT_NONE);
84 
85    if (0)
86       VG_(printf)("%s: test=%#lx-%#lx ret=%d\n",
87 		  syscallname, start, start+size-1, (Int)ret);
88 
89    if (!ret && syscallname != NULL) {
90       VG_(message)(Vg_UserMsg, "Warning: client syscall %s tried "
91                                "to modify addresses %#lx-%#lx\n",
92                                syscallname, start, start+size-1);
93       if (VG_(clo_verbosity) > 1) {
94          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
95       }
96    }
97 
98    return ret;
99 }
100 
101 
ML_(client_signal_OK)102 Bool ML_(client_signal_OK)(Int sigNo)
103 {
104    /* signal 0 is OK for kill */
105    Bool ret = sigNo >= 0 && sigNo <= VG_SIGVGRTUSERMAX;
106 
107    //VG_(printf)("client_signal_OK(%d) -> %d\n", sigNo, ret);
108 
109    return ret;
110 }
111 
112 
113 /* Handy small function to help stop wrappers from segfaulting when
114    presented with bogus client addresses.  Is not used for generating
115    user-visible errors. */
116 
ML_(safe_to_deref)117 Bool ML_(safe_to_deref) ( void* start, SizeT size )
118 {
119    return VG_(am_is_valid_for_client)( (Addr)start, size, VKI_PROT_READ );
120 }
121 
122 
123 /* ---------------------------------------------------------------------
124    Doing mmap, mremap
125    ------------------------------------------------------------------ */
126 
127 /* AFAICT from kernel sources (mm/mprotect.c) and general experimentation,
128    munmap, mprotect (and mremap??) work at the page level.  So addresses
129    and lengths must be adjusted for this. */
130 
131 /* Mash around start and length so that the area exactly covers
132    an integral number of pages.  If we don't do that, memcheck's
133    idea of addressible memory diverges from that of the
134    kernel's, which causes the leak detector to crash. */
135 static
page_align_addr_and_len(Addr * a,SizeT * len)136 void page_align_addr_and_len( Addr* a, SizeT* len)
137 {
138    Addr ra;
139 
140    ra = VG_PGROUNDDN(*a);
141    *len = VG_PGROUNDUP(*a + *len) - ra;
142    *a = ra;
143 }
144 
/* Tell aspacem about a successful client mmap, discarding any cached
   translations over the range if aspacem reports that necessary. */
static void notify_core_of_mmap(Addr a, SizeT len, UInt prot,
                                UInt flags, Int fd, Off64T offset)
{
   Bool discard;

   /* 'a' is the return value from a real kernel mmap, hence: */
   vg_assert(VG_IS_PAGE_ALIGNED(a));
   /* whereas len is whatever the syscall supplied.  So: */
   len = VG_PGROUNDUP(len);

   discard = VG_(am_notify_client_mmap)( a, len, prot, flags, fd, offset );

   if (discard)
      VG_(discard_translations)( (Addr64)a, (ULong)len,
                                 "notify_core_of_mmap" );
}
161 
/* Tell the tool about a successful client mmap, decomposing 'prot'
   into its read/write/exec components. */
static void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, ULong di_handle)
{
   Bool r, w, x;

   /* 'a' is the return value from a real kernel mmap, hence: */
   vg_assert(VG_IS_PAGE_ALIGNED(a));
   /* whereas len is whatever the syscall supplied.  So: */
   len = VG_PGROUNDUP(len);

   r = toBool(prot & VKI_PROT_READ);
   w = toBool(prot & VKI_PROT_WRITE);
   x = toBool(prot & VKI_PROT_EXEC);

   VG_TRACK( new_mem_mmap, a, len, r, w, x, di_handle );
}
177 
178 
179 /* When a client mmap has been successfully done, this function must
180    be called.  It notifies both aspacem and the tool of the new
181    mapping.
182 
183    JRS 2008-Aug-14: But notice this is *very* obscure.  The only place
184    it is called from is POST(sys_io_setup).  In particular,
185    ML_(generic_PRE_sys_mmap), in m_syswrap, is the "normal case" handler for
186    client mmap.  But it doesn't call this function; instead it does the
187    relevant notifications itself.  Here, we just pass di_handle=0 to
188    notify_tool_of_mmap as we have no better information.  But really this
189    function should be done away with; problem is I don't understand what
190    POST(sys_io_setup) does or how it works.
191 
192    [However, this function is used lots for Darwin, because
193     ML_(generic_PRE_sys_mmap) cannot be used for Darwin.]
194  */
void
ML_(notify_core_and_tool_of_mmap) ( Addr a, SizeT len, UInt prot,
                                    UInt flags, Int fd, Off64T offset )
{
   // XXX: unlike the other notify_core_and_tool* functions, this one doesn't
   // do anything with debug info (ie. it doesn't call VG_(di_notify_mmap)).
   // Should it?  --njn
   /* Notify aspacem first, then the tool (passing no debuginfo handle,
      since none is available here). */
   notify_core_of_mmap(a, len, prot, flags, fd, offset);
   notify_tool_of_mmap(a, len, prot, 0/*di_handle*/);
}
205 
206 void
ML_(notify_core_and_tool_of_munmap)207 ML_(notify_core_and_tool_of_munmap) ( Addr a, SizeT len )
208 {
209    Bool d;
210 
211    page_align_addr_and_len(&a, &len);
212    d = VG_(am_notify_munmap)(a, len);
213    VG_TRACK( die_mem_munmap, a, len );
214    VG_(di_notify_munmap)( a, len );
215    if (d)
216       VG_(discard_translations)( (Addr64)a, (ULong)len,
217                                  "ML_(notify_core_and_tool_of_munmap)" );
218 }
219 
220 void
ML_(notify_core_and_tool_of_mprotect)221 ML_(notify_core_and_tool_of_mprotect) ( Addr a, SizeT len, Int prot )
222 {
223    Bool rr = toBool(prot & VKI_PROT_READ);
224    Bool ww = toBool(prot & VKI_PROT_WRITE);
225    Bool xx = toBool(prot & VKI_PROT_EXEC);
226    Bool d;
227 
228    page_align_addr_and_len(&a, &len);
229    d = VG_(am_notify_mprotect)(a, len, prot);
230    VG_TRACK( change_mem_mprotect, a, len, rr, ww, xx );
231    VG_(di_notify_mprotect)( a, len, prot );
232    if (d)
233       VG_(discard_translations)( (Addr64)a, (ULong)len,
234                                  "ML_(notify_core_and_tool_of_mprotect)" );
235 }
236 
237 
238 
239 #if HAVE_MREMAP
/* Expand (or shrink) an existing mapping, potentially moving it at
   the same time (controlled by the MREMAP_MAYMOVE flag).  Nightmare.

   Validates the request, then dispatches on the (maymove, fixed) flag
   combination: shrink in place, grow in place, move anywhere, or move
   to a caller-fixed address.  Returns the (possibly new) address on
   success, else EINVAL or ENOMEM.
*/
static
SysRes do_mremap( Addr old_addr, SizeT old_len,
                  Addr new_addr, SizeT new_len,
                  UWord flags, ThreadId tid )
{
#  define MIN_SIZET(_aa,_bb) (_aa) < (_bb) ? (_aa) : (_bb)

   Bool      ok, d;
   NSegment const* old_seg;
   Addr      advised;
   Bool      f_fixed   = toBool(flags & VKI_MREMAP_FIXED);
   Bool      f_maymove = toBool(flags & VKI_MREMAP_MAYMOVE);

   if (0)
      VG_(printf)("do_remap (old %#lx %ld) (new %#lx %ld) %s %s\n",
                  old_addr,old_len,new_addr,new_len,
                  flags & VKI_MREMAP_MAYMOVE ? "MAYMOVE" : "",
                  flags & VKI_MREMAP_FIXED ? "FIXED" : "");
   if (0)
      VG_(am_show_nsegments)(0, "do_remap: before");

   if (flags & ~(VKI_MREMAP_FIXED | VKI_MREMAP_MAYMOVE))
      goto eINVAL;

   if (!VG_IS_PAGE_ALIGNED(old_addr))
      goto eINVAL;

   old_len = VG_PGROUNDUP(old_len);
   new_len = VG_PGROUNDUP(new_len);

   if (new_len == 0)
      goto eINVAL;

   /* kernel doesn't reject this, but we do. */
   if (old_len == 0)
      goto eINVAL;

   /* reject wraparounds */
   if (old_addr + old_len < old_addr)
      goto eINVAL;
   /* (for unsigned values, "a + b < b" holds iff the addition
      wrapped round, so this too is an overflow check) */
   if (f_fixed == True && new_addr + new_len < new_len)
      goto eINVAL;

   /* kernel rejects all fixed, no-move requests (which are
      meaningless). */
   if (f_fixed == True && f_maymove == False)
      goto eINVAL;

   /* Stay away from non-client areas. */
   if (!ML_(valid_client_addr)(old_addr, old_len, tid, "mremap(old_addr)"))
      goto eINVAL;

   /* In all remaining cases, if the old range does not fall within a
      single segment, fail. */
   old_seg = VG_(am_find_nsegment)( old_addr );
   if (old_addr < old_seg->start || old_addr+old_len-1 > old_seg->end)
      goto eINVAL;
   if (old_seg->kind != SkAnonC && old_seg->kind != SkFileC)
      goto eINVAL;

   vg_assert(old_len > 0);
   vg_assert(new_len > 0);
   vg_assert(VG_IS_PAGE_ALIGNED(old_len));
   vg_assert(VG_IS_PAGE_ALIGNED(new_len));
   vg_assert(VG_IS_PAGE_ALIGNED(old_addr));

   /* There are 3 remaining cases:

      * maymove == False

        new space has to be at old address, so:
            - shrink    -> unmap end
            - same size -> do nothing
            - grow      -> if can grow in-place, do so, else fail

      * maymove == True, fixed == False

        new space can be anywhere, so:
            - shrink    -> unmap end
            - same size -> do nothing
            - grow      -> if can grow in-place, do so, else
                           move to anywhere large enough, else fail

      * maymove == True, fixed == True

        new space must be at new address, so:

            - if new address is not page aligned, fail
            - if new address range overlaps old one, fail
            - if new address range cannot be allocated, fail
            - else move to new address range with new size
            - else fail
   */

   if (f_maymove == False) {
      /* new space has to be at old address */
      if (new_len < old_len)
         goto shrink_in_place;
      if (new_len > old_len)
         goto grow_in_place_or_fail;
      goto same_in_place;
   }

   if (f_maymove == True && f_fixed == False) {
      /* new space can be anywhere */
      if (new_len < old_len)
         goto shrink_in_place;
      if (new_len > old_len)
         goto grow_in_place_or_move_anywhere_or_fail;
      goto same_in_place;
   }

   if (f_maymove == True && f_fixed == True) {
      /* new space can only be at the new address */
      if (!VG_IS_PAGE_ALIGNED(new_addr))
         goto eINVAL;
      if (new_addr+new_len-1 < old_addr || new_addr > old_addr+old_len-1) {
         /* no overlap */
      } else {
         goto eINVAL;
      }
      if (new_addr == 0)
         goto eINVAL;
         /* VG_(am_get_advisory_client_simple) interprets zero to mean
            non-fixed, which is not what we want */
      advised = VG_(am_get_advisory_client_simple)(new_addr, new_len, &ok);
      if (!ok || advised != new_addr)
         goto eNOMEM;
      ok = VG_(am_relocate_nooverlap_client)
              ( &d, old_addr, old_len, new_addr, new_len );
      if (ok) {
         /* Tell the tool: copy the overlap, mark any extra tail as
            fresh, then kill the old range. */
         VG_TRACK( copy_mem_remap, old_addr, new_addr,
                                   MIN_SIZET(old_len,new_len) );
         if (new_len > old_len)
            VG_TRACK( new_mem_mmap, new_addr+old_len, new_len-old_len,
                      old_seg->hasR, old_seg->hasW, old_seg->hasX,
                      0/*di_handle*/ );
         VG_TRACK(die_mem_munmap, old_addr, old_len);
         if (d) {
            VG_(discard_translations)( old_addr, old_len, "do_remap(1)" );
            VG_(discard_translations)( new_addr, new_len, "do_remap(2)" );
         }
         return VG_(mk_SysRes_Success)( new_addr );
      }
      goto eNOMEM;
   }

   /* end of the 3 cases */
   /*NOTREACHED*/ vg_assert(0);

  grow_in_place_or_move_anywhere_or_fail:
   {
   /* try growing it in-place */
   Addr   needA = old_addr + old_len;
   SSizeT needL = new_len - old_len;

   vg_assert(needL > 0);
   if (needA == 0)
      goto eINVAL;
      /* VG_(am_get_advisory_client_simple) interprets zero to mean
         non-fixed, which is not what we want */
   advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
   if (ok) {
      /* Fixes bug #129866. */
      ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
   }
   if (ok && advised == needA) {
      ok = VG_(am_extend_map_client)( &d, (NSegment*)old_seg, needL );
      if (ok) {
         VG_TRACK( new_mem_mmap, needA, needL,
                                 old_seg->hasR,
                                 old_seg->hasW, old_seg->hasX,
                                 0/*di_handle*/ );
         if (d)
            VG_(discard_translations)( needA, needL, "do_remap(3)" );
         return VG_(mk_SysRes_Success)( old_addr );
      }
   }

   /* that failed.  Look elsewhere. */
   advised = VG_(am_get_advisory_client_simple)( 0, new_len, &ok );
   if (ok) {
      Bool oldR = old_seg->hasR;
      Bool oldW = old_seg->hasW;
      Bool oldX = old_seg->hasX;
      /* assert new area does not overlap old */
      vg_assert(advised+new_len-1 < old_addr
                || advised > old_addr+old_len-1);
      ok = VG_(am_relocate_nooverlap_client)
              ( &d, old_addr, old_len, advised, new_len );
      if (ok) {
         VG_TRACK( copy_mem_remap, old_addr, advised,
                                   MIN_SIZET(old_len,new_len) );
         if (new_len > old_len)
            VG_TRACK( new_mem_mmap, advised+old_len, new_len-old_len,
                      oldR, oldW, oldX, 0/*di_handle*/ );
         VG_TRACK(die_mem_munmap, old_addr, old_len);
         if (d) {
            VG_(discard_translations)( old_addr, old_len, "do_remap(4)" );
            VG_(discard_translations)( advised, new_len, "do_remap(5)" );
         }
         return VG_(mk_SysRes_Success)( advised );
      }
   }
   goto eNOMEM;
   }
   /*NOTREACHED*/ vg_assert(0);

  grow_in_place_or_fail:
   {
   Addr  needA = old_addr + old_len;
   SizeT needL = new_len - old_len;
   if (needA == 0)
      goto eINVAL;
      /* VG_(am_get_advisory_client_simple) interprets zero to mean
         non-fixed, which is not what we want */
   advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
   if (ok) {
      /* Fixes bug #129866. */
      ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
   }
   if (!ok || advised != needA)
      goto eNOMEM;
   ok = VG_(am_extend_map_client)( &d, (NSegment*)old_seg, needL );
   if (!ok)
      goto eNOMEM;
   VG_TRACK( new_mem_mmap, needA, needL,
                           old_seg->hasR, old_seg->hasW, old_seg->hasX,
                           0/*di_handle*/ );
   if (d)
      VG_(discard_translations)( needA, needL, "do_remap(6)" );
   return VG_(mk_SysRes_Success)( old_addr );
   }
   /*NOTREACHED*/ vg_assert(0);

  shrink_in_place:
   {
   SysRes sres = VG_(am_munmap_client)( &d, old_addr+new_len, old_len-new_len );
   if (sr_isError(sres))
      return sres;
   VG_TRACK( die_mem_munmap, old_addr+new_len, old_len-new_len );
   if (d)
      VG_(discard_translations)( old_addr+new_len, old_len-new_len,
                                 "do_remap(7)" );
   return VG_(mk_SysRes_Success)( old_addr );
   }
   /*NOTREACHED*/ vg_assert(0);

  same_in_place:
   return VG_(mk_SysRes_Success)( old_addr );
   /*NOTREACHED*/ vg_assert(0);

  eINVAL:
   return VG_(mk_SysRes_Error)( VKI_EINVAL );
  eNOMEM:
   return VG_(mk_SysRes_Error)( VKI_ENOMEM );

#  undef MIN_SIZET
}
502 #endif /* HAVE_MREMAP */
503 
504 
505 /* ---------------------------------------------------------------------
506    File-descriptor tracking
507    ------------------------------------------------------------------ */
508 
/* One of these is allocated for each open file descriptor.  Records
   live on the doubly-linked 'allocated_fds' list below. */
typedef struct OpenFd
{
   Int fd;                        /* The file descriptor */
   Char *pathname;                /* NULL if not a regular file or unknown */
   ExeContext *where;             /* NULL if inherited from parent */
   struct OpenFd *next, *prev;    /* links in the allocated_fds list */
} OpenFd;

/* List of allocated file descriptors. */
static OpenFd *allocated_fds = NULL;

/* Count of open file descriptors (length of allocated_fds). */
static Int fd_count = 0;
523 
524 
525 /* Note the fact that a file descriptor was just closed. */
526 static
record_fd_close(Int fd)527 void record_fd_close(Int fd)
528 {
529    OpenFd *i = allocated_fds;
530 
531    if (fd >= VG_(fd_hard_limit))
532       return;			/* Valgrind internal */
533 
534    while(i) {
535       if(i->fd == fd) {
536          if(i->prev)
537             i->prev->next = i->next;
538          else
539             allocated_fds = i->next;
540          if(i->next)
541             i->next->prev = i->prev;
542          if(i->pathname)
543             VG_(arena_free) (VG_AR_CORE, i->pathname);
544          VG_(arena_free) (VG_AR_CORE, i);
545          fd_count--;
546          break;
547       }
548       i = i->next;
549    }
550 }
551 
552 /* Note the fact that a file descriptor was just opened.  If the
553    tid is -1, this indicates an inherited fd.  If the pathname is NULL,
554    this either indicates a non-standard file (i.e. a pipe or socket or
555    some such thing) or that we don't know the filename.  If the fd is
556    already open, then we're probably doing a dup2() to an existing fd,
557    so just overwrite the existing one. */
ML_(record_fd_open_with_given_name)558 void ML_(record_fd_open_with_given_name)(ThreadId tid, Int fd, char *pathname)
559 {
560    OpenFd *i;
561 
562    if (fd >= VG_(fd_hard_limit))
563       return;			/* Valgrind internal */
564 
565    /* Check to see if this fd is already open. */
566    i = allocated_fds;
567    while (i) {
568       if (i->fd == fd) {
569          if (i->pathname) VG_(arena_free)(VG_AR_CORE, i->pathname);
570          break;
571       }
572       i = i->next;
573    }
574 
575    /* Not already one: allocate an OpenFd */
576    if (i == NULL) {
577       i = VG_(arena_malloc)(VG_AR_CORE, "syswrap.rfdowgn.1", sizeof(OpenFd));
578 
579       i->prev = NULL;
580       i->next = allocated_fds;
581       if(allocated_fds) allocated_fds->prev = i;
582       allocated_fds = i;
583       fd_count++;
584    }
585 
586    i->fd = fd;
587    i->pathname = VG_(arena_strdup)(VG_AR_CORE, "syswrap.rfdowgn.2", pathname);
588    i->where = (tid == -1) ? NULL : VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
589 }
590 
591 // Record opening of an fd, and find its name.
ML_(record_fd_open_named)592 void ML_(record_fd_open_named)(ThreadId tid, Int fd)
593 {
594    static HChar buf[VKI_PATH_MAX];
595    Char* name;
596    if (VG_(resolve_filename)(fd, buf, VKI_PATH_MAX))
597       name = buf;
598    else
599       name = NULL;
600 
601    ML_(record_fd_open_with_given_name)(tid, fd, name);
602 }
603 
// Record opening of a nameless fd (pipe, socket, or unknown name).
void ML_(record_fd_open_nameless)(ThreadId tid, Int fd)
{
   ML_(record_fd_open_with_given_name)(tid, fd, NULL);
}
609 
610 static
unix2name(struct vki_sockaddr_un * sa,UInt len,Char * name)611 Char *unix2name(struct vki_sockaddr_un *sa, UInt len, Char *name)
612 {
613    if (sa == NULL || len == 0 || sa->sun_path[0] == '\0') {
614       VG_(sprintf)(name, "<unknown>");
615    } else {
616       VG_(sprintf)(name, "%s", sa->sun_path);
617    }
618 
619    return name;
620 }
621 
622 static
inet2name(struct vki_sockaddr_in * sa,UInt len,Char * name)623 Char *inet2name(struct vki_sockaddr_in *sa, UInt len, Char *name)
624 {
625    if (sa == NULL || len == 0) {
626       VG_(sprintf)(name, "<unknown>");
627    } else {
628       UInt addr = VG_(ntohl)(sa->sin_addr.s_addr);
629       if (addr == 0) {
630          VG_(sprintf)(name, "<unbound>");
631       } else {
632          VG_(sprintf)(name, "%u.%u.%u.%u:%u",
633                       (addr>>24) & 0xFF, (addr>>16) & 0xFF,
634                       (addr>>8) & 0xFF, addr & 0xFF,
635                       VG_(ntohs)(sa->sin_port));
636       }
637    }
638 
639    return name;
640 }
641 
642 /*
643  * Try get some details about a socket.
644  */
645 static void
getsockdetails(Int fd)646 getsockdetails(Int fd)
647 {
648    union u {
649       struct vki_sockaddr a;
650       struct vki_sockaddr_in in;
651       struct vki_sockaddr_un un;
652    } laddr;
653    UInt llen;
654 
655    llen = sizeof(laddr);
656    VG_(memset)(&laddr, 0, llen);
657 
658    if(VG_(getsockname)(fd, (struct vki_sockaddr *)&(laddr.a), &llen) != -1) {
659       switch(laddr.a.sa_family) {
660       case VKI_AF_INET: {
661          static char lname[32];
662          static char pname[32];
663          struct vki_sockaddr_in paddr;
664          UInt plen = sizeof(struct vki_sockaddr_in);
665 
666          if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
667             VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> %s\n", fd,
668                          inet2name(&(laddr.in), llen, lname),
669                          inet2name(&paddr, plen, pname));
670          } else {
671             VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> unbound\n",
672                          fd, inet2name(&(laddr.in), llen, lname));
673          }
674          return;
675          }
676       case VKI_AF_UNIX: {
677          static char lname[256];
678          VG_(message)(Vg_UserMsg, "Open AF_UNIX socket %d: %s\n", fd,
679                       unix2name(&(laddr.un), llen, lname));
680          return;
681          }
682       default:
683          VG_(message)(Vg_UserMsg, "Open pf-%d socket %d:\n",
684                       laddr.a.sa_family, fd);
685          return;
686       }
687    }
688 
689    VG_(message)(Vg_UserMsg, "Open socket %d:\n", fd);
690 }
691 
692 
693 /* Dump out a summary, and a more detailed list, of open file descriptors. */
VG_(show_open_fds)694 void VG_(show_open_fds) (void)
695 {
696    OpenFd *i = allocated_fds;
697 
698    VG_(message)(Vg_UserMsg, "FILE DESCRIPTORS: %d open at exit.\n", fd_count);
699 
700    while (i) {
701       if (i->pathname) {
702          VG_(message)(Vg_UserMsg, "Open file descriptor %d: %s\n", i->fd,
703                       i->pathname);
704       } else {
705          Int val;
706          UInt len = sizeof(val);
707 
708          if (VG_(getsockopt)(i->fd, VKI_SOL_SOCKET, VKI_SO_TYPE, &val, &len)
709              == -1) {
710             VG_(message)(Vg_UserMsg, "Open file descriptor %d:\n", i->fd);
711          } else {
712             getsockdetails(i->fd);
713          }
714       }
715 
716       if(i->where) {
717          VG_(pp_ExeContext)(i->where);
718          VG_(message)(Vg_UserMsg, "\n");
719       } else {
720          VG_(message)(Vg_UserMsg, "   <inherited from parent>\n");
721          VG_(message)(Vg_UserMsg, "\n");
722       }
723 
724       i = i->next;
725    }
726 
727    VG_(message)(Vg_UserMsg, "\n");
728 }
729 
730 /* If /proc/self/fd doesn't exist (e.g. you've got a Linux kernel that doesn't
731    have /proc support compiled in, or a non-Linux kernel), then we need to
732    find out what file descriptors we inherited from our parent process the
733    hard way - by checking each fd in turn. */
734 static
init_preopened_fds_without_proc_self_fd(void)735 void init_preopened_fds_without_proc_self_fd(void)
736 {
737    struct vki_rlimit lim;
738    UInt count;
739    Int i;
740 
741    if (VG_(getrlimit) (VKI_RLIMIT_NOFILE, &lim) == -1) {
742       /* Hmm.  getrlimit() failed.  Now we're screwed, so just choose
743          an arbitrarily high number.  1024 happens to be the limit in
744          the 2.4 Linux kernels. */
745       count = 1024;
746    } else {
747       count = lim.rlim_cur;
748    }
749 
750    for (i = 0; i < count; i++)
751       if (VG_(fcntl)(i, VKI_F_GETFL, 0) != -1)
752          ML_(record_fd_open_named)(-1, i);
753 }
754 
/* Initialize the list of open file descriptors with the file descriptors
   we inherited from our parent process. */

void VG_(init_preopened_fds)(void)
{
// DDD: should probably use HAVE_PROC here or similar, instead.
#if defined(VGO_linux)
   Int ret;
   struct vki_dirent d;
   SysRes f;

   /* Prefer enumerating /proc/self/fd; fall back to probing each fd
      individually when /proc is unavailable. */
   f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
   if (sr_isError(f)) {
      init_preopened_fds_without_proc_self_fd();
      return;
   }

   /* Fetch one dirent at a time, seeking to d_off after each so the
      next getdents call starts at the following entry. */
   while ((ret = VG_(getdents)(sr_Res(f), &d, sizeof(d))) != 0) {
      if (ret == -1)
         goto out;

      if (VG_(strcmp)(d.d_name, ".") && VG_(strcmp)(d.d_name, "..")) {
         Char* s;
         Int fno = VG_(strtoll10)(d.d_name, &s);
         if (*s == '\0') {
            /* Don't record the fd we ourselves hold on /proc/self/fd. */
            if (fno != sr_Res(f))
               if (VG_(clo_track_fds))
                  ML_(record_fd_open_named)(-1, fno);
         } else {
            VG_(message)(Vg_DebugMsg,
               "Warning: invalid file name in /proc/self/fd: %s\n",
               d.d_name);
         }
      }

      VG_(lseek)(sr_Res(f), d.d_off, VKI_SEEK_SET);
   }

  out:
   VG_(close)(sr_Res(f));

#elif defined(VGO_darwin)
   init_preopened_fds_without_proc_self_fd();

#else
#  error Unknown OS
#endif
}
803 
804 static
strdupcat(HChar * cc,const Char * s1,const Char * s2,ArenaId aid)805 Char *strdupcat ( HChar* cc, const Char *s1, const Char *s2, ArenaId aid )
806 {
807    UInt len = VG_(strlen) ( s1 ) + VG_(strlen) ( s2 ) + 1;
808    Char *result = VG_(arena_malloc) ( aid, cc, len );
809    VG_(strcpy) ( result, s1 );
810    VG_(strcat) ( result, s2 );
811    return result;
812 }
813 
814 static
pre_mem_read_sendmsg(ThreadId tid,Bool read,Char * msg,Addr base,SizeT size)815 void pre_mem_read_sendmsg ( ThreadId tid, Bool read,
816                             Char *msg, Addr base, SizeT size )
817 {
818    Char *outmsg = strdupcat ( "di.syswrap.pmrs.1",
819                               "sendmsg", msg, VG_AR_CORE );
820    PRE_MEM_READ( outmsg, base, size );
821    VG_(arena_free) ( VG_AR_CORE, outmsg );
822 }
823 
824 static
pre_mem_write_recvmsg(ThreadId tid,Bool read,Char * msg,Addr base,SizeT size)825 void pre_mem_write_recvmsg ( ThreadId tid, Bool read,
826                              Char *msg, Addr base, SizeT size )
827 {
828    Char *outmsg = strdupcat ( "di.syswrap.pmwr.1",
829                               "recvmsg", msg, VG_AR_CORE );
830    if ( read )
831       PRE_MEM_READ( outmsg, base, size );
832    else
833       PRE_MEM_WRITE( outmsg, base, size );
834    VG_(arena_free) ( VG_AR_CORE, outmsg );
835 }
836 
837 static
post_mem_write_recvmsg(ThreadId tid,Bool read,Char * fieldName,Addr base,SizeT size)838 void post_mem_write_recvmsg ( ThreadId tid, Bool read,
839                               Char *fieldName, Addr base, SizeT size )
840 {
841    if ( !read )
842       POST_MEM_WRITE( base, size );
843 }
844 
/* Apply 'foreach_func' to every part of a struct vki_msghdr: first
   the scalar header fields, then the variable-sized buffers they
   point at (msg_name, the iovec array and each iovec payload, and
   msg_control).  The Bool passed to foreach_func distinguishes data
   the kernel reads (True) from data it writes (False); 'length'
   caps the total number of iovec payload bytes visited. */
static
void msghdr_foreachfield (
        ThreadId tid,
        Char *name,
        struct vki_msghdr *msg,
        UInt length,
        void (*foreach_func)( ThreadId, Bool, Char *, Addr, SizeT )
     )
{
   Char *fieldName;

   if ( !msg )
      return;

   /* 32 bytes of slack is enough for the longest decoration built
      below, "(%s.msg_iov[%u])". */
   fieldName = VG_(arena_malloc) ( VG_AR_CORE, "di.syswrap.mfef", VG_(strlen)(name) + 32 );

   VG_(sprintf) ( fieldName, "(%s)", name );

   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_name, sizeof( msg->msg_name ) );
   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_namelen, sizeof( msg->msg_namelen ) );
   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_iov, sizeof( msg->msg_iov ) );
   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_iovlen, sizeof( msg->msg_iovlen ) );
   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_control, sizeof( msg->msg_control ) );
   foreach_func ( tid, True, fieldName, (Addr)&msg->msg_controllen, sizeof( msg->msg_controllen ) );
   /* msg_flags is filled in by the kernel, hence read == False. */
   foreach_func ( tid, False, fieldName, (Addr)&msg->msg_flags, sizeof( msg->msg_flags ) );

   if ( msg->msg_name ) {
      VG_(sprintf) ( fieldName, "(%s.msg_name)", name );
      foreach_func ( tid, False, fieldName,
                     (Addr)msg->msg_name, msg->msg_namelen );
   }

   if ( msg->msg_iov ) {
      struct vki_iovec *iov = msg->msg_iov;
      UInt i;

      VG_(sprintf) ( fieldName, "(%s.msg_iov)", name );

      /* The iovec array itself is always read by the kernel. */
      foreach_func ( tid, True, fieldName,
                     (Addr)iov, msg->msg_iovlen * sizeof( struct vki_iovec ) );

      /* Visit each iovec buffer, clamping so at most 'length' bytes
         in total are covered. */
      for ( i = 0; i < msg->msg_iovlen; ++i, ++iov ) {
         UInt iov_len = iov->iov_len <= length ? iov->iov_len : length;
         VG_(sprintf) ( fieldName, "(%s.msg_iov[%u])", name, i );
         foreach_func ( tid, False, fieldName,
                        (Addr)iov->iov_base, iov_len );
         length = length - iov_len;
      }
   }

   if ( msg->msg_control )
   {
      VG_(sprintf) ( fieldName, "(%s.msg_control)", name );
      foreach_func ( tid, False, fieldName,
                     (Addr)msg->msg_control, msg->msg_controllen );
   }

   VG_(arena_free) ( VG_AR_CORE, fieldName );
}
904 
/* Scan a received msghdr's ancillary data for SCM_RIGHTS messages and,
   when fd tracking is enabled, record every file descriptor that was
   passed in over the socket. */
static void check_cmsg_for_fds(ThreadId tid, struct vki_msghdr *msg)
{
   struct vki_cmsghdr *cm;

   for (cm = VKI_CMSG_FIRSTHDR(msg); cm; cm = VKI_CMSG_NXTHDR(msg, cm)) {
      Int  i, nfds;
      Int *fds;

      if (cm->cmsg_level != VKI_SOL_SOCKET ||
          cm->cmsg_type  != VKI_SCM_RIGHTS)
         continue;

      fds  = (Int *) VKI_CMSG_DATA(cm);
      nfds = (cm->cmsg_len - VKI_CMSG_ALIGN(sizeof(struct vki_cmsghdr)))
             / sizeof(int);

      for (i = 0; i < nfds; i++) {
         if (VG_(clo_track_fds))
            // XXX: must we check the range on these fds with
            //      ML_(fd_allowed)()?
            ML_(record_fd_open_named)(tid, fds[i]);
      }
   }
}
927 
/* GrP kernel ignores sa_len (at least on Darwin); this checks the rest */
/* Tell the tool which parts of *sa the kernel will read, interpreting
   the body according to sa_family.  'description' is a printf-style
   template with exactly one %s, filled with the field name being
   checked. */
static
void pre_mem_read_sockaddr ( ThreadId tid,
                             Char *description,
                             struct vki_sockaddr *sa, UInt salen )
{
   Char *outmsg;
   struct vki_sockaddr_un*  sun  = (struct vki_sockaddr_un *)sa;
   struct vki_sockaddr_in*  sin  = (struct vki_sockaddr_in *)sa;
   struct vki_sockaddr_in6* sin6 = (struct vki_sockaddr_in6 *)sa;

   /* NULL/zero-length sockaddrs are legal */
   if ( sa == NULL || salen == 0 ) return;

   /* "+ 30" covers the longest field name substituted below
      ("sin6_flowinfo" etc.). */
   outmsg = VG_(arena_malloc) ( VG_AR_CORE, "di.syswrap.pmr_sockaddr.1",
                                VG_(strlen)( description ) + 30 );

   /* sa_family is read regardless of address family. */
   VG_(sprintf) ( outmsg, description, "sa_family" );
   PRE_MEM_READ( outmsg, (Addr) &sa->sa_family, sizeof(vki_sa_family_t));

   switch (sa->sa_family) {

      case VKI_AF_UNIX:
         VG_(sprintf) ( outmsg, description, "sun_path" );
         PRE_MEM_RASCIIZ( outmsg, (Addr) sun->sun_path );
         // GrP fixme max of sun_len-2? what about nul char?
         break;

      case VKI_AF_INET:
         VG_(sprintf) ( outmsg, description, "sin_port" );
         PRE_MEM_READ( outmsg, (Addr) &sin->sin_port, sizeof (sin->sin_port) );
         VG_(sprintf) ( outmsg, description, "sin_addr" );
         PRE_MEM_READ( outmsg, (Addr) &sin->sin_addr, sizeof (sin->sin_addr) );
         break;

      case VKI_AF_INET6:
         VG_(sprintf) ( outmsg, description, "sin6_port" );
         PRE_MEM_READ( outmsg,
            (Addr) &sin6->sin6_port, sizeof (sin6->sin6_port) );
         VG_(sprintf) ( outmsg, description, "sin6_flowinfo" );
         PRE_MEM_READ( outmsg,
            (Addr) &sin6->sin6_flowinfo, sizeof (sin6->sin6_flowinfo) );
         VG_(sprintf) ( outmsg, description, "sin6_addr" );
         PRE_MEM_READ( outmsg,
            (Addr) &sin6->sin6_addr, sizeof (sin6->sin6_addr) );
         VG_(sprintf) ( outmsg, description, "sin6_scope_id" );
         PRE_MEM_READ( outmsg,
            (Addr) &sin6->sin6_scope_id, sizeof (sin6->sin6_scope_id) );
         break;

      default:
         /* Unknown family: conservatively check the whole thing. */
         VG_(sprintf) ( outmsg, description, "" );
         PRE_MEM_READ( outmsg, (Addr) sa, salen );
         break;
   }

   VG_(arena_free) ( VG_AR_CORE, outmsg );
}
986 
987 /* Dereference a pointer to a UInt. */
deref_UInt(ThreadId tid,Addr a,Char * s)988 static UInt deref_UInt ( ThreadId tid, Addr a, Char* s )
989 {
990    UInt* a_p = (UInt*)a;
991    PRE_MEM_READ( s, (Addr)a_p, sizeof(UInt) );
992    if (a_p == NULL)
993       return 0;
994    else
995       return *a_p;
996 }
997 
ML_(buf_and_len_pre_check)998 void ML_(buf_and_len_pre_check) ( ThreadId tid, Addr buf_p, Addr buflen_p,
999                                   Char* buf_s, Char* buflen_s )
1000 {
1001    if (VG_(tdict).track_pre_mem_write) {
1002       UInt buflen_in = deref_UInt( tid, buflen_p, buflen_s);
1003       if (buflen_in > 0) {
1004          VG_(tdict).track_pre_mem_write(
1005             Vg_CoreSysCall, tid, buf_s, buf_p, buflen_in );
1006       }
1007    }
1008 }
1009 
ML_(buf_and_len_post_check)1010 void ML_(buf_and_len_post_check) ( ThreadId tid, SysRes res,
1011                                    Addr buf_p, Addr buflen_p, Char* s )
1012 {
1013    if (!sr_isError(res) && VG_(tdict).track_post_mem_write) {
1014       UInt buflen_out = deref_UInt( tid, buflen_p, s);
1015       if (buflen_out > 0 && buf_p != (Addr)NULL) {
1016          VG_(tdict).track_post_mem_write( Vg_CoreSysCall, tid, buf_p, buflen_out );
1017       }
1018    }
1019 }
1020 
1021 /* ---------------------------------------------------------------------
1022    Data seg end, for brk()
1023    ------------------------------------------------------------------ */
1024 
1025 /*   +--------+------------+
1026      | anon   |    resvn   |
1027      +--------+------------+
1028 
1029      ^     ^  ^
1030      |     |  boundary is page aligned
1031      |     VG_(brk_limit) -- no alignment constraint
1032      VG_(brk_base) -- page aligned -- does not move
1033 
1034      Both the anon part and the reservation part are always at least
1035      one page.
1036 */
1037 
1038 /* Set the new data segment end to NEWBRK.  If this succeeds, return
1039    NEWBRK, else return the current data segment end. */
1040 
/* Set the new data segment end to NEWBRK.  If this succeeds, return
   NEWBRK, else return the current data segment end.  See the diagram
   above: the client's brk area is an anon segment (starting at
   VG_(brk_base)) followed by a reservation segment, both at least one
   page. */
static Addr do_brk ( Addr newbrk )
{
   NSegment const* aseg;
   NSegment const* rseg;
   Addr newbrkP;
   SizeT delta;
   Bool ok;
   Bool debug = False;

   if (debug)
      VG_(printf)("\ndo_brk: brk_base=%#lx brk_limit=%#lx newbrk=%#lx\n",
		  VG_(brk_base), VG_(brk_limit), newbrk);

#  if 0
   if (0) show_segments("in_brk");
#  endif

   if (newbrk < VG_(brk_base))
      /* Clearly impossible. */
      goto bad;

   if (newbrk >= VG_(brk_base) && newbrk < VG_(brk_limit)) {
      /* shrinking the data segment.  Be lazy and don't munmap the
         excess area. */
      NSegment const * seg = VG_(am_find_nsegment)(newbrk);
      /* Translations cached from the abandoned area must go. */
      if (seg && seg->hasT)
         VG_(discard_translations)( newbrk, VG_(brk_limit) - newbrk,
                                    "do_brk(shrink)" );
      /* Since we're being lazy and not unmapping pages, we have to
         zero out the area, so that if the area later comes back into
         circulation, it will be filled with zeroes, as if it really
         had been unmapped and later remapped.  Be a bit paranoid and
         try hard to ensure we're not going to segfault by doing the
         write - check both ends of the range are in the same segment
         and that segment is writable. */
      if (seg) {
         /* pre: newbrk < VG_(brk_limit)
              => newbrk <= VG_(brk_limit)-1 */
         NSegment const * seg2;
         vg_assert(newbrk < VG_(brk_limit));
         seg2 = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
         if (seg2 && seg == seg2 && seg->hasW)
            VG_(memset)( (void*)newbrk, 0, VG_(brk_limit) - newbrk );
      }

      VG_(brk_limit) = newbrk;
      return newbrk;
   }

   /* otherwise we're expanding the brk segment. */
   /* Find the anon segment containing the current limit; the -1 keeps
      us inside the segment when the limit sits exactly on a boundary. */
   if (VG_(brk_limit) > VG_(brk_base))
      aseg = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
   else
      aseg = VG_(am_find_nsegment)( VG_(brk_limit) );
   rseg = VG_(am_next_nsegment)( (NSegment*)aseg, True/*forwards*/ );

   /* These should be assured by setup_client_dataseg in m_main. */
   vg_assert(aseg);
   vg_assert(rseg);
   vg_assert(aseg->kind == SkAnonC);
   vg_assert(rseg->kind == SkResvn);
   vg_assert(aseg->end+1 == rseg->start);

   vg_assert(newbrk >= VG_(brk_base));
   if (newbrk <= rseg->start) {
      /* still fits within the anon segment. */
      VG_(brk_limit) = newbrk;
      return newbrk;
   }

   if (newbrk > rseg->end+1 - VKI_PAGE_SIZE) {
      /* request is too large -- the resvn would fall below 1 page,
         which isn't allowed. */
      goto bad;
   }

   /* Take page-aligned space from the front of the reservation and
      give it to the anon segment. */
   newbrkP = VG_PGROUNDUP(newbrk);
   vg_assert(newbrkP > rseg->start && newbrkP <= rseg->end+1 - VKI_PAGE_SIZE);
   delta = newbrkP - rseg->start;
   vg_assert(delta > 0);
   vg_assert(VG_IS_PAGE_ALIGNED(delta));

   ok = VG_(am_extend_into_adjacent_reservation_client)( (NSegment*)aseg, delta );
   if (!ok) goto bad;

   VG_(brk_limit) = newbrk;
   return newbrk;

  bad:
   return VG_(brk_limit);
}
1132 
1133 
1134 /* ---------------------------------------------------------------------
1135    Vet file descriptors for sanity
1136    ------------------------------------------------------------------ */
1137 /*
1138 > - what does the "Bool soft" parameter mean?
1139 
1140 (Tom Hughes, 3 Oct 05):
1141 
1142 Whether or not to consider a file descriptor invalid if it is above
1143 the current soft limit.
1144 
1145 Basically if we are testing whether a newly created file descriptor is
1146 valid (in a post handler) then we set soft to true, and if we are
1147 testing whether a file descriptor that is about to be used (in a pre
1148 handler) is valid [viz, an already-existing fd] then we set it to false.
1149 
1150 The point is that if the (virtual) soft limit is lowered then any
1151 existing descriptors can still be read/written/closed etc (so long as
1152 they are below the valgrind reserved descriptors) but no new
1153 descriptors can be created above the new soft limit.
1154 
1155 (jrs 4 Oct 05: in which case, I've renamed it "isNewFd")
1156 */
1157 
1158 /* Return true if we're allowed to use or create this fd */
ML_(fd_allowed)1159 Bool ML_(fd_allowed)(Int fd, const Char *syscallname, ThreadId tid, Bool isNewFd)
1160 {
1161    Bool allowed = True;
1162 
1163    /* hard limits always apply */
1164    if (fd < 0 || fd >= VG_(fd_hard_limit))
1165       allowed = False;
1166 
1167    /* hijacking the output fds is never allowed */
1168    if (fd == VG_(log_output_sink).fd || fd == VG_(xml_output_sink).fd)
1169       allowed = False;
1170 
1171    /* if creating a new fd (rather than using an existing one), the
1172       soft limit must also be observed */
1173    if (isNewFd && fd >= VG_(fd_soft_limit))
1174       allowed = False;
1175 
1176    /* this looks like it ought to be included, but causes problems: */
1177    /*
1178    if (fd == 2 && VG_(debugLog_getLevel)() > 0)
1179       allowed = False;
1180    */
1181    /* The difficulty is as follows: consider a program P which expects
1182       to be able to mess with (redirect) its own stderr (fd 2).
1183       Usually to deal with P we would issue command line flags to send
1184       logging somewhere other than stderr, so as not to disrupt P.
1185       The problem is that -d unilaterally hijacks stderr with no
1186       consultation with P.  And so, if this check is enabled, P will
1187       work OK normally but fail if -d is issued.
1188 
1189       Basically -d is a hack and you take your chances when using it.
1190       It's very useful for low level debugging -- particularly at
1191       startup -- and having its presence change the behaviour of the
1192       client is exactly what we don't want.  */
1193 
1194    /* croak? */
1195    if ((!allowed) && VG_(showing_core_errors)() ) {
1196       VG_(message)(Vg_UserMsg,
1197          "Warning: invalid file descriptor %d in syscall %s()\n",
1198          fd, syscallname);
1199       if (fd == VG_(log_output_sink).fd && VG_(log_output_sink).fd >= 0)
1200 	 VG_(message)(Vg_UserMsg,
1201             "   Use --log-fd=<number> to select an alternative log fd.\n");
1202       if (fd == VG_(xml_output_sink).fd && VG_(xml_output_sink).fd >= 0)
1203 	 VG_(message)(Vg_UserMsg,
1204             "   Use --xml-fd=<number> to select an alternative XML "
1205             "output fd.\n");
1206       // DDD: consider always printing this stack trace, it's useful.
1207       // Also consider also making this a proper core error, ie.
1208       // suppressible and all that.
1209       if (VG_(clo_verbosity) > 1) {
1210          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1211       }
1212    }
1213 
1214    return allowed;
1215 }
1216 
1217 
1218 /* ---------------------------------------------------------------------
1219    Deal with a bunch of socket-related syscalls
1220    ------------------------------------------------------------------ */
1221 
1222 /* ------ */
1223 
1224 void
ML_(generic_PRE_sys_socketpair)1225 ML_(generic_PRE_sys_socketpair) ( ThreadId tid,
1226                                   UWord arg0, UWord arg1,
1227                                   UWord arg2, UWord arg3 )
1228 {
1229    /* int socketpair(int d, int type, int protocol, int sv[2]); */
1230    PRE_MEM_WRITE( "socketcall.socketpair(sv)",
1231                   arg3, 2*sizeof(int) );
1232 }
1233 
1234 SysRes
ML_(generic_POST_sys_socketpair)1235 ML_(generic_POST_sys_socketpair) ( ThreadId tid,
1236                                    SysRes res,
1237                                    UWord arg0, UWord arg1,
1238                                    UWord arg2, UWord arg3 )
1239 {
1240    SysRes r = res;
1241    Int fd1 = ((Int*)arg3)[0];
1242    Int fd2 = ((Int*)arg3)[1];
1243    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1244    POST_MEM_WRITE( arg3, 2*sizeof(int) );
1245    if (!ML_(fd_allowed)(fd1, "socketcall.socketpair", tid, True) ||
1246        !ML_(fd_allowed)(fd2, "socketcall.socketpair", tid, True)) {
1247       VG_(close)(fd1);
1248       VG_(close)(fd2);
1249       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1250    } else {
1251       POST_MEM_WRITE( arg3, 2*sizeof(int) );
1252       if (VG_(clo_track_fds)) {
1253          ML_(record_fd_open_nameless)(tid, fd1);
1254          ML_(record_fd_open_nameless)(tid, fd2);
1255       }
1256    }
1257    return r;
1258 }
1259 
1260 /* ------ */
1261 
1262 SysRes
ML_(generic_POST_sys_socket)1263 ML_(generic_POST_sys_socket) ( ThreadId tid, SysRes res )
1264 {
1265    SysRes r = res;
1266    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1267    if (!ML_(fd_allowed)(sr_Res(res), "socket", tid, True)) {
1268       VG_(close)(sr_Res(res));
1269       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1270    } else {
1271       if (VG_(clo_track_fds))
1272          ML_(record_fd_open_nameless)(tid, sr_Res(res));
1273    }
1274    return r;
1275 }
1276 
1277 /* ------ */
1278 
1279 void
ML_(generic_PRE_sys_bind)1280 ML_(generic_PRE_sys_bind) ( ThreadId tid,
1281                             UWord arg0, UWord arg1, UWord arg2 )
1282 {
1283    /* int bind(int sockfd, struct sockaddr *my_addr,
1284                int addrlen); */
1285    pre_mem_read_sockaddr(
1286       tid, "socketcall.bind(my_addr.%s)",
1287       (struct vki_sockaddr *) arg1, arg2
1288    );
1289 }
1290 
1291 /* ------ */
1292 
1293 void
ML_(generic_PRE_sys_accept)1294 ML_(generic_PRE_sys_accept) ( ThreadId tid,
1295                               UWord arg0, UWord arg1, UWord arg2 )
1296 {
1297    /* int accept(int s, struct sockaddr *addr, int *addrlen); */
1298    Addr addr_p     = arg1;
1299    Addr addrlen_p  = arg2;
1300    if (addr_p != (Addr)NULL)
1301       ML_(buf_and_len_pre_check) ( tid, addr_p, addrlen_p,
1302                                    "socketcall.accept(addr)",
1303                                    "socketcall.accept(addrlen_in)" );
1304 }
1305 
1306 SysRes
ML_(generic_POST_sys_accept)1307 ML_(generic_POST_sys_accept) ( ThreadId tid,
1308                                SysRes res,
1309                                UWord arg0, UWord arg1, UWord arg2 )
1310 {
1311    SysRes r = res;
1312    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1313    if (!ML_(fd_allowed)(sr_Res(res), "accept", tid, True)) {
1314       VG_(close)(sr_Res(res));
1315       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1316    } else {
1317       Addr addr_p     = arg1;
1318       Addr addrlen_p  = arg2;
1319       if (addr_p != (Addr)NULL)
1320          ML_(buf_and_len_post_check) ( tid, res, addr_p, addrlen_p,
1321                                        "socketcall.accept(addrlen_out)" );
1322       if (VG_(clo_track_fds))
1323           ML_(record_fd_open_nameless)(tid, sr_Res(res));
1324    }
1325    return r;
1326 }
1327 
1328 /* ------ */
1329 
1330 void
ML_(generic_PRE_sys_sendto)1331 ML_(generic_PRE_sys_sendto) ( ThreadId tid,
1332                               UWord arg0, UWord arg1, UWord arg2,
1333                               UWord arg3, UWord arg4, UWord arg5 )
1334 {
1335    /* int sendto(int s, const void *msg, int len,
1336                  unsigned int flags,
1337                  const struct sockaddr *to, int tolen); */
1338    PRE_MEM_READ( "socketcall.sendto(msg)",
1339                  arg1, /* msg */
1340                  arg2  /* len */ );
1341    pre_mem_read_sockaddr(
1342       tid, "socketcall.sendto(to.%s)",
1343       (struct vki_sockaddr *) arg4, arg5
1344    );
1345 }
1346 
1347 /* ------ */
1348 
1349 void
ML_(generic_PRE_sys_send)1350 ML_(generic_PRE_sys_send) ( ThreadId tid,
1351                             UWord arg0, UWord arg1, UWord arg2 )
1352 {
1353    /* int send(int s, const void *msg, size_t len, int flags); */
1354    PRE_MEM_READ( "socketcall.send(msg)",
1355                   arg1, /* msg */
1356                   arg2  /* len */ );
1357 
1358 }
1359 
1360 /* ------ */
1361 
1362 void
ML_(generic_PRE_sys_recvfrom)1363 ML_(generic_PRE_sys_recvfrom) ( ThreadId tid,
1364                                 UWord arg0, UWord arg1, UWord arg2,
1365                                 UWord arg3, UWord arg4, UWord arg5 )
1366 {
1367    /* int recvfrom(int s, void *buf, int len, unsigned int flags,
1368                    struct sockaddr *from, int *fromlen); */
1369    Addr buf_p      = arg1;
1370    Int  len        = arg2;
1371    Addr from_p     = arg4;
1372    Addr fromlen_p  = arg5;
1373    PRE_MEM_WRITE( "socketcall.recvfrom(buf)", buf_p, len );
1374    if (from_p != (Addr)NULL)
1375       ML_(buf_and_len_pre_check) ( tid, from_p, fromlen_p,
1376                                    "socketcall.recvfrom(from)",
1377                                    "socketcall.recvfrom(fromlen_in)" );
1378 }
1379 
1380 void
ML_(generic_POST_sys_recvfrom)1381 ML_(generic_POST_sys_recvfrom) ( ThreadId tid,
1382                                  SysRes res,
1383                                  UWord arg0, UWord arg1, UWord arg2,
1384                                  UWord arg3, UWord arg4, UWord arg5 )
1385 {
1386    Addr buf_p      = arg1;
1387    Int  len        = arg2;
1388    Addr from_p     = arg4;
1389    Addr fromlen_p  = arg5;
1390 
1391    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1392    if (from_p != (Addr)NULL)
1393       ML_(buf_and_len_post_check) ( tid, res, from_p, fromlen_p,
1394                                     "socketcall.recvfrom(fromlen_out)" );
1395    POST_MEM_WRITE( buf_p, len );
1396 }
1397 
1398 /* ------ */
1399 
1400 void
ML_(generic_PRE_sys_recv)1401 ML_(generic_PRE_sys_recv) ( ThreadId tid,
1402                             UWord arg0, UWord arg1, UWord arg2 )
1403 {
1404    /* int recv(int s, void *buf, int len, unsigned int flags); */
1405    /* man 2 recv says:
1406       The  recv call is normally used only on a connected socket
1407       (see connect(2)) and is identical to recvfrom with a  NULL
1408       from parameter.
1409    */
1410    PRE_MEM_WRITE( "socketcall.recv(buf)",
1411                   arg1, /* buf */
1412                   arg2  /* len */ );
1413 }
1414 
1415 void
ML_(generic_POST_sys_recv)1416 ML_(generic_POST_sys_recv) ( ThreadId tid,
1417                              UWord res,
1418                              UWord arg0, UWord arg1, UWord arg2 )
1419 {
1420    if (res >= 0 && arg1 != 0) {
1421       POST_MEM_WRITE( arg1, /* buf */
1422                       arg2  /* len */ );
1423    }
1424 }
1425 
1426 /* ------ */
1427 
1428 void
ML_(generic_PRE_sys_connect)1429 ML_(generic_PRE_sys_connect) ( ThreadId tid,
1430                                UWord arg0, UWord arg1, UWord arg2 )
1431 {
1432    /* int connect(int sockfd,
1433                   struct sockaddr *serv_addr, int addrlen ); */
1434    pre_mem_read_sockaddr( tid,
1435                           "socketcall.connect(serv_addr.%s)",
1436                           (struct vki_sockaddr *) arg1, arg2);
1437 }
1438 
1439 /* ------ */
1440 
1441 void
ML_(generic_PRE_sys_setsockopt)1442 ML_(generic_PRE_sys_setsockopt) ( ThreadId tid,
1443                                   UWord arg0, UWord arg1, UWord arg2,
1444                                   UWord arg3, UWord arg4 )
1445 {
1446    /* int setsockopt(int s, int level, int optname,
1447                      const void *optval, int optlen); */
1448    PRE_MEM_READ( "socketcall.setsockopt(optval)",
1449                  arg3, /* optval */
1450                  arg4  /* optlen */ );
1451 }
1452 
1453 /* ------ */
1454 
1455 void
ML_(generic_PRE_sys_getsockname)1456 ML_(generic_PRE_sys_getsockname) ( ThreadId tid,
1457                                    UWord arg0, UWord arg1, UWord arg2 )
1458 {
1459    /* int getsockname(int s, struct sockaddr* name, int* namelen) */
1460    Addr name_p     = arg1;
1461    Addr namelen_p  = arg2;
1462    /* Nb: name_p cannot be NULL */
1463    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1464                                 "socketcall.getsockname(name)",
1465                                 "socketcall.getsockname(namelen_in)" );
1466 }
1467 
1468 void
ML_(generic_POST_sys_getsockname)1469 ML_(generic_POST_sys_getsockname) ( ThreadId tid,
1470                                     SysRes res,
1471                                     UWord arg0, UWord arg1, UWord arg2 )
1472 {
1473    Addr name_p     = arg1;
1474    Addr namelen_p  = arg2;
1475    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1476    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1477                                  "socketcall.getsockname(namelen_out)" );
1478 }
1479 
1480 /* ------ */
1481 
1482 void
ML_(generic_PRE_sys_getpeername)1483 ML_(generic_PRE_sys_getpeername) ( ThreadId tid,
1484                                    UWord arg0, UWord arg1, UWord arg2 )
1485 {
1486    /* int getpeername(int s, struct sockaddr* name, int* namelen) */
1487    Addr name_p     = arg1;
1488    Addr namelen_p  = arg2;
1489    /* Nb: name_p cannot be NULL */
1490    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1491                                 "socketcall.getpeername(name)",
1492                                 "socketcall.getpeername(namelen_in)" );
1493 }
1494 
1495 void
ML_(generic_POST_sys_getpeername)1496 ML_(generic_POST_sys_getpeername) ( ThreadId tid,
1497                                     SysRes res,
1498                                     UWord arg0, UWord arg1, UWord arg2 )
1499 {
1500    Addr name_p     = arg1;
1501    Addr namelen_p  = arg2;
1502    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1503    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1504                                  "socketcall.getpeername(namelen_out)" );
1505 }
1506 
1507 /* ------ */
1508 
1509 void
ML_(generic_PRE_sys_sendmsg)1510 ML_(generic_PRE_sys_sendmsg) ( ThreadId tid, Char *name, struct vki_msghdr *msg )
1511 {
1512    msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_read_sendmsg );
1513 }
1514 
1515 /* ------ */
1516 
1517 void
ML_(generic_PRE_sys_recvmsg)1518 ML_(generic_PRE_sys_recvmsg) ( ThreadId tid, Char *name, struct vki_msghdr *msg )
1519 {
1520    msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_write_recvmsg );
1521 }
1522 
1523 void
ML_(generic_POST_sys_recvmsg)1524 ML_(generic_POST_sys_recvmsg) ( ThreadId tid, Char *name, struct vki_msghdr *msg, UInt length )
1525 {
1526    msghdr_foreachfield( tid, name, msg, length, post_mem_write_recvmsg );
1527    check_cmsg_for_fds( tid, msg );
1528 }
1529 
1530 
1531 /* ---------------------------------------------------------------------
1532    Deal with a bunch of IPC related syscalls
1533    ------------------------------------------------------------------ */
1534 
1535 /* ------ */
1536 
1537 void
ML_(generic_PRE_sys_semop)1538 ML_(generic_PRE_sys_semop) ( ThreadId tid,
1539                              UWord arg0, UWord arg1, UWord arg2 )
1540 {
1541    /* int semop(int semid, struct sembuf *sops, unsigned nsops); */
1542    PRE_MEM_READ( "semop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
1543 }
1544 
1545 /* ------ */
1546 
1547 void
ML_(generic_PRE_sys_semtimedop)1548 ML_(generic_PRE_sys_semtimedop) ( ThreadId tid,
1549                                   UWord arg0, UWord arg1,
1550                                   UWord arg2, UWord arg3 )
1551 {
1552    /* int semtimedop(int semid, struct sembuf *sops, unsigned nsops,
1553                      struct timespec *timeout); */
1554    PRE_MEM_READ( "semtimedop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
1555    if (arg3 != 0)
1556       PRE_MEM_READ( "semtimedop(timeout)", arg3, sizeof(struct vki_timespec) );
1557 }
1558 
1559 /* ------ */
1560 
/* Ask the kernel (semctl IPC_STAT) how many semaphores are in the set
   'semid'.  Returns 0 on failure. */
static
UInt get_sem_count( Int semid )
{
   struct vki_semid_ds buf;
   union vki_semun arg;
   SysRes res;

   /* Doesn't actually seem to be necessary, but gcc-4.4.0 20081017
      (experimental) otherwise complains that the use in the return
      statement below is uninitialised. */
   buf.sem_nsems = 0;

   arg.buf = &buf;

#  ifdef __NR_semctl
   /* semctl's 4th argument is the semun union passed by value; the
      cast reinterprets it as a single machine word for the raw
      syscall. */
   res = VG_(do_syscall4)(__NR_semctl, semid, 0, VKI_IPC_STAT, *(UWord *)&arg);
#  else
   /* No direct semctl syscall: go through the multiplexed ipc(2)
      entry point, which takes a pointer to the union instead. */
   res = VG_(do_syscall5)(__NR_ipc, 3 /* IPCOP_semctl */, semid, 0,
                          VKI_IPC_STAT, (UWord)&arg);
#  endif
   if (sr_isError(res))
      return 0;

   return buf.sem_nsems;
}
1586 
/* Pre-handler for semctl(): depending on the command, the kernel
   either reads or writes the buffer/array carried in the semun union
   (smuggled in via arg3); tell the tool which, and how much. */
void
ML_(generic_PRE_sys_semctl) ( ThreadId tid,
                              UWord arg0, UWord arg1,
                              UWord arg2, UWord arg3 )
{
   /* int semctl(int semid, int semnum, int cmd, ...); */
   /* arg3 is the semun union passed by value; reinterpret the word. */
   union vki_semun arg = *(union vki_semun *)&arg3;
   UInt nsems;
   switch (arg2 /* cmd */) {
#if defined(VKI_IPC_INFO)
   case VKI_IPC_INFO:
   case VKI_SEM_INFO:
   case VKI_IPC_INFO|VKI_IPC_64:
   case VKI_SEM_INFO|VKI_IPC_64:
      /* Kernel fills in a seminfo struct. */
      PRE_MEM_WRITE( "semctl(IPC_INFO, arg.buf)",
                     (Addr)arg.buf, sizeof(struct vki_seminfo) );
      break;
#endif

   case VKI_IPC_STAT:
#if defined(VKI_SEM_STAT)
   case VKI_SEM_STAT:
#endif
      /* Kernel fills in the (old-style) semid_ds. */
      PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
                     (Addr)arg.buf, sizeof(struct vki_semid_ds) );
      break;

#if defined(VKI_IPC_64)
   case VKI_IPC_STAT|VKI_IPC_64:
#if defined(VKI_SEM_STAT)
   case VKI_SEM_STAT|VKI_IPC_64:
#endif
      /* IPC_64 variants use the larger semid64_ds layout. */
      PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
                     (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
      break;
#endif

   case VKI_IPC_SET:
      /* Kernel reads the caller-supplied semid_ds. */
      PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
                    (Addr)arg.buf, sizeof(struct vki_semid_ds) );
      break;

#if defined(VKI_IPC_64)
   case VKI_IPC_SET|VKI_IPC_64:
      PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
                    (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
      break;
#endif

   case VKI_GETALL:
#if defined(VKI_IPC_64)
   case VKI_GETALL|VKI_IPC_64:
#endif
      /* Kernel writes one unsigned short per semaphore in the set. */
      nsems = get_sem_count( arg0 );
      PRE_MEM_WRITE( "semctl(IPC_GETALL, arg.array)",
                     (Addr)arg.array, sizeof(unsigned short) * nsems );
      break;

   case VKI_SETALL:
#if defined(VKI_IPC_64)
   case VKI_SETALL|VKI_IPC_64:
#endif
      /* Kernel reads one unsigned short per semaphore in the set. */
      nsems = get_sem_count( arg0 );
      PRE_MEM_READ( "semctl(IPC_SETALL, arg.array)",
                    (Addr)arg.array, sizeof(unsigned short) * nsems );
      break;
   }
}
1655 
1656 void
ML_(generic_POST_sys_semctl)1657 ML_(generic_POST_sys_semctl) ( ThreadId tid,
1658                                UWord res,
1659                                UWord arg0, UWord arg1,
1660                                UWord arg2, UWord arg3 )
1661 {
1662    union vki_semun arg = *(union vki_semun *)&arg3;
1663    UInt nsems;
1664    switch (arg2 /* cmd */) {
1665 #if defined(VKI_IPC_INFO)
1666    case VKI_IPC_INFO:
1667    case VKI_SEM_INFO:
1668    case VKI_IPC_INFO|VKI_IPC_64:
1669    case VKI_SEM_INFO|VKI_IPC_64:
1670       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_seminfo) );
1671       break;
1672 #endif
1673 
1674    case VKI_IPC_STAT:
1675 #if defined(VKI_SEM_STAT)
1676    case VKI_SEM_STAT:
1677 #endif
1678       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1679       break;
1680 
1681 #if defined(VKI_IPC_64)
1682    case VKI_IPC_STAT|VKI_IPC_64:
1683    case VKI_SEM_STAT|VKI_IPC_64:
1684       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1685       break;
1686 #endif
1687 
1688    case VKI_GETALL:
1689 #if defined(VKI_IPC_64)
1690    case VKI_GETALL|VKI_IPC_64:
1691 #endif
1692       nsems = get_sem_count( arg0 );
1693       POST_MEM_WRITE( (Addr)arg.array, sizeof(unsigned short) * nsems );
1694       break;
1695    }
1696 }
1697 
1698 /* ------ */
1699 
1700 /* ------ */
1701 
/* Ask the kernel (shmctl IPC_STAT) for the size in bytes of shared
   memory segment 'shmid'.  Returns 0 on failure. */
static
SizeT get_shm_size ( Int shmid )
{
#ifdef __NR_shmctl
#  ifdef VKI_IPC_64
   struct vki_shmid64_ds buf;
#    ifdef VGP_amd64_linux
     /* See bug 222545 comment 7 */
     SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
                                     VKI_IPC_STAT, (UWord)&buf);
#    else
     SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
                                     VKI_IPC_STAT|VKI_IPC_64, (UWord)&buf);
#    endif
#  else /* !def VKI_IPC_64 */
   struct vki_shmid_ds buf;
   SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid, VKI_IPC_STAT, (UWord)&buf);
#  endif /* def VKI_IPC_64 */
#else
   /* No direct shmctl syscall: use the multiplexed ipc(2) entry. */
   struct vki_shmid_ds buf;
   SysRes __res = VG_(do_syscall5)(__NR_ipc, 24 /* IPCOP_shmctl */, shmid,
                                 VKI_IPC_STAT, 0, (UWord)&buf);
#endif
   if (sr_isError(__res))
      return 0;

   return (SizeT) buf.shm_segsz;
}
1730 
/* Pre-handler for shmat(): choose/validate the attach address.
   Returns the (possibly updated) shmaddr to use, or 0 if the requested
   address range is not acceptable. */
UWord
ML_(generic_PRE_sys_shmat) ( ThreadId tid,
                             UWord arg0, UWord arg1, UWord arg2 )
{
   /* void *shmat(int shmid, const void *shmaddr, int shmflg); */
   SizeT  segmentSize = get_shm_size ( arg0 );
   UWord tmp;
   Bool  ok;
   if (arg1 == 0) {
      /* Client let the kernel pick: we pick instead, asking aspacem
         for advice. */
      /* arm-linux only: work around the fact that
         VG_(am_get_advisory_client_simple) produces something that is
         VKI_PAGE_SIZE aligned, whereas what we want is something
         VKI_SHMLBA aligned, and VKI_SHMLBA >= VKI_PAGE_SIZE.  Hence
         increase the request size by VKI_SHMLBA - VKI_PAGE_SIZE and
         then round the result up to the next VKI_SHMLBA boundary.
         See bug 222545 comment 15.  So far, arm-linux is the only
         platform where this is known to be necessary. */
      vg_assert(VKI_SHMLBA >= VKI_PAGE_SIZE);
      if (VKI_SHMLBA > VKI_PAGE_SIZE) {
         segmentSize += VKI_SHMLBA - VKI_PAGE_SIZE;
      }
      tmp = VG_(am_get_advisory_client_simple)(0, segmentSize, &ok);
      if (ok) {
         if (VKI_SHMLBA > VKI_PAGE_SIZE) {
            arg1 = VG_ROUNDUP(tmp, VKI_SHMLBA);
         } else {
            arg1 = tmp;
         }
      }
      /* note: if !ok, arg1 stays 0 and the attach is refused. */
   }
   else if (!ML_(valid_client_addr)(arg1, segmentSize, tid, "shmat"))
      /* Client-chosen address overlaps Valgrind's own space: refuse. */
      arg1 = 0;
   return arg1;
}
1765 
/* Post-handler for a successful shmat(): tell aspacem and the tool
   about the new segment mapped at 'res', and flush any stale
   translations from that range. */
void
ML_(generic_POST_sys_shmat) ( ThreadId tid,
                              UWord res,
                              UWord arg0, UWord arg1, UWord arg2 )
{
   SizeT segmentSize = VG_PGROUNDUP(get_shm_size(arg0));
   if ( segmentSize > 0 ) {
      UInt prot = VKI_PROT_READ|VKI_PROT_WRITE;
      Bool d;

      if (arg2 & VKI_SHM_RDONLY)
         prot &= ~VKI_PROT_WRITE;
      /* It isn't exactly correct to pass 0 for the fd and offset
         here.  The kernel seems to think the corresponding section
         does have dev/ino numbers:

         04e52000-04ec8000 rw-s 00000000 00:06 1966090  /SYSV00000000 (deleted)

         However there is no obvious way to find them.  In order to
         cope with the discrepancy, aspacem's sync checker omits the
         dev/ino correspondence check in cases where V does not know
         the dev/ino. */
      d = VG_(am_notify_client_shmat)( res, segmentSize, prot );

      /* we don't distinguish whether it's read-only or
       * read-write -- it doesn't matter really. */
      VG_TRACK( new_mem_mmap, res, segmentSize, True, True, False,
                              0/*di_handle*/ );
      /* If aspacem changed mappings, dump any cached translations. */
      if (d)
         VG_(discard_translations)( (Addr64)res,
                                    (ULong)VG_PGROUNDUP(segmentSize),
                                    "ML_(generic_POST_sys_shmat)" );
   }
}
1800 
1801 /* ------ */
1802 
1803 Bool
ML_(generic_PRE_sys_shmdt)1804 ML_(generic_PRE_sys_shmdt) ( ThreadId tid, UWord arg0 )
1805 {
1806    /* int shmdt(const void *shmaddr); */
1807    return ML_(valid_client_addr)(arg0, 1, tid, "shmdt");
1808 }
1809 
/* Post-handler for a successful shmdt(): unhook the detached segment
   from aspacem and the tool, and flush translations from its range. */
void
ML_(generic_POST_sys_shmdt) ( ThreadId tid, UWord res, UWord arg0 )
{
   NSegment const* s = VG_(am_find_nsegment)(arg0);

   if (s != NULL) {
      /* Copy out the fields we need before the segment record dies. */
      Addr  s_start = s->start;
      SizeT s_len   = s->end+1 - s->start;
      Bool  d;

      vg_assert(s->kind == SkShmC);
      vg_assert(s->start == arg0);

      d = VG_(am_notify_munmap)(s_start, s_len);
      s = NULL; /* s is now invalid */
      VG_TRACK( die_mem_munmap, s_start, s_len );
      if (d)
         VG_(discard_translations)( (Addr64)s_start,
                                    (ULong)s_len,
                                    "ML_(generic_POST_sys_shmdt)" );
   }
}
1832 /* ------ */
1833 
/* Pre-handler for shmctl(): declare the read/write footprint of the
   'buf' argument, sized according to which 'cmd' variant is used
   (plain vs |VKI_IPC_64 layouts differ). */
void
ML_(generic_PRE_sys_shmctl) ( ThreadId tid,
                              UWord arg0, UWord arg1, UWord arg2 )
{
   /* int shmctl(int shmid, int cmd, struct shmid_ds *buf); */
   switch (arg1 /* cmd */) {
#if defined(VKI_IPC_INFO)
   case VKI_IPC_INFO:
      PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
                     arg2, sizeof(struct vki_shminfo) );
      break;
#if defined(VKI_IPC_64)
   case VKI_IPC_INFO|VKI_IPC_64:
      PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
                     arg2, sizeof(struct vki_shminfo64) );
      break;
#endif
#endif

#if defined(VKI_SHM_INFO)
   case VKI_SHM_INFO:
#if defined(VKI_IPC_64)
   case VKI_SHM_INFO|VKI_IPC_64:
#endif
      PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)",
                     arg2, sizeof(struct vki_shm_info) );
      break;
#endif

   case VKI_IPC_STAT:
#if defined(VKI_SHM_STAT)
   case VKI_SHM_STAT:
#endif
      PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)",
                     arg2, sizeof(struct vki_shmid_ds) );
      break;

#if defined(VKI_IPC_64)
   case VKI_IPC_STAT|VKI_IPC_64:
   case VKI_SHM_STAT|VKI_IPC_64:
      PRE_MEM_WRITE( "shmctl(IPC_STAT, arg.buf)",
                     arg2, sizeof(struct vki_shmid64_ds) );
      break;
#endif

   /* IPC_SET reads the buffer rather than writing it. */
   case VKI_IPC_SET:
      PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
                    arg2, sizeof(struct vki_shmid_ds) );
      break;

#if defined(VKI_IPC_64)
   case VKI_IPC_SET|VKI_IPC_64:
      PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
                    arg2, sizeof(struct vki_shmid64_ds) );
      break;
#endif
   }
}
1892 
1893 void
ML_(generic_POST_sys_shmctl)1894 ML_(generic_POST_sys_shmctl) ( ThreadId tid,
1895                                UWord res,
1896                                UWord arg0, UWord arg1, UWord arg2 )
1897 {
1898    switch (arg1 /* cmd */) {
1899 #if defined(VKI_IPC_INFO)
1900    case VKI_IPC_INFO:
1901       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo) );
1902       break;
1903    case VKI_IPC_INFO|VKI_IPC_64:
1904       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo64) );
1905       break;
1906 #endif
1907 
1908 #if defined(VKI_SHM_INFO)
1909    case VKI_SHM_INFO:
1910    case VKI_SHM_INFO|VKI_IPC_64:
1911       POST_MEM_WRITE( arg2, sizeof(struct vki_shm_info) );
1912       break;
1913 #endif
1914 
1915    case VKI_IPC_STAT:
1916 #if defined(VKI_SHM_STAT)
1917    case VKI_SHM_STAT:
1918 #endif
1919       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid_ds) );
1920       break;
1921 
1922 #if defined(VKI_IPC_64)
1923    case VKI_IPC_STAT|VKI_IPC_64:
1924    case VKI_SHM_STAT|VKI_IPC_64:
1925       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid64_ds) );
1926       break;
1927 #endif
1928 
1929 
1930    }
1931 }
1932 
1933 
1934 /* ---------------------------------------------------------------------
1935    Generic handler for mmap
1936    ------------------------------------------------------------------ */
1937 
1938 /*
1939  * Although mmap is specified by POSIX and the argument are generally
1940  * consistent across platforms the precise details of the low level
1941  * argument passing conventions differ. For example:
1942  *
1943  * - On x86-linux there is mmap (aka old_mmap) which takes the
1944  *   arguments in a memory block and the offset in bytes; and
1945  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
1946  *   way and the offset in pages.
1947  *
1948  * - On ppc32-linux there is mmap (aka sys_mmap) which takes the
1949  *   arguments in the normal way and the offset in bytes; and
1950  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
1951  *   way and the offset in pages.
1952  *
1953  * - On amd64-linux everything is simple and there is just the one
1954  *   call, mmap (aka sys_mmap)  which takes the arguments in the
1955  *   normal way and the offset in bytes.
1956  *
1957  * - On s390x-linux there is mmap (aka old_mmap) which takes the
1958  *   arguments in a memory block and the offset in bytes. mmap2
1959  *   is also available (but not exported via unistd.h) with
1960  *   arguments in a memory block and the offset in pages.
1961  *
1962  * To cope with all this we provide a generic handler function here
1963  * and then each platform implements one or more system call handlers
1964  * which call this generic routine after extracting and normalising
1965  * the arguments.
1966  */
1967 
/* Generic mmap handler: validate the request, ask aspacem where the
   mapping should go, perform the mmap ourselves (MAP_FIXED at the
   advised address), and on success notify aspacem, the debuginfo
   reader, and the tool.  Returns the SysRes to hand back to the
   client.  See the long comment above for why each platform funnels
   its mmap variants through here. */
SysRes
ML_(generic_PRE_sys_mmap) ( ThreadId tid,
                            UWord arg1, UWord arg2, UWord arg3,
                            UWord arg4, UWord arg5, Off64T arg6 )
{
   Addr       advised;
   SysRes     sres;
   MapRequest mreq;
   Bool       mreq_ok;

#if defined(VGO_darwin)
   // Nb: we can't use this on Darwin, it has races:
   // * needs to RETRY if advisory succeeds but map fails
   //   (could have been some other thread in a nonblocking call)
   // * needs to not use fixed-position mmap() on Darwin
   //   (mmap will cheerfully smash whatever's already there, which might
   //   be a new mapping from some other thread in a nonblocking call)
   VG_(core_panic)("can't use ML_(generic_PRE_sys_mmap) on Darwin");
#endif

   if (arg2 == 0) {
      /* SuSV3 says: If len is zero, mmap() shall fail and no mapping
         shall be established. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   if (!VG_IS_PAGE_ALIGNED(arg1)) {
      /* zap any misaligned addresses. */
      /* SuSV3 says misaligned addresses only cause the MAP_FIXED case
         to fail.   Here, we catch them all. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   if (!VG_IS_PAGE_ALIGNED(arg6)) {
      /* zap any misaligned offsets. */
      /* SuSV3 says: The off argument is constrained to be aligned and
         sized according to the value returned by sysconf() when
         passed _SC_PAGESIZE or _SC_PAGE_SIZE. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   /* Figure out what kind of allocation constraints there are
      (fixed/hint/any), and ask aspacem what we should do. */
   mreq.start = arg1;
   mreq.len   = arg2;
   if (arg4 & VKI_MAP_FIXED) {
      mreq.rkind = MFixed;
   } else
   if (arg1 != 0) {
      mreq.rkind = MHint;
   } else {
      mreq.rkind = MAny;
   }

   /* Enquire ... */
   advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
   if (!mreq_ok) {
      /* Our request was bounced, so we'd better fail. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   /* Otherwise we're OK (so far).  Install aspacem's choice of
      address, and let the mmap go through.  */
   sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
                                    arg4 | VKI_MAP_FIXED,
                                    arg5, arg6);

   /* A refinement: it may be that the kernel refused aspacem's choice
      of address.  If we were originally asked for a hinted mapping,
      there is still a last chance: try again at any address.
      Hence: */
   if (mreq.rkind == MHint && sr_isError(sres)) {
      mreq.start = 0;
      mreq.len   = arg2;
      mreq.rkind = MAny;
      advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
      if (!mreq_ok) {
         /* Our request was bounced, so we'd better fail. */
         return VG_(mk_SysRes_Error)( VKI_EINVAL );
      }
      /* and try again with the kernel */
      sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
                                       arg4 | VKI_MAP_FIXED,
                                       arg5, arg6);
   }

   if (!sr_isError(sres)) {
      ULong di_handle;
      /* Notify aspacem. */
      notify_core_of_mmap(
         (Addr)sr_Res(sres), /* addr kernel actually assigned */
         arg2, /* length */
         arg3, /* prot */
         arg4, /* the original flags value */
         arg5, /* fd */
         arg6  /* offset */
      );
      /* Load symbols? */
      di_handle = VG_(di_notify_mmap)( (Addr)sr_Res(sres),
                                       False/*allow_SkFileV*/, (Int)arg5 );
      /* Notify the tool. */
      notify_tool_of_mmap(
         (Addr)sr_Res(sres), /* addr kernel actually assigned */
         arg2, /* length */
         arg3, /* prot */
         di_handle /* so the tool can refer to the read debuginfo later,
                      if it wants. */
      );
   }

   /* Stay sane: a MAP_FIXED request that succeeded must have landed
      exactly where the client asked. */
   if (!sr_isError(sres) && (arg4 & VKI_MAP_FIXED))
      vg_assert(sr_Res(sres) == arg1);

   return sres;
}
2084 
2085 
2086 /* ---------------------------------------------------------------------
2087    The Main Entertainment ... syscall wrappers
2088    ------------------------------------------------------------------ */
2089 
2090 /* Note: the PRE() and POST() wrappers are for the actual functions
2091    implementing the system calls in the OS kernel.  These mostly have
2092    names like sys_write();  a few have names like old_mmap().  See the
2093    comment for ML_(syscall_table)[] for important info about the __NR_foo
2094    constants and their relationship to the sys_foo() functions.
2095 
2096    Some notes about names used for syscalls and args:
2097    - For the --trace-syscalls=yes output, we use the sys_foo() name to avoid
2098      ambiguity.
2099 
2100    - For error messages, we generally use a somewhat generic name
2101      for the syscall (eg. "write" rather than "sys_write").  This should be
2102      good enough for the average user to understand what is happening,
2103      without confusing them with names like "sys_write".
2104 
2105    - Also, for error messages the arg names are mostly taken from the man
2106      pages (even though many of those man pages are really for glibc
2107      functions of the same name), rather than from the OS kernel source,
2108      for the same reason -- a user presented with a "bogus foo(bar)" arg
2109      will most likely look at the "foo" man page to see which is the "bar"
2110      arg.
2111 
2112    Note that we use our own vki_* types.  The one exception is in
2113    PRE_REG_READn calls, where pointer types haven't been changed, because
2114    they don't need to be -- eg. for "foo*" to be used, the type foo need not
2115    be visible.
2116 
2117    XXX: some of these are arch-specific, and should be factored out.
2118 */
2119 
/* Shorthand for declaring the generic PRE/POST wrapper functions. */
#define PRE(name)      DEFN_PRE_TEMPLATE(generic, name)
#define POST(name)     DEFN_POST_TEMPLATE(generic, name)

// Macros to support 64-bit syscall args split into two 32 bit values
// MERGE64 reassembles the ULong; MERGE64_FIRST/SECOND name the half
// that arrives in the first/second of the two registers, which
// depends on endianness.
#if defined(VG_LITTLEENDIAN)
#define MERGE64(lo,hi)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
#define MERGE64_FIRST(name) name##_low
#define MERGE64_SECOND(name) name##_high
#elif defined(VG_BIGENDIAN)
#define MERGE64(hi,lo)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
#define MERGE64_FIRST(name) name##_high
#define MERGE64_SECOND(name) name##_low
#else
#error Unknown endianness
#endif
2135 
/* exit(2): never passed to the kernel; we mark the thread as exiting
   and fake success, letting the scheduler tear the thread down. */
PRE(sys_exit)
{
   ThreadState* tst;
   /* simple; just make this thread exit */
   PRINT("exit( %ld )", ARG1);
   PRE_REG_READ1(void, "exit", int, status);
   tst = VG_(get_ThreadState)(tid);
   /* Set the thread's status to be exiting, then claim that the
      syscall succeeded. */
   tst->exitreason = VgSrc_ExitThread;
   tst->os_state.exitcode = ARG1;
   SET_STATUS_Success(0);
}
2149 
/* Catch-all for syscalls the kernel doesn't implement: fail with
   ENOSYS without entering the kernel. */
PRE(sys_ni_syscall)
{
   PRINT("unimplemented (by the kernel) syscall: %s! (ni_syscall)\n",
      VG_SYSNUM_STRING(SYSNO));
   PRE_REG_READ0(long, "ni_syscall");
   SET_STATUS_Failure( VKI_ENOSYS );
}
2157 
/* iopl(2): no memory footprint; just trace the register arg. */
PRE(sys_iopl)
{
   PRINT("sys_iopl ( %ld )", ARG1);
   PRE_REG_READ1(long, "iopl", unsigned long, level);
}
2163 
/* fsync(2): may block while data is flushed to disk. */
PRE(sys_fsync)
{
   *flags |= SfMayBlock;
   PRINT("sys_fsync ( %ld )", ARG1);
   PRE_REG_READ1(long, "fsync", unsigned int, fd);
}
2170 
/* fdatasync(2): may block while data is flushed to disk. */
PRE(sys_fdatasync)
{
   *flags |= SfMayBlock;
   PRINT("sys_fdatasync ( %ld )", ARG1);
   PRE_REG_READ1(long, "fdatasync", unsigned int, fd);
}
2177 
/* msync(2): the synced range must be addressable by the client. */
PRE(sys_msync)
{
   *flags |= SfMayBlock;
   PRINT("sys_msync ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
   PRE_REG_READ3(long, "msync",
                 unsigned long, start, vki_size_t, length, int, flags);
   PRE_MEM_READ( "msync(start)", ARG1, ARG2 );
}
2186 
2187 // Nb: getpmsg() and putpmsg() are special additional syscalls used in early
2188 // versions of LiS (Linux Streams).  They are not part of the kernel.
2189 // Therefore, we have to provide this type ourself, rather than getting it
2190 // from the kernel sources.
/* STREAMS strbuf as used by LiS getpmsg/putpmsg (see note above). */
struct vki_pmsg_strbuf {
   int     maxlen;         /* no. of bytes in buffer */
   int     len;            /* no. of bytes returned */
   vki_caddr_t buf;        /* pointer to data */
};
/* getpmsg: declare the ctrl/data buffers and the two out-params
   (bandp, flagsp) as written by the kernel. */
PRE(sys_getpmsg)
{
   /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
   struct vki_pmsg_strbuf *ctrl;
   struct vki_pmsg_strbuf *data;
   *flags |= SfMayBlock;
   PRINT("sys_getpmsg ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
   PRE_REG_READ5(int, "getpmsg",
                 int, fd, struct strbuf *, ctrl, struct strbuf *, data,
                 int *, bandp, int *, flagsp);
   ctrl = (struct vki_pmsg_strbuf *)ARG2;
   data = (struct vki_pmsg_strbuf *)ARG3;
   /* maxlen gives the capacity the kernel may fill. */
   if (ctrl && ctrl->maxlen > 0)
      PRE_MEM_WRITE( "getpmsg(ctrl)", (Addr)ctrl->buf, ctrl->maxlen);
   if (data && data->maxlen > 0)
      PRE_MEM_WRITE( "getpmsg(data)", (Addr)data->buf, data->maxlen);
   if (ARG4)
      PRE_MEM_WRITE( "getpmsg(bandp)", (Addr)ARG4, sizeof(int));
   if (ARG5)
      PRE_MEM_WRITE( "getpmsg(flagsp)", (Addr)ARG5, sizeof(int));
}
/* getpmsg post: on success (RES == 0) the kernel filled 'len' bytes
   of each strbuf; mark them initialised. */
POST(sys_getpmsg)
{
   struct vki_pmsg_strbuf *ctrl;
   struct vki_pmsg_strbuf *data;
   vg_assert(SUCCESS);
   ctrl = (struct vki_pmsg_strbuf *)ARG2;
   data = (struct vki_pmsg_strbuf *)ARG3;
   if (RES == 0 && ctrl && ctrl->len > 0) {
      POST_MEM_WRITE( (Addr)ctrl->buf, ctrl->len);
   }
   if (RES == 0 && data && data->len > 0) {
      POST_MEM_WRITE( (Addr)data->buf, data->len);
   }
}
2231 
/* putpmsg: the kernel reads 'len' bytes from each strbuf. */
PRE(sys_putpmsg)
{
   /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
   struct vki_pmsg_strbuf *ctrl;
   struct vki_pmsg_strbuf *data;
   *flags |= SfMayBlock;
   PRINT("sys_putpmsg ( %ld, %#lx, %#lx, %ld, %ld )", ARG1,ARG2,ARG3,ARG4,ARG5);
   PRE_REG_READ5(int, "putpmsg",
                 int, fd, struct strbuf *, ctrl, struct strbuf *, data,
                 int, band, int, flags);
   ctrl = (struct vki_pmsg_strbuf *)ARG2;
   data = (struct vki_pmsg_strbuf *)ARG3;
   if (ctrl && ctrl->len > 0)
      PRE_MEM_READ( "putpmsg(ctrl)", (Addr)ctrl->buf, ctrl->len);
   if (data && data->len > 0)
      PRE_MEM_READ( "putpmsg(data)", (Addr)data->buf, data->len);
}
2249 
/* getitimer(2): both timeval fields of *value are kernel-written.
   Note only field addresses are computed here, so a NULL 'value' is
   not dereferenced. */
PRE(sys_getitimer)
{
   struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
   PRINT("sys_getitimer ( %ld, %#lx )", ARG1, ARG2);
   PRE_REG_READ2(long, "getitimer", int, which, struct itimerval *, value);

   PRE_timeval_WRITE( "getitimer(&value->it_interval)", &(value->it_interval));
   PRE_timeval_WRITE( "getitimer(&value->it_value)",    &(value->it_value));
}
2259 
/* getitimer post: mark both timeval fields as initialised. */
POST(sys_getitimer)
{
   if (ARG2 != (Addr)NULL) {
      struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
      POST_timeval_WRITE( &(value->it_interval) );
      POST_timeval_WRITE( &(value->it_value) );
   }
}
2268 
/* setitimer(2): 'value' is read by the kernel, 'ovalue' (if given)
   receives the previous setting. */
PRE(sys_setitimer)
{
   PRINT("sys_setitimer ( %ld, %#lx, %#lx )", ARG1,ARG2,ARG3);
   PRE_REG_READ3(long, "setitimer",
                 int, which,
                 struct itimerval *, value, struct itimerval *, ovalue);
   if (ARG2 != (Addr)NULL) {
      struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
      PRE_timeval_READ( "setitimer(&value->it_interval)",
                         &(value->it_interval));
      PRE_timeval_READ( "setitimer(&value->it_value)",
                         &(value->it_value));
   }
   if (ARG3 != (Addr)NULL) {
      struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
      PRE_timeval_WRITE( "setitimer(&ovalue->it_interval)",
                         &(ovalue->it_interval));
      PRE_timeval_WRITE( "setitimer(&ovalue->it_value)",
                         &(ovalue->it_value));
   }
}
2290 
/* setitimer post: the old-value struct (if requested) is now valid. */
POST(sys_setitimer)
{
   if (ARG3 != (Addr)NULL) {
      struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
      POST_timeval_WRITE( &(ovalue->it_interval) );
      POST_timeval_WRITE( &(ovalue->it_value) );
   }
}
2299 
/* chroot(2): path must be a readable NUL-terminated string. */
PRE(sys_chroot)
{
   PRINT("sys_chroot ( %#lx )", ARG1);
   PRE_REG_READ1(long, "chroot", const char *, path);
   PRE_MEM_RASCIIZ( "chroot(path)", ARG1 );
}
2306 
/* madvise(2): advisory only; no client-memory footprint to declare. */
PRE(sys_madvise)
{
   *flags |= SfMayBlock;
   PRINT("sys_madvise ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
   PRE_REG_READ3(long, "madvise",
                 unsigned long, start, vki_size_t, length, int, advice);
}
2314 
#if HAVE_MREMAP
/* mremap(2): emulated entirely inside Valgrind via do_mremap(), so the
   real syscall is never issued. */
PRE(sys_mremap)
{
   // Nb: this is different to the glibc version described in the man pages,
   // which lacks the fifth 'new_address' argument.
   if (ARG4 & VKI_MREMAP_FIXED) {
      PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx, %#lx )",
            ARG1, (ULong)ARG2, ARG3, ARG4, ARG5);
      PRE_REG_READ5(unsigned long, "mremap",
                    unsigned long, old_addr, unsigned long, old_size,
                    unsigned long, new_size, unsigned long, flags,
                    unsigned long, new_addr);
   } else {
      PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx )",
            ARG1, (ULong)ARG2, ARG3, ARG4);
      PRE_REG_READ4(unsigned long, "mremap",
                    unsigned long, old_addr, unsigned long, old_size,
                    unsigned long, new_size, unsigned long, flags);
   }
   /* Perform the remap ourselves and report its outcome directly. */
   SET_STATUS_from_SysRes(
      do_mremap((Addr)ARG1, ARG2, (Addr)ARG5, ARG3, ARG4, tid)
   );
}
#endif /* HAVE_MREMAP */
2339 
/* nice(2): register-only arguments. */
PRE(sys_nice)
{
   PRINT("sys_nice ( %ld )", ARG1);
   PRE_REG_READ1(long, "nice", int, inc);
}
2345 
/* mlock(2): register-only arguments; may block. */
PRE(sys_mlock)
{
   *flags |= SfMayBlock;
   PRINT("sys_mlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
   PRE_REG_READ2(long, "mlock", unsigned long, addr, vki_size_t, len);
}
2352 
/* munlock(2): register-only arguments; may block. */
PRE(sys_munlock)
{
   *flags |= SfMayBlock;
   PRINT("sys_munlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
   PRE_REG_READ2(long, "munlock", unsigned long, addr, vki_size_t, len);
}
2359 
/* mlockall(2): register-only arguments; may block. */
PRE(sys_mlockall)
{
   *flags |= SfMayBlock;
   PRINT("sys_mlockall ( %lx )", ARG1);
   PRE_REG_READ1(long, "mlockall", int, flags);
}
2366 
/* setpriority(2): register-only arguments. */
PRE(sys_setpriority)
{
   PRINT("sys_setpriority ( %ld, %ld, %ld )", ARG1, ARG2, ARG3);
   PRE_REG_READ3(long, "setpriority", int, which, int, who, int, prio);
}
2372 
/* getpriority(2): register-only arguments. */
PRE(sys_getpriority)
{
   PRINT("sys_getpriority ( %ld, %ld )", ARG1, ARG2);
   PRE_REG_READ2(long, "getpriority", int, which, int, who);
}
2378 
/* pwrite64: on 32-bit targets the 64-bit offset arrives as two 32-bit
   register halves (see MERGE64); on 64-bit targets as one register.
   The source buffer must be readable for 'count' bytes. */
PRE(sys_pwrite64)
{
   *flags |= SfMayBlock;
#if VG_WORDSIZE == 4
   PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
         ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
   PRE_REG_READ5(ssize_t, "pwrite64",
                 unsigned int, fd, const char *, buf, vki_size_t, count,
                 vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
#elif VG_WORDSIZE == 8
   PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
         ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
   PRE_REG_READ4(ssize_t, "pwrite64",
                 unsigned int, fd, const char *, buf, vki_size_t, count,
                 Word, offset);
#else
#  error Unexpected word size
#endif
   PRE_MEM_READ( "pwrite64(buf)", ARG2, ARG3 );
}
2399 
/* sync(2): no arguments; may block. */
PRE(sys_sync)
{
   *flags |= SfMayBlock;
   PRINT("sys_sync ( )");
   PRE_REG_READ0(long, "sync");
}
2406 
/* fstatfs(2): kernel fills a struct statfs at ARG2. */
PRE(sys_fstatfs)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_fstatfs ( %ld, %#lx )",ARG1,ARG2);
   PRE_REG_READ2(long, "fstatfs",
                 unsigned int, fd, struct statfs *, buf);
   PRE_MEM_WRITE( "fstatfs(buf)", ARG2, sizeof(struct vki_statfs) );
}
2415 
/* fstatfs post: the statfs buffer is now valid. */
POST(sys_fstatfs)
{
   POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
}
2420 
/* fstatfs64(2): buffer size is supplied by the caller (ARG2). */
PRE(sys_fstatfs64)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_fstatfs64 ( %ld, %llu, %#lx )",ARG1,(ULong)ARG2,ARG3);
   PRE_REG_READ3(long, "fstatfs64",
                 unsigned int, fd, vki_size_t, size, struct statfs64 *, buf);
   PRE_MEM_WRITE( "fstatfs64(buf)", ARG3, ARG2 );
}
/* fstatfs64 post: caller-sized buffer is now valid. */
POST(sys_fstatfs64)
{
   POST_MEM_WRITE( ARG3, ARG2 );
}
2433 
/* getsid(2): register-only argument. */
PRE(sys_getsid)
{
   PRINT("sys_getsid ( %ld )", ARG1);
   PRE_REG_READ1(long, "getsid", vki_pid_t, pid);
}
2439 
/* pread64: offset-split handling mirrors pwrite64 (two 32-bit halves
   on 32-bit targets, one register on 64-bit).  The destination buffer
   must be writable for 'count' bytes. */
PRE(sys_pread64)
{
   *flags |= SfMayBlock;
#if VG_WORDSIZE == 4
   PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
         ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
   PRE_REG_READ5(ssize_t, "pread64",
                 unsigned int, fd, char *, buf, vki_size_t, count,
                 vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
#elif VG_WORDSIZE == 8
   PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
         ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
   PRE_REG_READ4(ssize_t, "pread64",
                 unsigned int, fd, char *, buf, vki_size_t, count,
                 Word, offset);
#else
#  error Unexpected word size
#endif
   PRE_MEM_WRITE( "pread64(buf)", ARG2, ARG3 );
}
/* pread64 post: only the RES bytes actually read are now valid. */
POST(sys_pread64)
{
   vg_assert(SUCCESS);
   if (RES > 0) {
      POST_MEM_WRITE( ARG2, RES );
   }
}
2467 
/* mknod(2): pathname must be a readable NUL-terminated string. */
PRE(sys_mknod)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_mknod ( %#lx(%s), 0x%lx, 0x%lx )", ARG1, (char*)ARG1, ARG2, ARG3 );
   PRE_REG_READ3(long, "mknod",
                 const char *, pathname, int, mode, unsigned, dev);
   PRE_MEM_RASCIIZ( "mknod(pathname)", ARG1 );
}
2476 
/* flock(2): register-only arguments; may block waiting for the lock. */
PRE(sys_flock)
{
   *flags |= SfMayBlock;
   PRINT("sys_flock ( %ld, %ld )", ARG1, ARG2 );
   PRE_REG_READ2(long, "flock", unsigned int, fd, unsigned int, operation);
}
2483 
2484 // Pre_read a char** argument.
pre_argv_envp(Addr a,ThreadId tid,Char * s1,Char * s2)2485 static void pre_argv_envp(Addr a, ThreadId tid, Char* s1, Char* s2)
2486 {
2487    while (True) {
2488       Addr a_deref;
2489       Addr* a_p = (Addr*)a;
2490       PRE_MEM_READ( s1, (Addr)a_p, sizeof(Addr) );
2491       a_deref = *a_p;
2492       if (0 == a_deref)
2493          break;
2494       PRE_MEM_RASCIIZ( s2, a_deref );
2495       a += sizeof(char*);
2496    }
2497 }
2498 
i_am_the_only_thread(void)2499 static Bool i_am_the_only_thread ( void )
2500 {
2501    Int c = VG_(count_living_threads)();
2502    vg_assert(c >= 1); /* stay sane */
2503    return c == 1;
2504 }
2505 
/* Wait until all other threads disappear. */
/* Spins, yielding and polling signals for 'self', until this is the
   only living thread.  Used e.g. before execve. */
void VG_(reap_threads)(ThreadId self)
{
   while (!i_am_the_only_thread()) {
      /* Let other thread(s) run */
      VG_(vg_yield)();
      VG_(poll_signals)(self);
   }
   vg_assert(i_am_the_only_thread());
}
2516 
2517 // XXX: prototype here seemingly doesn't match the prototype for i386-linux,
2518 // but it seems to work nonetheless...
PRE(sys_execve)2519 PRE(sys_execve)
2520 {
2521    Char*        path = NULL;       /* path to executable */
2522    Char**       envp = NULL;
2523    Char**       argv = NULL;
2524    Char**       arg2copy;
2525    Char*        launcher_basename = NULL;
2526    ThreadState* tst;
2527    Int          i, j, tot_args;
2528    SysRes       res;
2529    Bool         setuid_allowed, trace_this_child;
2530 
2531    PRINT("sys_execve ( %#lx(%s), %#lx, %#lx )", ARG1, (char*)ARG1, ARG2, ARG3);
2532    PRE_REG_READ3(vki_off_t, "execve",
2533                  char *, filename, char **, argv, char **, envp);
2534    PRE_MEM_RASCIIZ( "execve(filename)", ARG1 );
2535    if (ARG2 != 0)
2536       pre_argv_envp( ARG2, tid, "execve(argv)", "execve(argv[i])" );
2537    if (ARG3 != 0)
2538       pre_argv_envp( ARG3, tid, "execve(envp)", "execve(envp[i])" );
2539 
2540    vg_assert(VG_(is_valid_tid)(tid));
2541    tst = VG_(get_ThreadState)(tid);
2542 
2543    /* Erk.  If the exec fails, then the following will have made a
2544       mess of things which makes it hard for us to continue.  The
2545       right thing to do is piece everything together again in
2546       POST(execve), but that's close to impossible.  Instead, we make
2547       an effort to check that the execve will work before actually
2548       doing it. */
2549 
2550    /* Check that the name at least begins in client-accessible storage. */
2551    if (ARG1 == 0 /* obviously bogus */
2552        || !VG_(am_is_valid_for_client)( ARG1, 1, VKI_PROT_READ )) {
2553       SET_STATUS_Failure( VKI_EFAULT );
2554       return;
2555    }
2556 
2557    // debug-only printing
2558    if (0) {
2559       VG_(printf)("ARG1 = %p(%s)\n", (void*)ARG1, (HChar*)ARG1);
2560       if (ARG2) {
2561          VG_(printf)("ARG2 = ");
2562          Int q;
2563          HChar** vec = (HChar**)ARG2;
2564          for (q = 0; vec[q]; q++)
2565             VG_(printf)("%p(%s) ", vec[q], vec[q]);
2566          VG_(printf)("\n");
2567       } else {
2568          VG_(printf)("ARG2 = null\n");
2569       }
2570    }
2571 
2572    // Decide whether or not we want to follow along
2573    { // Make 'child_argv' be a pointer to the child's arg vector
2574      // (skipping the exe name)
2575      HChar** child_argv = (HChar**)ARG2;
2576      if (child_argv && child_argv[0] == NULL)
2577         child_argv = NULL;
2578      trace_this_child = VG_(should_we_trace_this_child)( (HChar*)ARG1, child_argv );
2579    }
2580 
2581    // Do the important checks:  it is a file, is executable, permissions are
2582    // ok, etc.  We allow setuid executables to run only in the case when
2583    // we are not simulating them, that is, they to be run natively.
2584    setuid_allowed = trace_this_child  ? False  : True;
2585    res = VG_(pre_exec_check)((const Char*)ARG1, NULL, setuid_allowed);
2586    if (sr_isError(res)) {
2587       SET_STATUS_Failure( sr_Err(res) );
2588       return;
2589    }
2590 
2591    /* If we're tracing the child, and the launcher name looks bogus
2592       (possibly because launcher.c couldn't figure it out, see
2593       comments therein) then we have no option but to fail. */
2594    if (trace_this_child
2595        && (VG_(name_of_launcher) == NULL
2596            || VG_(name_of_launcher)[0] != '/')) {
2597       SET_STATUS_Failure( VKI_ECHILD ); /* "No child processes" */
2598       return;
2599    }
2600 
2601    /* After this point, we can't recover if the execve fails. */
2602    VG_(debugLog)(1, "syswrap", "Exec of %s\n", (Char*)ARG1);
2603 
2604 
2605    // Terminate gdbserver if it is active.
2606    if (VG_(clo_vgdb)  != Vg_VgdbNo) {
2607       // If the child will not be traced, we need to terminate gdbserver
2608       // to cleanup the gdbserver resources (e.g. the FIFO files).
2609       // If child will be traced, we also terminate gdbserver: the new
2610       // Valgrind will start a fresh gdbserver after exec.
2611       VG_(gdbserver) (0);
2612    }
2613 
2614    /* Resistance is futile.  Nuke all other threads.  POSIX mandates
2615       this. (Really, nuke them all, since the new process will make
2616       its own new thread.) */
2617    VG_(nuke_all_threads_except)( tid, VgSrc_ExitThread );
2618    VG_(reap_threads)(tid);
2619 
2620    // Set up the child's exe path.
2621    //
2622    if (trace_this_child) {
2623 
2624       // We want to exec the launcher.  Get its pre-remembered path.
2625       path = VG_(name_of_launcher);
2626       // VG_(name_of_launcher) should have been acquired by m_main at
2627       // startup.
2628       vg_assert(path);
2629 
2630       launcher_basename = VG_(strrchr)(path, '/');
2631       if (launcher_basename == NULL || launcher_basename[1] == 0) {
2632          launcher_basename = path;  // hmm, tres dubious
2633       } else {
2634          launcher_basename++;
2635       }
2636 
2637    } else {
2638       path = (Char*)ARG1;
2639    }
2640 
2641    // Set up the child's environment.
2642    //
2643    // Remove the valgrind-specific stuff from the environment so the
2644    // child doesn't get vgpreload_core.so, vgpreload_<tool>.so, etc.
2645    // This is done unconditionally, since if we are tracing the child,
2646    // the child valgrind will set up the appropriate client environment.
2647    // Nb: we make a copy of the environment before trying to mangle it
2648    // as it might be in read-only memory (this was bug #101881).
2649    //
2650    // Then, if tracing the child, set VALGRIND_LIB for it.
2651    //
2652    if (ARG3 == 0) {
2653       envp = NULL;
2654    } else {
2655       envp = VG_(env_clone)( (Char**)ARG3 );
2656       if (envp == NULL) goto hosed;
2657       VG_(env_remove_valgrind_env_stuff)( envp );
2658    }
2659 
2660    if (trace_this_child) {
2661       // Set VALGRIND_LIB in ARG3 (the environment)
2662       VG_(env_setenv)( &envp, VALGRIND_LIB, VG_(libdir));
2663    }
2664 
2665    // Set up the child's args.  If not tracing it, they are
2666    // simply ARG2.  Otherwise, they are
2667    //
2668    // [launcher_basename] ++ VG_(args_for_valgrind) ++ [ARG1] ++ ARG2[1..]
2669    //
2670    // except that the first VG_(args_for_valgrind_noexecpass) args
2671    // are omitted.
2672    //
2673    if (!trace_this_child) {
2674       argv = (Char**)ARG2;
2675    } else {
2676       vg_assert( VG_(args_for_valgrind) );
2677       vg_assert( VG_(args_for_valgrind_noexecpass) >= 0 );
2678       vg_assert( VG_(args_for_valgrind_noexecpass)
2679                    <= VG_(sizeXA)( VG_(args_for_valgrind) ) );
2680       /* how many args in total will there be? */
2681       // launcher basename
2682       tot_args = 1;
2683       // V's args
2684       tot_args += VG_(sizeXA)( VG_(args_for_valgrind) );
2685       tot_args -= VG_(args_for_valgrind_noexecpass);
2686       // name of client exe
2687       tot_args++;
2688       // args for client exe, skipping [0]
2689       arg2copy = (Char**)ARG2;
2690       if (arg2copy && arg2copy[0]) {
2691          for (i = 1; arg2copy[i]; i++)
2692             tot_args++;
2693       }
2694       // allocate
2695       argv = VG_(malloc)( "di.syswrap.pre_sys_execve.1",
2696                           (tot_args+1) * sizeof(HChar*) );
2697       if (argv == 0) goto hosed;
2698       // copy
2699       j = 0;
2700       argv[j++] = launcher_basename;
2701       for (i = 0; i < VG_(sizeXA)( VG_(args_for_valgrind) ); i++) {
2702          if (i < VG_(args_for_valgrind_noexecpass))
2703             continue;
2704          argv[j++] = * (HChar**) VG_(indexXA)( VG_(args_for_valgrind), i );
2705       }
2706       argv[j++] = (Char*)ARG1;
2707       if (arg2copy && arg2copy[0])
2708          for (i = 1; arg2copy[i]; i++)
2709             argv[j++] = arg2copy[i];
2710       argv[j++] = NULL;
2711       // check
2712       vg_assert(j == tot_args+1);
2713    }
2714 
2715    /* restore the DATA rlimit for the child */
2716    VG_(setrlimit)(VKI_RLIMIT_DATA, &VG_(client_rlimit_data));
2717 
2718    /*
2719       Set the signal state up for exec.
2720 
2721       We need to set the real signal state to make sure the exec'd
2722       process gets SIG_IGN properly.
2723 
2724       Also set our real sigmask to match the client's sigmask so that
2725       the exec'd child will get the right mask.  First we need to
2726       clear out any pending signals so they they don't get delivered,
2727       which would confuse things.
2728 
2729       XXX This is a bug - the signals should remain pending, and be
2730       delivered to the new process after exec.  There's also a
2731       race-condition, since if someone delivers us a signal between
2732       the sigprocmask and the execve, we'll still get the signal. Oh
2733       well.
2734    */
2735    {
2736       vki_sigset_t allsigs;
2737       vki_siginfo_t info;
2738 
2739       /* What this loop does: it queries SCSS (the signal state that
2740          the client _thinks_ the kernel is in) by calling
2741          VG_(do_sys_sigaction), and modifies the real kernel signal
2742          state accordingly. */
2743       for (i = 1; i < VG_(max_signal); i++) {
2744          vki_sigaction_fromK_t sa_f;
2745          vki_sigaction_toK_t   sa_t;
2746          VG_(do_sys_sigaction)(i, NULL, &sa_f);
2747          VG_(convert_sigaction_fromK_to_toK)(&sa_f, &sa_t);
2748          if (sa_t.ksa_handler == VKI_SIG_IGN)
2749             VG_(sigaction)(i, &sa_t, NULL);
2750          else {
2751             sa_t.ksa_handler = VKI_SIG_DFL;
2752             VG_(sigaction)(i, &sa_t, NULL);
2753          }
2754       }
2755 
2756       VG_(sigfillset)(&allsigs);
2757       while(VG_(sigtimedwait_zero)(&allsigs, &info) > 0)
2758          ;
2759 
2760       VG_(sigprocmask)(VKI_SIG_SETMASK, &tst->sig_mask, NULL);
2761    }
2762 
2763    if (0) {
2764       Char **cpp;
2765       VG_(printf)("exec: %s\n", path);
2766       for (cpp = argv; cpp && *cpp; cpp++)
2767          VG_(printf)("argv: %s\n", *cpp);
2768       if (0)
2769          for (cpp = envp; cpp && *cpp; cpp++)
2770             VG_(printf)("env: %s\n", *cpp);
2771    }
2772 
2773    SET_STATUS_from_SysRes(
2774       VG_(do_syscall3)(__NR_execve, (UWord)path, (UWord)argv, (UWord)envp)
2775    );
2776 
2777    /* If we got here, then the execve failed.  We've already made way
2778       too much of a mess to continue, so we have to abort. */
2779   hosed:
2780    vg_assert(FAILURE);
2781    VG_(message)(Vg_UserMsg, "execve(%#lx(%s), %#lx, %#lx) failed, errno %ld\n",
2782                 ARG1, (char*)ARG1, ARG2, ARG3, ERR);
2783    VG_(message)(Vg_UserMsg, "EXEC FAILED: I can't recover from "
2784                             "execve() failing, so I'm dying.\n");
2785    VG_(message)(Vg_UserMsg, "Add more stringent tests in PRE(sys_execve), "
2786                             "or work out how to recover.\n");
2787    VG_(exit)(101);
2788 }
2789 
/* int access(const char *pathname, int mode);
   Nothing to simulate; just check that the pathname string is readable. */
PRE(sys_access)
{
   PRINT("sys_access ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "access", const char *, pathname, int, mode);
   PRE_MEM_RASCIIZ( "access(pathname)", ARG1 );
}
2796 
/* unsigned int alarm(unsigned int seconds);
   Argument is in a register; no memory to check. */
PRE(sys_alarm)
{
   PRINT("sys_alarm ( %ld )", ARG1);
   PRE_REG_READ1(unsigned long, "alarm", unsigned int, seconds);
}
2802 
/* brk(2) is simulated entirely inside Valgrind via do_brk(); the result
   is set here in the PRE handler, so the real syscall is not made.
   Tools are told about the memory that appeared/disappeared via
   VG_TRACK(new_mem_brk)/VG_TRACK(die_mem_brk). */
PRE(sys_brk)
{
   Addr brk_limit = VG_(brk_limit);   /* dataseg end before this call */
   Addr brk_new;

   /* libc   says: int   brk(void *end_data_segment);
      kernel says: void* brk(void* end_data_segment);  (more or less)

      libc returns 0 on success, and -1 (and sets errno) on failure.
      Nb: if you ask to shrink the dataseg end below what it
      currently is, that always succeeds, even if the dataseg end
      doesn't actually change (eg. brk(0)).  Unless it seg faults.

      Kernel returns the new dataseg end.  If the brk() failed, this
      will be unchanged from the old one.  That's why calling (kernel)
      brk(0) gives the current dataseg end (libc brk() just returns
      zero in that case).

      Both will seg fault if you shrink it back into a text segment.
   */
   PRINT("sys_brk ( %#lx )", ARG1);
   PRE_REG_READ1(unsigned long, "brk", unsigned long, end_data_segment);

   /* Kernel convention: return the (possibly unchanged) dataseg end. */
   brk_new = do_brk(ARG1);
   SET_STATUS_Success( brk_new );

   if (brk_new == ARG1) {
      /* brk() succeeded */
      if (brk_new < brk_limit) {
         /* successfully shrunk the data segment. */
         VG_TRACK( die_mem_brk, (Addr)ARG1,
                   brk_limit-ARG1 );
      } else
      if (brk_new > brk_limit) {
         /* successfully grew the data segment */
         VG_TRACK( new_mem_brk, brk_limit,
                   ARG1-brk_limit, tid );
      }
   } else {
      /* brk() failed: do_brk must have returned the old limit. */
      vg_assert(brk_limit == brk_new);
   }
}
2846 
/* int chdir(const char *path);  Check the path string only. */
PRE(sys_chdir)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_chdir ( %#lx(%s) )", ARG1,(char*)ARG1);
   PRE_REG_READ1(long, "chdir", const char *, path);
   PRE_MEM_RASCIIZ( "chdir(path)", ARG1 );
}
2854 
/* int chmod(const char *path, mode_t mode);  Check the path string only. */
PRE(sys_chmod)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_chmod ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "chmod", const char *, path, vki_mode_t, mode);
   PRE_MEM_RASCIIZ( "chmod(path)", ARG1 );
}
2862 
/* int chown(const char *path, uid_t owner, gid_t group); */
PRE(sys_chown)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_chown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
   PRE_REG_READ3(long, "chown",
                 const char *, path, vki_uid_t, owner, vki_gid_t, group);
   PRE_MEM_RASCIIZ( "chown(path)", ARG1 );
}
2871 
/* int lchown(const char *path, uid_t owner, gid_t group);
   Same shape as chown; does not follow symlinks. */
PRE(sys_lchown)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_lchown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
   PRE_REG_READ3(long, "lchown",
                 const char *, path, vki_uid_t, owner, vki_gid_t, group);
   PRE_MEM_RASCIIZ( "lchown(path)", ARG1 );
}
2880 
/* int close(int fd);
   Refuse (EBADF) attempts by the client to close fds Valgrind itself
   depends on: anything ML_(fd_allowed) rejects, and stderr when -d
   style debug logging (which writes to fd 2) is active. */
PRE(sys_close)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_close ( %ld )", ARG1);
   PRE_REG_READ1(long, "close", unsigned int, fd);

   /* Detect and negate attempts by the client to close Valgrind's log fd */
   if ( (!ML_(fd_allowed)(ARG1, "close", tid, False))
        /* If doing -d style logging (which is to fd=2), don't
           allow that to be closed either. */
        || (ARG1 == 2/*stderr*/ && VG_(debugLog_getLevel)() > 0) )
      SET_STATUS_Failure( VKI_EBADF );
}
2894 
/* After a successful close, drop the fd from the --track-fds records. */
POST(sys_close)
{
   if (VG_(clo_track_fds)) record_fd_close(ARG1);
}
2899 
/* int dup(int oldfd);  Nothing to pre-check beyond the register arg. */
PRE(sys_dup)
{
   PRINT("sys_dup ( %ld )", ARG1);
   PRE_REG_READ1(long, "dup", unsigned int, oldfd);
}
2905 
POST(sys_dup)2906 POST(sys_dup)
2907 {
2908    vg_assert(SUCCESS);
2909    if (!ML_(fd_allowed)(RES, "dup", tid, True)) {
2910       VG_(close)(RES);
2911       SET_STATUS_Failure( VKI_EMFILE );
2912    } else {
2913       if (VG_(clo_track_fds))
2914          ML_(record_fd_open_named)(tid, RES);
2915    }
2916 }
2917 
/* int dup2(int oldfd, int newfd);
   Refuse (EBADF) if the client is trying to dup onto one of
   Valgrind's own fds. */
PRE(sys_dup2)
{
   PRINT("sys_dup2 ( %ld, %ld )", ARG1,ARG2);
   PRE_REG_READ2(long, "dup2", unsigned int, oldfd, unsigned int, newfd);
   if (!ML_(fd_allowed)(ARG2, "dup2", tid, True))
      SET_STATUS_Failure( VKI_EBADF );
}
2925 
/* Record the newly created fd for --track-fds. */
POST(sys_dup2)
{
   vg_assert(SUCCESS);
   if (VG_(clo_track_fds))
      ML_(record_fd_open_named)(tid, RES);
}
2932 
/* int fchdir(int fd); */
PRE(sys_fchdir)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_fchdir ( %ld )", ARG1);
   PRE_REG_READ1(long, "fchdir", unsigned int, fd);
}
2939 
/* int fchown(int fd, uid_t owner, gid_t group); */
PRE(sys_fchown)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_fchown ( %ld, %ld, %ld )", ARG1,ARG2,ARG3);
   PRE_REG_READ3(long, "fchown",
                 unsigned int, fd, vki_uid_t, owner, vki_gid_t, group);
}
2947 
/* int fchmod(int fildes, mode_t mode); */
PRE(sys_fchmod)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_fchmod ( %ld, %ld )", ARG1,ARG2);
   PRE_REG_READ2(long, "fchmod", unsigned int, fildes, vki_mode_t, mode);
}
2954 
/* int fstat(int fd, struct stat *buf);
   The kernel writes a whole struct vki_stat to buf. */
PRE(sys_newfstat)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_newfstat ( %ld, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "fstat", unsigned int, fd, struct stat *, buf);
   PRE_MEM_WRITE( "fstat(buf)", ARG2, sizeof(struct vki_stat) );
}
2962 
/* Mark the stat buffer as defined after the call. */
POST(sys_newfstat)
{
   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
}
2967 
/* Signal mask in force before fork(), saved so both parent and child
   can restore it once post-fork fixup is complete. */
static vki_sigset_t fork_saved_mask;

// In Linux, the sys_fork() function varies across architectures, but we
// ignore the various args it gets, and so it looks arch-neutral.  Hmm.
PRE(sys_fork)
{
   Bool is_child;
   Int child_pid;
   vki_sigset_t mask;

   PRINT("sys_fork ( )");
   PRE_REG_READ0(long, "fork");

   /* Block all signals during fork, so that we can fix things up in
      the child without being interrupted. */
   VG_(sigfillset)(&mask);
   VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, &fork_saved_mask);

   /* The fork itself: from here on, this code runs in two processes. */
   SET_STATUS_from_SysRes( VG_(do_syscall0)(__NR_fork) );

   if (!SUCCESS) return;

   /* Work out which side of the fork we are on; the encoding of the
      result differs per OS. */
#if defined(VGO_linux)
   // RES is 0 for child, non-0 (the child's PID) for parent.
   is_child = ( RES == 0 ? True : False );
   child_pid = ( is_child ? -1 : RES );
#elif defined(VGO_darwin)
   // RES is the child's pid.  RESHI is 1 for child, 0 for parent.
   is_child = RESHI;
   child_pid = RES;
#else
#  error Unknown OS
#endif

   /* NOTE(review): do_atfork_pre is called after the fork syscall
      itself, i.e. it runs in both processes. */
   VG_(do_atfork_pre)(tid);

   if (is_child) {
      VG_(do_atfork_child)(tid);

      /* restore signal mask */
      VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);

      /* If --child-silent-after-fork=yes was specified, set the
         output file descriptors to 'impossible' values.  This is
         noticed by send_bytes_to_logging_sink in m_libcprint.c, which
         duly stops writing any further output. */
      if (VG_(clo_child_silent_after_fork)) {
         if (!VG_(log_output_sink).is_socket)
            VG_(log_output_sink).fd = -1;
         if (!VG_(xml_output_sink).is_socket)
            VG_(xml_output_sink).fd = -1;
      }

   } else {
      VG_(do_atfork_parent)(tid);

      PRINT("   fork: process %d created child %d\n", VG_(getpid)(), child_pid);

      /* restore signal mask */
      VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
   }
}
3030 
/* int ftruncate(int fd, off_t length);  May block on slow media. */
PRE(sys_ftruncate)
{
   *flags |= SfMayBlock;
   PRINT("sys_ftruncate ( %ld, %ld )", ARG1,ARG2);
   PRE_REG_READ2(long, "ftruncate", unsigned int, fd, unsigned long, length);
}
3037 
/* int truncate(const char *path, off_t length);  May block. */
PRE(sys_truncate)
{
   *flags |= SfMayBlock;
   PRINT("sys_truncate ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "truncate",
                 const char *, path, unsigned long, length);
   PRE_MEM_RASCIIZ( "truncate(path)", ARG1 );
}
3046 
/* int ftruncate64(int fd, off64_t length);
   On 32-bit targets the 64-bit length is split across two registers
   and reassembled with MERGE64; on 64-bit targets it fits in one. */
PRE(sys_ftruncate64)
{
   *flags |= SfMayBlock;
#if VG_WORDSIZE == 4
   PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, MERGE64(ARG2,ARG3));
   PRE_REG_READ3(long, "ftruncate64",
                 unsigned int, fd,
                 UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
#else
   PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, (Long)ARG2);
   PRE_REG_READ2(long, "ftruncate64",
                 unsigned int,fd, UWord,length);
#endif
}
3061 
/* int truncate64(const char *path, off64_t length);
   Same register-splitting treatment as ftruncate64 on 32-bit targets. */
PRE(sys_truncate64)
{
   *flags |= SfMayBlock;
#if VG_WORDSIZE == 4
   PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)MERGE64(ARG2, ARG3));
   PRE_REG_READ3(long, "truncate64",
                 const char *, path,
                 UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
#else
   PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)ARG2);
   PRE_REG_READ2(long, "truncate64",
                 const char *,path, UWord,length);
#endif
   PRE_MEM_RASCIIZ( "truncate64(path)", ARG1 );
}
3077 
/* int getdents(int fd, struct linux_dirent *dirp, unsigned count);
   Kernel writes up to 'count' bytes of dirent records into dirp. */
PRE(sys_getdents)
{
   *flags |= SfMayBlock;
   PRINT("sys_getdents ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
   PRE_REG_READ3(long, "getdents",
                 unsigned int, fd, struct linux_dirent *, dirp,
                 unsigned int, count);
   PRE_MEM_WRITE( "getdents(dirp)", ARG2, ARG3 );
}
3087 
/* RES is the number of bytes actually written into dirp. */
POST(sys_getdents)
{
   vg_assert(SUCCESS);
   if (RES > 0)
      POST_MEM_WRITE( ARG2, RES );
}
3094 
/* int getdents64(int fd, struct linux_dirent64 *dirp, unsigned count);
   64-bit dirent variant of getdents; same buffer treatment. */
PRE(sys_getdents64)
{
   *flags |= SfMayBlock;
   PRINT("sys_getdents64 ( %ld, %#lx, %ld )",ARG1,ARG2,ARG3);
   PRE_REG_READ3(long, "getdents64",
                 unsigned int, fd, struct linux_dirent64 *, dirp,
                 unsigned int, count);
   PRE_MEM_WRITE( "getdents64(dirp)", ARG2, ARG3 );
}
3104 
/* RES is the number of bytes actually written into dirp. */
POST(sys_getdents64)
{
   vg_assert(SUCCESS);
   if (RES > 0)
      POST_MEM_WRITE( ARG2, RES );
}
3111 
/* int getgroups(int size, gid_t list[]);
   Only check the output buffer when a positive size was requested
   (size==0 legitimately just queries the group count). */
PRE(sys_getgroups)
{
   PRINT("sys_getgroups ( %ld, %#lx )", ARG1, ARG2);
   PRE_REG_READ2(long, "getgroups", int, size, vki_gid_t *, list);
   if (ARG1 > 0)
      PRE_MEM_WRITE( "getgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
}
3119 
/* RES is the number of gids actually stored in list. */
POST(sys_getgroups)
{
   vg_assert(SUCCESS);
   if (ARG1 > 0 && RES > 0)
      POST_MEM_WRITE( ARG2, RES * sizeof(vki_gid_t) );
}
3126 
/* long getcwd(char *buf, unsigned long size); */
PRE(sys_getcwd)
{
   // Comment from linux/fs/dcache.c:
   //   NOTE! The user-level library version returns a character pointer.
   //   The kernel system call just returns the length of the buffer filled
   //   (which includes the ending '\0' character), or a negative error
   //   value.
   // Is this Linux-specific?  If so it should be moved to syswrap-linux.c.
   PRINT("sys_getcwd ( %#lx, %llu )", ARG1,(ULong)ARG2);
   PRE_REG_READ2(long, "getcwd", char *, buf, unsigned long, size);
   PRE_MEM_WRITE( "getcwd(buf)", ARG1, ARG2 );
}
3139 
/* RES is the number of bytes filled in, including the trailing NUL
   (see kernel-behaviour comment in the PRE handler). */
POST(sys_getcwd)
{
   vg_assert(SUCCESS);
   if (RES != (Addr)NULL)
      POST_MEM_WRITE( ARG1, RES );
}
3146 
/* uid_t geteuid(void); */
PRE(sys_geteuid)
{
   PRINT("sys_geteuid ( )");
   PRE_REG_READ0(long, "geteuid");
}
3152 
/* gid_t getegid(void); */
PRE(sys_getegid)
{
   PRINT("sys_getegid ( )");
   PRE_REG_READ0(long, "getegid");
}
3158 
/* gid_t getgid(void); */
PRE(sys_getgid)
{
   PRINT("sys_getgid ( )");
   PRE_REG_READ0(long, "getgid");
}
3164 
/* pid_t getpid(void); */
PRE(sys_getpid)
{
   PRINT("sys_getpid ()");
   PRE_REG_READ0(long, "getpid");
}
3170 
/* pid_t getpgid(pid_t pid); */
PRE(sys_getpgid)
{
   PRINT("sys_getpgid ( %ld )", ARG1);
   PRE_REG_READ1(long, "getpgid", vki_pid_t, pid);
}
3176 
/* pid_t getpgrp(void); */
PRE(sys_getpgrp)
{
   PRINT("sys_getpgrp ()");
   PRE_REG_READ0(long, "getpgrp");
}
3182 
/* pid_t getppid(void); */
PRE(sys_getppid)
{
   PRINT("sys_getppid ()");
   PRE_REG_READ0(long, "getppid");
}
3188 
/* Shared POST handling for getrlimit/old_getrlimit: mark the rlimit
   struct written, then override the kernel's answer for the resources
   Valgrind virtualises (fd limits, data, stack), so the client sees
   the limits it is actually running under. */
static void common_post_getrlimit(ThreadId tid, UWord a1, UWord a2)
{
   struct vki_rlimit *rlim = (struct vki_rlimit *)a2;

   POST_MEM_WRITE( a2, sizeof(struct vki_rlimit) );

#ifdef _RLIMIT_POSIX_FLAG
   // Darwin will sometimes set _RLIMIT_POSIX_FLAG on getrlimit calls.
   // Strip it so the resource comparisons below work.
   a1 &= ~_RLIMIT_POSIX_FLAG;
#endif

   if (a1 == VKI_RLIMIT_NOFILE) {
      rlim->rlim_cur = VG_(fd_soft_limit);
      rlim->rlim_max = VG_(fd_hard_limit);
   }
   else if (a1 == VKI_RLIMIT_DATA) {
      *rlim = VG_(client_rlimit_data);
   }
   else if (a1 == VKI_RLIMIT_STACK) {
      *rlim = VG_(client_rlimit_stack);
   }
   /* any other resource: leave the kernel's answer untouched */
}
3214 
/* Legacy getrlimit entry point; same buffer treatment as getrlimit. */
PRE(sys_old_getrlimit)
{
   PRINT("sys_old_getrlimit ( %ld, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "old_getrlimit",
                 unsigned int, resource, struct rlimit *, rlim);
   PRE_MEM_WRITE( "old_getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
}
3222 
/* Defer to the shared getrlimit post-processing. */
POST(sys_old_getrlimit)
{
   common_post_getrlimit(tid, ARG1, ARG2);
}
3227 
/* int getrlimit(int resource, struct rlimit *rlim); */
PRE(sys_getrlimit)
{
   PRINT("sys_getrlimit ( %ld, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "getrlimit",
                 unsigned int, resource, struct rlimit *, rlim);
   PRE_MEM_WRITE( "getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
}
3235 
/* Defer to the shared getrlimit post-processing. */
POST(sys_getrlimit)
{
   common_post_getrlimit(tid, ARG1, ARG2);
}
3240 
/* int getrusage(int who, struct rusage *usage); */
PRE(sys_getrusage)
{
   PRINT("sys_getrusage ( %ld, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "getrusage", int, who, struct rusage *, usage);
   PRE_MEM_WRITE( "getrusage(usage)", ARG2, sizeof(struct vki_rusage) );
}
3247 
/* On success (RES == 0) the kernel filled in the whole rusage struct. */
POST(sys_getrusage)
{
   vg_assert(SUCCESS);
   if (RES == 0)
      POST_MEM_WRITE( ARG2, sizeof(struct vki_rusage) );
}
3254 
/* int gettimeofday(struct timeval *tv, struct timezone *tz);
   Both pointers are optional (may be NULL). */
PRE(sys_gettimeofday)
{
   PRINT("sys_gettimeofday ( %#lx, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "gettimeofday",
                 struct timeval *, tv, struct timezone *, tz);
   // GrP fixme does darwin write to *tz anymore?
   if (ARG1 != 0)
      PRE_timeval_WRITE( "gettimeofday(tv)", ARG1 );
   if (ARG2 != 0)
      PRE_MEM_WRITE( "gettimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
}
3266 
/* On success, mark whichever of tv/tz were supplied as written. */
POST(sys_gettimeofday)
{
   vg_assert(SUCCESS);
   if (RES == 0) {
      if (ARG1 != 0)
         POST_timeval_WRITE( ARG1 );
      if (ARG2 != 0)
         POST_MEM_WRITE( ARG2, sizeof(struct vki_timezone) );
   }
}
3277 
/* int settimeofday(const struct timeval *tv, const struct timezone *tz);
   Both pointers optional; both are inputs (reads, not writes). */
PRE(sys_settimeofday)
{
   PRINT("sys_settimeofday ( %#lx, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "settimeofday",
                 struct timeval *, tv, struct timezone *, tz);
   if (ARG1 != 0)
      PRE_timeval_READ( "settimeofday(tv)", ARG1 );
   if (ARG2 != 0) {
      PRE_MEM_READ( "settimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
      /* maybe should warn if tz->tz_dsttime is non-zero? */
   }
}
3290 
/* uid_t getuid(void); */
PRE(sys_getuid)
{
   PRINT("sys_getuid ( )");
   PRE_REG_READ0(long, "getuid");
}
3296 
void ML_(PRE_unknown_ioctl)(ThreadId tid, UWord request, UWord arg)
{
   /* We don't have any specific information on it, so
      try to do something reasonable based on direction and
      size bits.  The encoding scheme is described in
      /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .

      According to Simon Hausmann, _IOC_READ means the kernel
      writes a value to the ioctl value passed from the user
      space and the other way around with _IOC_WRITE. */

   UInt dir  = _VKI_IOC_DIR(request);
   UInt size = _VKI_IOC_SIZE(request);
   if (VG_(strstr)(VG_(clo_sim_hints), "lax-ioctls") != NULL) {
      /*
       * Be very lax about ioctl handling; the only
       * assumption is that the size is correct. Doesn't
       * require the full buffer to be initialized when
       * writing.  Without this, using some device
       * drivers with a large number of strange ioctl
       * commands becomes very tiresome.
       */
   } else if (/* size == 0 || */ dir == _VKI_IOC_NONE) {
      //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
      //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
      /* No size/direction encoded in the request: we can't check
         anything.  Warn (at most 3 times per run) unless in XML mode. */
      static Int moans = 3;
      if (moans > 0 && !VG_(clo_xml)) {
         moans--;
         VG_(umsg)("Warning: noted but unhandled ioctl 0x%lx"
                   " with no size/direction hints\n", request);
         VG_(umsg)("   This could cause spurious value errors to appear.\n");
         VG_(umsg)("   See README_MISSING_SYSCALL_OR_IOCTL for "
                   "guidance on writing a proper wrapper.\n" );
      }
   } else {
      //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
      //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
      /* Use the encoded direction/size to check the argument buffer:
         _IOC_WRITE means userspace passes data in (kernel reads it),
         _IOC_READ means the kernel will write data back. */
      if ((dir & _VKI_IOC_WRITE) && size > 0)
         PRE_MEM_READ( "ioctl(generic)", arg, size);
      if ((dir & _VKI_IOC_READ) && size > 0)
         PRE_MEM_WRITE( "ioctl(generic)", arg, size);
   }
}
3340 
ML_(POST_unknown_ioctl)3341 void ML_(POST_unknown_ioctl)(ThreadId tid, UInt res, UWord request, UWord arg)
3342 {
3343    /* We don't have any specific information on it, so
3344       try to do something reasonable based on direction and
3345       size bits.  The encoding scheme is described in
3346       /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
3347 
3348       According to Simon Hausmann, _IOC_READ means the kernel
3349       writes a value to the ioctl value passed from the user
3350       space and the other way around with _IOC_WRITE. */
3351 
3352    UInt dir  = _VKI_IOC_DIR(request);
3353    UInt size = _VKI_IOC_SIZE(request);
3354    if (size > 0 && (dir & _VKI_IOC_READ)
3355        && res == 0
3356        && arg != (Addr)NULL)
3357    {
3358       POST_MEM_WRITE(arg, size);
3359    }
3360 }
3361 
/*
   If we're sending a SIGKILL to one of our own threads, then simulate
   it rather than really sending the signal, so that the target thread
   gets a chance to clean up.  Returns True if we did the killing (or
   no killing is necessary), and False if the caller should use the
   normal kill syscall.

   "pid" is any pid argument which can be passed to kill; group kills
   (< -1, 0), and owner kills (-1) are ignored, on the grounds that
   they'll most likely hit all the threads and we won't need to worry
   about cleanup.  In truth, we can't fully emulate these multicast
   kills.

   "tgid" is a thread group id.  If it is not -1, then the target
   thread must be in that thread group.
 */
Bool ML_(do_sigkill)(Int pid, Int tgid)
{
   ThreadState *tst;
   ThreadId tid;

   /* Multicast/group kills (pid <= 0): let the real syscall handle it. */
   if (pid <= 0)
      return False;

   tid = VG_(lwpid_to_vgtid)(pid);
   if (tid == VG_INVALID_THREADID)
      return False;		/* none of our threads */

   tst = VG_(get_ThreadState)(tid);
   if (tst == NULL || tst->status == VgTs_Empty)
      return False;		/* hm, shouldn't happen */

   if (tgid != -1 && tst->os_state.threadgroup != tgid)
      return False;		/* not the right thread group */

   /* Check to see that the target isn't already exiting. */
   if (!VG_(is_exiting)(tid)) {
      if (VG_(clo_trace_signals))
	 VG_(message)(Vg_DebugMsg,
                      "Thread %d being killed with SIGKILL\n",
                      tst->tid);

      /* Mark the thread as dying of a fatal SIGKILL ... */
      tst->exitreason = VgSrc_FatalSig;
      tst->os_state.fatalsig = VKI_SIGKILL;

      /* ... and, if it's blocked in a syscall, yank it out so it
         notices. */
      if (!VG_(is_running_thread)(tid))
	 VG_(get_thread_out_of_syscall)(tid);
   }

   return True;
}
3413 
/* int kill(pid_t pid, int sig);
   Signals aimed at Valgrind's own threads with SIGKILL are simulated
   via ML_(do_sigkill); everything else goes to the real syscall. */
PRE(sys_kill)
{
   PRINT("sys_kill ( %ld, %ld )", ARG1,ARG2);
   PRE_REG_READ2(long, "kill", int, pid, int, sig);
   /* Reject signal numbers the client isn't allowed to send. */
   if (!ML_(client_signal_OK)(ARG2)) {
      SET_STATUS_Failure( VKI_EINVAL );
      return;
   }

   /* If we're sending SIGKILL, check to see if the target is one of
      our threads and handle it specially. */
   if (ARG2 == VKI_SIGKILL && ML_(do_sigkill)(ARG1, -1))
      SET_STATUS_Success(0);
   else
      /* re syscall3: Darwin has a 3rd arg, which is a flag (boolean)
         affecting how posix-compliant the call is.  I guess it is
         harmless to pass the 3rd arg on other platforms; hence pass
         it on all. */
      SET_STATUS_from_SysRes( VG_(do_syscall3)(SYSNO, ARG1, ARG2, ARG3) );

   if (VG_(clo_trace_signals))
      VG_(message)(Vg_DebugMsg, "kill: sent signal %ld to pid %ld\n",
		   ARG2, ARG1);

   /* This kill might have given us a pending signal.  Ask for a check once
      the syscall is done. */
   *flags |= SfPollAfter;
}
3442 
/* int link(const char *oldpath, const char *newpath);  May block. */
PRE(sys_link)
{
   *flags |= SfMayBlock;
   PRINT("sys_link ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   PRE_REG_READ2(long, "link", const char *, oldpath, const char *, newpath);
   PRE_MEM_RASCIIZ( "link(oldpath)", ARG1);
   PRE_MEM_RASCIIZ( "link(newpath)", ARG2);
}
3451 
/* int lstat(const char *file_name, struct stat *buf);
   Reads the path string, writes a whole struct vki_stat. */
PRE(sys_newlstat)
{
   PRINT("sys_newlstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "lstat", char *, file_name, struct stat *, buf);
   PRE_MEM_RASCIIZ( "lstat(file_name)", ARG1 );
   PRE_MEM_WRITE( "lstat(buf)", ARG2, sizeof(struct vki_stat) );
}
3459 
/* Mark the stat buffer as defined after the call. */
POST(sys_newlstat)
{
   vg_assert(SUCCESS);
   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
}
3465 
/* int mkdir(const char *pathname, int mode);  May block. */
PRE(sys_mkdir)
{
   *flags |= SfMayBlock;
   PRINT("sys_mkdir ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "mkdir", const char *, pathname, int, mode);
   PRE_MEM_RASCIIZ( "mkdir(pathname)", ARG1 );
}
3473 
PRE(sys_mprotect)
{
   /* int mprotect(void *addr, size_t len, int prot)
      Reject ranges outside the client's address space, and emulate
      the Linux-specific PROT_GROWSDOWN/PROT_GROWSUP flags by
      rewriting ARG1..ARG3 before the syscall is actually made. */
   PRINT("sys_mprotect ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
   PRE_REG_READ3(long, "mprotect",
                 unsigned long, addr, vki_size_t, len, unsigned long, prot);

   if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "mprotect")) {
      SET_STATUS_Failure( VKI_ENOMEM );
   }
#if defined(VKI_PROT_GROWSDOWN)
   else
   if (ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP)) {
      /* Deal with mprotects on growable stack areas.

         The critical files to understand all this are mm/mprotect.c
         in the kernel and sysdeps/unix/sysv/linux/dl-execstack.c in
         glibc.

         The kernel provides PROT_GROWSDOWN and PROT_GROWSUP which
         round the start/end address of mprotect to the start/end of
         the underlying vma and glibc uses that as an easy way to
         change the protection of the stack by calling mprotect on the
         last page of the stack with PROT_GROWSDOWN set.

         The sanity check provided by the kernel is that the vma must
         have the VM_GROWSDOWN/VM_GROWSUP flag set as appropriate.  */
      UInt grows = ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP);
      NSegment const *aseg = VG_(am_find_nsegment)(ARG1);
      NSegment const *rseg;

      vg_assert(aseg);

      if (grows == VKI_PROT_GROWSDOWN) {
         /* The segment grows downwards iff it is immediately preceded
            by an upper-bounded reservation segment.  If so, extend
            the mprotect range down to the segment start and strip the
            flag so the kernel sees a plain mprotect. */
         rseg = VG_(am_next_nsegment)( (NSegment*)aseg, False/*backwards*/ );
         if (rseg &&
             rseg->kind == SkResvn &&
             rseg->smode == SmUpper &&
             rseg->end+1 == aseg->start) {
            Addr end = ARG1 + ARG2;
            ARG1 = aseg->start;
            ARG2 = end - aseg->start;
            ARG3 &= ~VKI_PROT_GROWSDOWN;
         } else {
            SET_STATUS_Failure( VKI_EINVAL );
         }
      } else if (grows == VKI_PROT_GROWSUP) {
         /* Mirror image: growable upwards iff followed by a
            lower-bounded reservation; extend range up to segment end. */
         rseg = VG_(am_next_nsegment)( (NSegment*)aseg, True/*forwards*/ );
         if (rseg &&
             rseg->kind == SkResvn &&
             rseg->smode == SmLower &&
             aseg->end+1 == rseg->start) {
            ARG2 = aseg->end - ARG1 + 1;
            ARG3 &= ~VKI_PROT_GROWSUP;
         } else {
            SET_STATUS_Failure( VKI_EINVAL );
         }
      } else {
         /* both GROWSUP and GROWSDOWN */
         SET_STATUS_Failure( VKI_EINVAL );
      }
   }
#endif   // defined(VKI_PROT_GROWSDOWN)
}
3537 
POST(sys_mprotect)3538 POST(sys_mprotect)
3539 {
3540    Addr a    = ARG1;
3541    SizeT len = ARG2;
3542    Int  prot = ARG3;
3543 
3544    ML_(notify_core_and_tool_of_mprotect)(a, len, prot);
3545 }
3546 
PRE(sys_munmap)
{
   /* int munmap(void *start, size_t length)
      Refuse attempts to unmap memory outside the client's area
      (e.g. Valgrind's own mappings). */
   if (0) VG_(printf)("  munmap( %#lx )\n", ARG1);
   PRINT("sys_munmap ( %#lx, %llu )", ARG1,(ULong)ARG2);
   PRE_REG_READ2(long, "munmap", unsigned long, start, vki_size_t, length);

   if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "munmap"))
      SET_STATUS_Failure( VKI_EINVAL );
}
3556 
POST(sys_munmap)3557 POST(sys_munmap)
3558 {
3559    Addr  a   = ARG1;
3560    SizeT len = ARG2;
3561 
3562    ML_(notify_core_and_tool_of_munmap)( (Addr64)a, (ULong)len );
3563 }
3564 
PRE(sys_mincore)
{
   /* int mincore(void *start, size_t length, unsigned char *vec)
      The kernel writes one byte into vec per page covered by
      [start, start+length), hence the PGROUNDUP/PAGE_SIZE sizing. */
   PRINT("sys_mincore ( %#lx, %llu, %#lx )", ARG1,(ULong)ARG2,ARG3);
   PRE_REG_READ3(long, "mincore",
                 unsigned long, start, vki_size_t, length,
                 unsigned char *, vec);
   PRE_MEM_WRITE( "mincore(vec)", ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
}
POST(sys_mincore)
{
   /* One result byte per page was written into vec. */
   POST_MEM_WRITE( ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
}
3577 
PRE(sys_nanosleep)
{
   /* int nanosleep(const struct timespec *req, struct timespec *rem)
      SfPostOnFail: the POST handler must run even on failure, because
      an EINTR return writes the remaining time into *rem. */
   *flags |= SfMayBlock|SfPostOnFail;
   PRINT("sys_nanosleep ( %#lx, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "nanosleep",
                 struct timespec *, req, struct timespec *, rem);
   PRE_MEM_READ( "nanosleep(req)", ARG1, sizeof(struct vki_timespec) );
   if (ARG2 != 0)
      PRE_MEM_WRITE( "nanosleep(rem)", ARG2, sizeof(struct vki_timespec) );
}
3588 
POST(sys_nanosleep)
{
   /* *rem is only written by the kernel when the sleep was
      interrupted by a signal (EINTR). */
   vg_assert(SUCCESS || FAILURE);
   if (ARG2 != 0 && FAILURE && ERR == VKI_EINTR)
      POST_MEM_WRITE( ARG2, sizeof(struct vki_timespec) );
}
3595 
PRE(sys_open)
{
   /* int open(const char *filename, int flags[, int mode])
      The third argument exists only when O_CREAT is given, so the
      register-read annotation depends on ARG2. */
   if (ARG2 & VKI_O_CREAT) {
      // 3-arg version
      PRINT("sys_open ( %#lx(%s), %ld, %ld )",ARG1,(char*)ARG1,ARG2,ARG3);
      PRE_REG_READ3(long, "open",
                    const char *, filename, int, flags, int, mode);
   } else {
      // 2-arg version
      PRINT("sys_open ( %#lx(%s), %ld )",ARG1,(char*)ARG1,ARG2);
      PRE_REG_READ2(long, "open",
                    const char *, filename, int, flags);
   }
   PRE_MEM_RASCIIZ( "open(filename)", ARG1 );

#if defined(VGO_linux)
   /* Handle the case where the open is of /proc/self/cmdline or
      /proc/<pid>/cmdline, and just give it a copy of the fd for the
      fake file we cooked up at startup (in m_main).  Also, seek the
      cloned fd back to the start. */
   {
      HChar  name[30];
      Char*  arg1s = (Char*) ARG1;
      SysRes sres;

      VG_(sprintf)(name, "/proc/%d/cmdline", VG_(getpid)());
      /* safe_to_deref guards against a bogus client pointer before we
         strcmp on it. */
      if (ML_(safe_to_deref)( arg1s, 1 ) &&
          (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/cmdline"))
         )
      {
         sres = VG_(dup)( VG_(cl_cmdline_fd) );
         SET_STATUS_from_SysRes( sres );
         if (!sr_isError(sres)) {
            /* Rewind the duplicated fd so the client reads from the
               beginning of the fake cmdline file. */
            OffT off = VG_(lseek)( sr_Res(sres), 0, VKI_SEEK_SET );
            if (off < 0)
               SET_STATUS_Failure( VKI_EMFILE );
         }
         return;
      }
   }
#endif // defined(VGO_linux)

   /* Otherwise handle normally */
   *flags |= SfMayBlock;
}
3641 
POST(sys_open)
{
   /* Reject fds that collide with Valgrind's reserved range; otherwise
      record the open for --track-fds reporting. */
   vg_assert(SUCCESS);
   if (!ML_(fd_allowed)(RES, "open", tid, True)) {
      VG_(close)(RES);
      SET_STATUS_Failure( VKI_EMFILE );
   } else {
      if (VG_(clo_track_fds))
         ML_(record_fd_open_with_given_name)(tid, RES, (Char*)ARG1);
   }
}
3653 
PRE(sys_read)
{
   /* ssize_t read(int fd, void *buf, size_t count) */
   *flags |= SfMayBlock;
   PRINT("sys_read ( %ld, %#lx, %llu )", ARG1, ARG2, (ULong)ARG3);
   PRE_REG_READ3(ssize_t, "read",
                 unsigned int, fd, char *, buf, vki_size_t, count);

   if (!ML_(fd_allowed)(ARG1, "read", tid, False))
      SET_STATUS_Failure( VKI_EBADF );
   else
      /* The whole buffer must be addressable, even if fewer bytes end
         up being read. */
      PRE_MEM_WRITE( "read(buf)", ARG2, ARG3 );
}
3666 
POST(sys_read)
{
   /* RES is the number of bytes actually read. */
   vg_assert(SUCCESS);
   POST_MEM_WRITE( ARG2, RES );
}
3672 
PRE(sys_write)3673 PRE(sys_write)
3674 {
3675    Bool ok;
3676    *flags |= SfMayBlock;
3677    PRINT("sys_write ( %ld, %#lx, %llu )", ARG1, ARG2, (ULong)ARG3);
3678    PRE_REG_READ3(ssize_t, "write",
3679                  unsigned int, fd, const char *, buf, vki_size_t, count);
3680    /* check to see if it is allowed.  If not, try for an exemption from
3681       --sim-hints=enable-outer (used for self hosting). */
3682    ok = ML_(fd_allowed)(ARG1, "write", tid, False);
3683    if (!ok && ARG1 == 2/*stderr*/
3684            && VG_(strstr)(VG_(clo_sim_hints),"enable-outer"))
3685       ok = True;
3686    if (!ok)
3687       SET_STATUS_Failure( VKI_EBADF );
3688    else
3689       PRE_MEM_READ( "write(buf)", ARG2, ARG3 );
3690 }
3691 
PRE(sys_creat)
{
   /* int creat(const char *pathname, int mode)
      Equivalent to open(pathname, O_CREAT|O_WRONLY|O_TRUNC, mode). */
   *flags |= SfMayBlock;
   PRINT("sys_creat ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "creat", const char *, pathname, int, mode);
   PRE_MEM_RASCIIZ( "creat(pathname)", ARG1 );
}
3699 
POST(sys_creat)
{
   /* Same fd bookkeeping as POST(sys_open). */
   vg_assert(SUCCESS);
   if (!ML_(fd_allowed)(RES, "creat", tid, True)) {
      VG_(close)(RES);
      SET_STATUS_Failure( VKI_EMFILE );
   } else {
      if (VG_(clo_track_fds))
         ML_(record_fd_open_with_given_name)(tid, RES, (Char*)ARG1);
   }
}
3711 
PRE(sys_poll)3712 PRE(sys_poll)
3713 {
3714    /* struct pollfd {
3715         int fd;           -- file descriptor
3716         short events;     -- requested events
3717         short revents;    -- returned events
3718       };
3719       int poll(struct pollfd *ufds, unsigned int nfds, int timeout)
3720    */
3721    UInt i;
3722    struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
3723    *flags |= SfMayBlock;
3724    PRINT("sys_poll ( %#lx, %ld, %ld )\n", ARG1,ARG2,ARG3);
3725    PRE_REG_READ3(long, "poll",
3726                  struct vki_pollfd *, ufds, unsigned int, nfds, long, timeout);
3727 
3728    for (i = 0; i < ARG2; i++) {
3729       PRE_MEM_READ( "poll(ufds.fd)",
3730                     (Addr)(&ufds[i].fd), sizeof(ufds[i].fd) );
3731       PRE_MEM_READ( "poll(ufds.events)",
3732                     (Addr)(&ufds[i].events), sizeof(ufds[i].events) );
3733       PRE_MEM_WRITE( "poll(ufds.reventss)",
3734                      (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
3735    }
3736 }
3737 
POST(sys_poll)3738 POST(sys_poll)
3739 {
3740    if (RES >= 0) {
3741       UInt i;
3742       struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
3743       for (i = 0; i < ARG2; i++)
3744 	 POST_MEM_WRITE( (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
3745    }
3746 }
3747 
PRE(sys_readlink)
{
   /* ssize_t readlink(const char *path, char *buf, size_t bufsiz)
      Unusually, this wrapper performs the syscall itself (so it can
      redirect /proc/<pid>/exe to the real client executable) and sets
      the status in PRE; there is no POST handler. */
   FUSE_COMPATIBLE_MAY_BLOCK();
   Word saved = SYSNO;

   PRINT("sys_readlink ( %#lx(%s), %#lx, %llu )", ARG1,(char*)ARG1,ARG2,(ULong)ARG3);
   PRE_REG_READ3(long, "readlink",
                 const char *, path, char *, buf, int, bufsiz);
   PRE_MEM_RASCIIZ( "readlink(path)", ARG1 );
   PRE_MEM_WRITE( "readlink(buf)", ARG2,ARG3 );

   {
#if defined(VGO_linux)
      /*
       * Handle the case where readlink is looking at /proc/self/exe or
       * /proc/<pid>/exe.
       */
      HChar name[25];
      Char* arg1s = (Char*) ARG1;
      VG_(sprintf)(name, "/proc/%d/exe", VG_(getpid)());
      if (ML_(safe_to_deref)(arg1s, 1) &&
          (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/exe"))
         )
      {
         /* Point at our cached fd for the client executable instead,
            so the client sees its own path, not Valgrind's. */
         VG_(sprintf)(name, "/proc/self/fd/%d", VG_(cl_exec_fd));
         SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, (UWord)name,
                                                         ARG2, ARG3));
      } else
#endif // defined(VGO_linux)
      {
         /* Normal case */
         SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, ARG1, ARG2, ARG3));
      }
   }

   /* Since there is no POST handler, mark the output buffer here. */
   if (SUCCESS && RES > 0)
      POST_MEM_WRITE( ARG2, RES );
}
3786 
PRE(sys_readv)
{
   /* ssize_t readv(int fd, const struct iovec *vector, int count)
      The iovec array itself is read by the kernel; each iov_base
      region is written into. */
   Int i;
   struct vki_iovec * vec;
   *flags |= SfMayBlock;
   PRINT("sys_readv ( %ld, %#lx, %llu )",ARG1,ARG2,(ULong)ARG3);
   PRE_REG_READ3(ssize_t, "readv",
                 unsigned long, fd, const struct iovec *, vector,
                 unsigned long, count);
   if (!ML_(fd_allowed)(ARG1, "readv", tid, False)) {
      SET_STATUS_Failure( VKI_EBADF );
   } else {
      PRE_MEM_READ( "readv(vector)", ARG2, ARG3 * sizeof(struct vki_iovec) );

      if (ARG2 != 0) {
         /* ToDo: don't do any of the following if the vector is invalid */
         vec = (struct vki_iovec *)ARG2;
         for (i = 0; i < (Int)ARG3; i++)
            PRE_MEM_WRITE( "readv(vector[...])",
                           (Addr)vec[i].iov_base, vec[i].iov_len );
      }
   }
}
3810 
POST(sys_readv)
{
   /* Distribute the RES bytes read across the iovec buffers in order,
      marking only the bytes actually filled in each one. */
   vg_assert(SUCCESS);
   if (RES > 0) {
      Int i;
      struct vki_iovec * vec = (struct vki_iovec *)ARG2;
      Int remains = RES;

      /* RES holds the number of bytes read. */
      for (i = 0; i < (Int)ARG3; i++) {
	 Int nReadThisBuf = vec[i].iov_len;
	 if (nReadThisBuf > remains) nReadThisBuf = remains;
	 POST_MEM_WRITE( (Addr)vec[i].iov_base, nReadThisBuf );
	 remains -= nReadThisBuf;
	 if (remains < 0) VG_(core_panic)("readv: remains < 0");
      }
   }
}
3829 
PRE(sys_rename)
{
   /* int rename(const char *oldpath, const char *newpath) */
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_rename ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   PRE_REG_READ2(long, "rename", const char *, oldpath, const char *, newpath);
   PRE_MEM_RASCIIZ( "rename(oldpath)", ARG1 );
   PRE_MEM_RASCIIZ( "rename(newpath)", ARG2 );
}
3838 
PRE(sys_rmdir)
{
   /* int rmdir(const char *pathname) */
   *flags |= SfMayBlock;
   PRINT("sys_rmdir ( %#lx(%s) )", ARG1,(char*)ARG1);
   PRE_REG_READ1(long, "rmdir", const char *, pathname);
   PRE_MEM_RASCIIZ( "rmdir(pathname)", ARG1 );
}
3846 
PRE(sys_select)
{
   /* int select(int n, fd_set *readfds, fd_set *writefds,
                 fd_set *exceptfds, struct timeval *timeout)
      Each fd_set contains at least n bits, i.e. n/8 bytes. */
   *flags |= SfMayBlock;
   PRINT("sys_select ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
   PRE_REG_READ5(long, "select",
                 int, n, vki_fd_set *, readfds, vki_fd_set *, writefds,
                 vki_fd_set *, exceptfds, struct vki_timeval *, timeout);
   // XXX: this possibly understates how much memory is read.
   if (ARG2 != 0)
      PRE_MEM_READ( "select(readfds)",
		     ARG2, ARG1/8 /* __FD_SETSIZE/8 */ );
   if (ARG3 != 0)
      PRE_MEM_READ( "select(writefds)",
		     ARG3, ARG1/8 /* __FD_SETSIZE/8 */ );
   if (ARG4 != 0)
      PRE_MEM_READ( "select(exceptfds)",
		     ARG4, ARG1/8 /* __FD_SETSIZE/8 */ );
   if (ARG5 != 0)
      PRE_timeval_READ( "select(timeout)", ARG5 );
}
3867 
PRE(sys_setgid)
{
   /* int setgid(gid_t gid) -- no memory accesses to annotate. */
   PRINT("sys_setgid ( %ld )", ARG1);
   PRE_REG_READ1(long, "setgid", vki_gid_t, gid);
}
3873 
PRE(sys_setsid)
{
   /* pid_t setsid(void) -- no arguments. */
   PRINT("sys_setsid ( )");
   PRE_REG_READ0(long, "setsid");
}
3879 
PRE(sys_setgroups)
{
   /* int setgroups(size_t size, const gid_t *list) */
   PRINT("setgroups ( %llu, %#lx )", (ULong)ARG1, ARG2);
   PRE_REG_READ2(long, "setgroups", int, size, vki_gid_t *, list);
   if (ARG1 > 0)
      PRE_MEM_READ( "setgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
}
3887 
PRE(sys_setpgid)
{
   /* int setpgid(pid_t pid, pid_t pgid) -- register args only. */
   PRINT("setpgid ( %ld, %ld )", ARG1, ARG2);
   PRE_REG_READ2(long, "setpgid", vki_pid_t, pid, vki_pid_t, pgid);
}
3893 
PRE(sys_setregid)
{
   /* int setregid(gid_t rgid, gid_t egid) -- register args only. */
   PRINT("sys_setregid ( %ld, %ld )", ARG1, ARG2);
   PRE_REG_READ2(long, "setregid", vki_gid_t, rgid, vki_gid_t, egid);
}
3899 
PRE(sys_setreuid)
{
   /* int setreuid(uid_t ruid, uid_t euid) -- register args only. */
   PRINT("sys_setreuid ( 0x%lx, 0x%lx )", ARG1, ARG2);
   PRE_REG_READ2(long, "setreuid", vki_uid_t, ruid, vki_uid_t, euid);
}
3905 
PRE(sys_setrlimit)
{
   /* int setrlimit(int resource, const struct rlimit *rlim)
      NOFILE, DATA and RLIMIT_STACK (main thread only) are emulated
      inside Valgrind rather than passed to the kernel, because
      Valgrind itself consumes fds and address space on the client's
      behalf.  Other resources fall through to the real syscall. */
   UWord arg1 = ARG1;
   PRINT("sys_setrlimit ( %ld, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "setrlimit",
                 unsigned int, resource, struct rlimit *, rlim);
   PRE_MEM_READ( "setrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );

#ifdef _RLIMIT_POSIX_FLAG
   // Darwin will sometimes set _RLIMIT_POSIX_FLAG on setrlimit calls.
   // Unset it here to make the if statements below work correctly.
   arg1 &= ~_RLIMIT_POSIX_FLAG;
#endif

   /* NOTE(review): the branches below dereference ARG2 without a NULL
      check; a client calling setrlimit(resource, NULL) for one of the
      emulated resources would fault here rather than get EFAULT --
      TODO confirm whether that path is reachable. */
   if (ARG2 &&
       ((struct vki_rlimit *)ARG2)->rlim_cur > ((struct vki_rlimit *)ARG2)->rlim_max) {
      SET_STATUS_Failure( VKI_EINVAL );
   }
   else if (arg1 == VKI_RLIMIT_NOFILE) {
      /* The fd hard limit is fixed at startup; only the soft limit
         may be moved, and only below the hard limit. */
      if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(fd_hard_limit) ||
          ((struct vki_rlimit *)ARG2)->rlim_max != VG_(fd_hard_limit)) {
         SET_STATUS_Failure( VKI_EPERM );
      }
      else {
         VG_(fd_soft_limit) = ((struct vki_rlimit *)ARG2)->rlim_cur;
         SET_STATUS_Success( 0 );
      }
   }
   else if (arg1 == VKI_RLIMIT_DATA) {
      if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_data).rlim_max ||
          ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_data).rlim_max) {
         SET_STATUS_Failure( VKI_EPERM );
      }
      else {
         VG_(client_rlimit_data) = *(struct vki_rlimit *)ARG2;
         SET_STATUS_Success( 0 );
      }
   }
   else if (arg1 == VKI_RLIMIT_STACK && tid == 1) {
      /* Only the main thread's stack limit is emulated. */
      if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_stack).rlim_max ||
          ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_stack).rlim_max) {
         SET_STATUS_Failure( VKI_EPERM );
      }
      else {
         VG_(threads)[tid].client_stack_szB = ((struct vki_rlimit *)ARG2)->rlim_cur;
         VG_(client_rlimit_stack) = *(struct vki_rlimit *)ARG2;
         SET_STATUS_Success( 0 );
      }
   }
}
3956 
PRE(sys_setuid)
{
   /* int setuid(uid_t uid) -- register arg only. */
   PRINT("sys_setuid ( %ld )", ARG1);
   PRE_REG_READ1(long, "setuid", vki_uid_t, uid);
}
3962 
PRE(sys_newstat)
{
   /* int stat(const char *file_name, struct stat *buf) */
   PRINT("sys_newstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "stat", char *, file_name, struct stat *, buf);
   PRE_MEM_RASCIIZ( "stat(file_name)", ARG1 );
   PRE_MEM_WRITE( "stat(buf)", ARG2, sizeof(struct vki_stat) );
}
3970 
POST(sys_newstat)
{
   /* Stat buffer was filled in by the kernel. */
   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
}
3975 
PRE(sys_statfs)
{
   /* int statfs(const char *path, struct statfs *buf) */
   PRINT("sys_statfs ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "statfs", const char *, path, struct statfs *, buf);
   PRE_MEM_RASCIIZ( "statfs(path)", ARG1 );
   PRE_MEM_WRITE( "statfs(buf)", ARG2, sizeof(struct vki_statfs) );
}
POST(sys_statfs)
{
   /* Filesystem info buffer was filled in by the kernel. */
   POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
}
3987 
PRE(sys_statfs64)
{
   /* int statfs64(const char *path, size_t size, struct statfs64 *buf)
      The caller passes the buffer size explicitly (ARG2). */
   PRINT("sys_statfs64 ( %#lx(%s), %llu, %#lx )",ARG1,(char*)ARG1,(ULong)ARG2,ARG3);
   PRE_REG_READ3(long, "statfs64",
                 const char *, path, vki_size_t, size, struct statfs64 *, buf);
   PRE_MEM_RASCIIZ( "statfs64(path)", ARG1 );
   PRE_MEM_WRITE( "statfs64(buf)", ARG3, ARG2 );
}
POST(sys_statfs64)
{
   /* Buffer of caller-specified size (ARG2) was filled in. */
   POST_MEM_WRITE( ARG3, ARG2 );
}
4000 
PRE(sys_symlink)
{
   /* int symlink(const char *oldpath, const char *newpath) */
   *flags |= SfMayBlock;
   PRINT("sys_symlink ( %#lx(%s), %#lx(%s) )",ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   PRE_REG_READ2(long, "symlink", const char *, oldpath, const char *, newpath);
   PRE_MEM_RASCIIZ( "symlink(oldpath)", ARG1 );
   PRE_MEM_RASCIIZ( "symlink(newpath)", ARG2 );
}
4009 
PRE(sys_time)
{
   /* time_t time(time_t *t); */
   PRINT("sys_time ( %#lx )",ARG1);
   PRE_REG_READ1(long, "time", int *, t);
   /* t may be NULL, in which case only the return value is used. */
   if (ARG1 != 0) {
      PRE_MEM_WRITE( "time(t)", ARG1, sizeof(vki_time_t) );
   }
}
4019 
POST(sys_time)
{
   /* *t was written only if a non-NULL pointer was supplied. */
   if (ARG1 != 0) {
      POST_MEM_WRITE( ARG1, sizeof(vki_time_t) );
   }
}
4026 
PRE(sys_times)
{
   /* clock_t times(struct tms *buf) -- buf may be NULL. */
   PRINT("sys_times ( %#lx )", ARG1);
   PRE_REG_READ1(long, "times", struct tms *, buf);
   if (ARG1 != 0) {
      PRE_MEM_WRITE( "times(buf)", ARG1, sizeof(struct vki_tms) );
   }
}
4035 
POST(sys_times)
{
   /* tms buffer written only when non-NULL. */
   if (ARG1 != 0) {
      POST_MEM_WRITE( ARG1, sizeof(struct vki_tms) );
   }
}
4042 
PRE(sys_umask)
{
   /* mode_t umask(mode_t mask) -- register arg only, cannot fail. */
   PRINT("sys_umask ( %ld )", ARG1);
   PRE_REG_READ1(long, "umask", int, mask);
}
4048 
PRE(sys_unlink)
{
   /* int unlink(const char *pathname) */
   *flags |= SfMayBlock;
   PRINT("sys_unlink ( %#lx(%s) )", ARG1,(char*)ARG1);
   PRE_REG_READ1(long, "unlink", const char *, pathname);
   PRE_MEM_RASCIIZ( "unlink(pathname)", ARG1 );
}
4056 
PRE(sys_newuname)
{
   /* int uname(struct utsname *buf) */
   PRINT("sys_newuname ( %#lx )", ARG1);
   PRE_REG_READ1(long, "uname", struct new_utsname *, buf);
   PRE_MEM_WRITE( "uname(buf)", ARG1, sizeof(struct vki_new_utsname) );
}
4063 
POST(sys_newuname)
{
   /* utsname buffer was filled in. */
   if (ARG1 != 0) {
      POST_MEM_WRITE( ARG1, sizeof(struct vki_new_utsname) );
   }
}
4070 
PRE(sys_waitpid)
{
   /* pid_t waitpid(pid_t pid, int *status, int options)
      status may be NULL. */
   *flags |= SfMayBlock;
   PRINT("sys_waitpid ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
   PRE_REG_READ3(long, "waitpid",
                 vki_pid_t, pid, unsigned int *, status, int, options);

   if (ARG2 != (Addr)NULL)
      PRE_MEM_WRITE( "waitpid(status)", ARG2, sizeof(int) );
}
4081 
POST(sys_waitpid)
{
   /* Exit status written only when a non-NULL pointer was given. */
   if (ARG2 != (Addr)NULL)
      POST_MEM_WRITE( ARG2, sizeof(int) );
}
4087 
PRE(sys_wait4)
{
   /* pid_t wait4(pid_t pid, int *status, int options,
                  struct rusage *rusage)
      Both status and rusage may be NULL. */
   *flags |= SfMayBlock;
   PRINT("sys_wait4 ( %ld, %#lx, %ld, %#lx )", ARG1,ARG2,ARG3,ARG4);

   PRE_REG_READ4(long, "wait4",
                 vki_pid_t, pid, unsigned int *, status, int, options,
                 struct rusage *, rusage);
   if (ARG2 != (Addr)NULL)
      PRE_MEM_WRITE( "wait4(status)", ARG2, sizeof(int) );
   if (ARG4 != (Addr)NULL)
      PRE_MEM_WRITE( "wait4(rusage)", ARG4, sizeof(struct vki_rusage) );
}
4101 
POST(sys_wait4)
{
   /* Mark whichever optional outputs the caller asked for. */
   if (ARG2 != (Addr)NULL)
      POST_MEM_WRITE( ARG2, sizeof(int) );
   if (ARG4 != (Addr)NULL)
      POST_MEM_WRITE( ARG4, sizeof(struct vki_rusage) );
}
4109 
PRE(sys_writev)
{
   /* ssize_t writev(int fd, const struct iovec *vector, int count)
      The iovec array and every iov_base region are read by the
      kernel. */
   Int i;
   struct vki_iovec * vec;
   *flags |= SfMayBlock;
   PRINT("sys_writev ( %ld, %#lx, %llu )",ARG1,ARG2,(ULong)ARG3);
   PRE_REG_READ3(ssize_t, "writev",
                 unsigned long, fd, const struct iovec *, vector,
                 unsigned long, count);
   if (!ML_(fd_allowed)(ARG1, "writev", tid, False)) {
      SET_STATUS_Failure( VKI_EBADF );
   } else {
      PRE_MEM_READ( "writev(vector)",
		     ARG2, ARG3 * sizeof(struct vki_iovec) );
      if (ARG2 != 0) {
         /* ToDo: don't do any of the following if the vector is invalid */
         vec = (struct vki_iovec *)ARG2;
         for (i = 0; i < (Int)ARG3; i++)
            PRE_MEM_READ( "writev(vector[...])",
                           (Addr)vec[i].iov_base, vec[i].iov_len );
      }
   }
}
4133 
PRE(sys_utimes)
{
   /* int utimes(const char *filename, const struct timeval tvp[2])
      tvp may be NULL (meaning "set to current time"); otherwise both
      array elements are read. */
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_utimes ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "utimes", char *, filename, struct timeval *, tvp);
   PRE_MEM_RASCIIZ( "utimes(filename)", ARG1 );
   if (ARG2 != 0) {
      PRE_timeval_READ( "utimes(tvp[0])", ARG2 );
      PRE_timeval_READ( "utimes(tvp[1])", ARG2+sizeof(struct vki_timeval) );
   }
}
4145 
PRE(sys_acct)
{
   /* int acct(const char *filename) -- process accounting on/off. */
   PRINT("sys_acct ( %#lx(%s) )", ARG1,(char*)ARG1);
   PRE_REG_READ1(long, "acct", const char *, filename);
   PRE_MEM_RASCIIZ( "acct(filename)", ARG1 );
}
4152 
PRE(sys_pause)
{
   /* int pause(void) -- blocks until a signal arrives. */
   *flags |= SfMayBlock;
   PRINT("sys_pause ( )");
   PRE_REG_READ0(long, "pause");
}
4159 
PRE(sys_sigaltstack)
{
   /* int sigaltstack(const stack_t *ss, stack_t *oss)
      Handled entirely inside Valgrind (VG_(do_sys_sigaltstack)); the
      kernel never sees this call, since Valgrind manages the client's
      alternate signal stacks itself. */
   PRINT("sigaltstack ( %#lx, %#lx )",ARG1,ARG2);
   PRE_REG_READ2(int, "sigaltstack",
                 const vki_stack_t *, ss, vki_stack_t *, oss);
   if (ARG1 != 0) {
      const vki_stack_t *ss = (vki_stack_t *)ARG1;
      /* Check the three fields individually to avoid flagging padding. */
      PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_sp, sizeof(ss->ss_sp) );
      PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_flags, sizeof(ss->ss_flags) );
      PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_size, sizeof(ss->ss_size) );
   }
   if (ARG2 != 0) {
      PRE_MEM_WRITE( "sigaltstack(oss)", ARG2, sizeof(vki_stack_t) );
   }

   SET_STATUS_from_SysRes(
      VG_(do_sys_sigaltstack) (tid, (vki_stack_t*)ARG1,
                              (vki_stack_t*)ARG2)
   );
}
POST(sys_sigaltstack)
{
   /* On success, the old stack description was written into *oss. */
   vg_assert(SUCCESS);
   if (RES == 0 && ARG2 != 0)
      POST_MEM_WRITE( ARG2, sizeof(vki_stack_t));
}
4186 
4187 #undef PRE
4188 #undef POST
4189 
4190 #endif // defined(VGO_linux) || defined(VGO_darwin)
4191 
4192 /*--------------------------------------------------------------------*/
4193 /*--- end                                                          ---*/
4194 /*--------------------------------------------------------------------*/
4195 
4196