• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- Wrappers for generic Unix system calls                       ---*/
4 /*---                                            syswrap-generic.c ---*/
5 /*--------------------------------------------------------------------*/
6 
7 /*
8    This file is part of Valgrind, a dynamic binary instrumentation
9    framework.
10 
11    Copyright (C) 2000-2011 Julian Seward
12       jseward@acm.org
13 
14    This program is free software; you can redistribute it and/or
15    modify it under the terms of the GNU General Public License as
16    published by the Free Software Foundation; either version 2 of the
17    License, or (at your option) any later version.
18 
19    This program is distributed in the hope that it will be useful, but
20    WITHOUT ANY WARRANTY; without even the implied warranty of
21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22    General Public License for more details.
23 
24    You should have received a copy of the GNU General Public License
25    along with this program; if not, write to the Free Software
26    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27    02111-1307, USA.
28 
29    The GNU General Public License is contained in the file COPYING.
30 */
31 
32 #if defined(VGO_linux) || defined(VGO_darwin)
33 
34 #include "pub_core_basics.h"
35 #include "pub_core_vki.h"
36 #include "pub_core_vkiscnums.h"
37 #include "pub_core_libcsetjmp.h"    // to keep _threadstate.h happy
38 #include "pub_core_threadstate.h"
39 #include "pub_core_debuginfo.h"     // VG_(di_notify_*)
40 #include "pub_core_aspacemgr.h"
41 #include "pub_core_transtab.h"      // VG_(discard_translations)
42 #include "pub_core_xarray.h"
43 #include "pub_core_clientstate.h"   // VG_(brk_base), VG_(brk_limit)
44 #include "pub_core_debuglog.h"
45 #include "pub_core_errormgr.h"
46 #include "pub_tool_gdbserver.h"     // VG_(gdbserver)
47 #include "pub_core_libcbase.h"
48 #include "pub_core_libcassert.h"
49 #include "pub_core_libcfile.h"
50 #include "pub_core_libcprint.h"
51 #include "pub_core_libcproc.h"
52 #include "pub_core_libcsignal.h"
53 #include "pub_core_machine.h"       // VG_(get_SP)
54 #include "pub_core_mallocfree.h"
55 #include "pub_core_options.h"
56 #include "pub_core_scheduler.h"
57 #include "pub_core_signals.h"
58 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
59 #include "pub_core_syscall.h"
60 #include "pub_core_syswrap.h"
61 #include "pub_core_tooliface.h"
62 #include "pub_core_ume.h"
63 
64 #include "priv_types_n_macros.h"
65 #include "priv_syswrap-generic.h"
66 
67 #include "config.h"
68 
69 
70 /* Returns True iff address range is something the client can
71    plausibly mess with: all of it is either already belongs to the
72    client or is free or a reservation. */
73 
ML_(valid_client_addr)74 Bool ML_(valid_client_addr)(Addr start, SizeT size, ThreadId tid,
75                                    const Char *syscallname)
76 {
77    Bool ret;
78 
79    if (size == 0)
80       return True;
81 
82    ret = VG_(am_is_valid_for_client_or_free_or_resvn)
83             (start,size,VKI_PROT_NONE);
84 
85    if (0)
86       VG_(printf)("%s: test=%#lx-%#lx ret=%d\n",
87 		  syscallname, start, start+size-1, (Int)ret);
88 
89    if (!ret && syscallname != NULL) {
90       VG_(message)(Vg_UserMsg, "Warning: client syscall %s tried "
91                                "to modify addresses %#lx-%#lx\n",
92                                syscallname, start, start+size-1);
93       if (VG_(clo_verbosity) > 1) {
94          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
95       }
96    }
97 
98    return ret;
99 }
100 
101 
ML_(client_signal_OK)102 Bool ML_(client_signal_OK)(Int sigNo)
103 {
104    /* signal 0 is OK for kill */
105    Bool ret = sigNo >= 0 && sigNo <= VG_SIGVGRTUSERMAX;
106 
107    //VG_(printf)("client_signal_OK(%d) -> %d\n", sigNo, ret);
108 
109    return ret;
110 }
111 
112 
113 /* Handy small function to help stop wrappers from segfaulting when
114    presented with bogus client addresses.  Is not used for generating
115    user-visible errors. */
116 
ML_(safe_to_deref)117 Bool ML_(safe_to_deref) ( void* start, SizeT size )
118 {
119    return VG_(am_is_valid_for_client)( (Addr)start, size, VKI_PROT_READ );
120 }
121 
122 
123 /* ---------------------------------------------------------------------
124    Doing mmap, mremap
125    ------------------------------------------------------------------ */
126 
127 /* AFAICT from kernel sources (mm/mprotect.c) and general experimentation,
128    munmap, mprotect (and mremap??) work at the page level.  So addresses
129    and lengths must be adjusted for this. */
130 
131 /* Mash around start and length so that the area exactly covers
132    an integral number of pages.  If we don't do that, memcheck's
133    idea of addressible memory diverges from that of the
134    kernel's, which causes the leak detector to crash. */
135 static
page_align_addr_and_len(Addr * a,SizeT * len)136 void page_align_addr_and_len( Addr* a, SizeT* len)
137 {
138    Addr ra;
139 
140    ra = VG_PGROUNDDN(*a);
141    *len = VG_PGROUNDUP(*a + *len) - ra;
142    *a = ra;
143 }
144 
/* Tell the address-space manager about a completed client mmap, and
   drop any translations it says are now stale. */
static void notify_core_of_mmap(Addr a, SizeT len, UInt prot,
                                UInt flags, Int fd, Off64T offset)
{
   Bool discard;

   /* 'a' is the return value from a real kernel mmap, hence: */
   vg_assert(VG_IS_PAGE_ALIGNED(a));
   /* whereas len is whatever the syscall supplied.  So: */
   len = VG_PGROUNDUP(len);

   discard = VG_(am_notify_client_mmap)( a, len, prot, flags, fd, offset );

   if (discard)
      VG_(discard_translations)( (Addr64)a, (ULong)len,
                                 "notify_core_of_mmap" );
}
161 
/* Tell the tool about a completed client mmap. */
static void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, ULong di_handle)
{
   SizeT fourgig   = (1ULL << 32);
   SizeT guardpage = 10 * fourgig;
   Bool  r, w, x;

   /* 'a' is the return value from a real kernel mmap, hence: */
   vg_assert(VG_IS_PAGE_ALIGNED(a));
   /* whereas len is whatever the syscall supplied.  So: */
   len = VG_PGROUNDUP(len);

#ifdef VGA_amd64
   /* Skip the enormous (~84G: 4G + 2 x 40G guard) reservation NaCl
      makes, rather than telling the tool about it. */
   if (len >= fourgig + 2 * guardpage) {
     VG_(printf)("Valgrind: ignoring NaCl's mmap(84G)\n");
     return;
   }
#endif  // VGA_amd64

   r = toBool(prot & VKI_PROT_READ);
   w = toBool(prot & VKI_PROT_WRITE);
   x = toBool(prot & VKI_PROT_EXEC);

   VG_TRACK( new_mem_mmap, a, len, r, w, x, di_handle );
}
185 
186 
187 /* When a client mmap has been successfully done, this function must
188    be called.  It notifies both aspacem and the tool of the new
189    mapping.
190 
191    JRS 2008-Aug-14: But notice this is *very* obscure.  The only place
192    it is called from is POST(sys_io_setup).  In particular,
193    ML_(generic_PRE_sys_mmap), in m_syswrap, is the "normal case" handler for
194    client mmap.  But it doesn't call this function; instead it does the
195    relevant notifications itself.  Here, we just pass di_handle=0 to
196    notify_tool_of_mmap as we have no better information.  But really this
197    function should be done away with; problem is I don't understand what
198    POST(sys_io_setup) does or how it works.
199 
200    [However, this function is used lots for Darwin, because
201     ML_(generic_PRE_sys_mmap) cannot be used for Darwin.]
202  */
203 void
ML_(notify_core_and_tool_of_mmap)204 ML_(notify_core_and_tool_of_mmap) ( Addr a, SizeT len, UInt prot,
205                                     UInt flags, Int fd, Off64T offset )
206 {
207    // XXX: unlike the other notify_core_and_tool* functions, this one doesn't
208    // do anything with debug info (ie. it doesn't call VG_(di_notify_mmap)).
209    // Should it?  --njn
210    notify_core_of_mmap(a, len, prot, flags, fd, offset);
211    notify_tool_of_mmap(a, len, prot, 0/*di_handle*/);
212 }
213 
214 void
ML_(notify_core_and_tool_of_munmap)215 ML_(notify_core_and_tool_of_munmap) ( Addr a, SizeT len )
216 {
217    Bool d;
218 
219    page_align_addr_and_len(&a, &len);
220    d = VG_(am_notify_munmap)(a, len);
221    VG_TRACK( die_mem_munmap, a, len );
222    VG_(di_notify_munmap)( a, len );
223    if (d)
224       VG_(discard_translations)( (Addr64)a, (ULong)len,
225                                  "ML_(notify_core_and_tool_of_munmap)" );
226 }
227 
228 void
ML_(notify_core_and_tool_of_mprotect)229 ML_(notify_core_and_tool_of_mprotect) ( Addr a, SizeT len, Int prot )
230 {
231    Bool rr = toBool(prot & VKI_PROT_READ);
232    Bool ww = toBool(prot & VKI_PROT_WRITE);
233    Bool xx = toBool(prot & VKI_PROT_EXEC);
234    Bool d;
235 
236    page_align_addr_and_len(&a, &len);
237    d = VG_(am_notify_mprotect)(a, len, prot);
238    VG_TRACK( change_mem_mprotect, a, len, rr, ww, xx );
239    VG_(di_notify_mprotect)( a, len, prot );
240    if (d)
241       VG_(discard_translations)( (Addr64)a, (ULong)len,
242                                  "ML_(notify_core_and_tool_of_mprotect)" );
243 }
244 
245 
246 
#if HAVE_MREMAP
/* Expand (or shrink) an existing mapping, potentially moving it at
   the same time (controlled by the MREMAP_MAYMOVE flag).  Nightmare.

   Mirrors the kernel's mremap semantics: returns the (possibly new)
   address on success, else EINVAL (bad arguments) or ENOMEM (no
   suitable space).  The tool is kept informed of every copy, grow,
   and unmap via VG_TRACK.
*/
static
SysRes do_mremap( Addr old_addr, SizeT old_len,
                  Addr new_addr, SizeT new_len,
                  UWord flags, ThreadId tid )
{
   /* Fully parenthesised so the expansion is safe in any surrounding
      expression (the previous definition lacked the outer parens). */
#  define MIN_SIZET(_aa,_bb) ((_aa) < (_bb) ? (_aa) : (_bb))

   Bool      ok, d;
   NSegment const* old_seg;
   Addr      advised;
   Bool      f_fixed   = toBool(flags & VKI_MREMAP_FIXED);
   Bool      f_maymove = toBool(flags & VKI_MREMAP_MAYMOVE);

   if (0)
      VG_(printf)("do_remap (old %#lx %ld) (new %#lx %ld) %s %s\n",
                  old_addr,old_len,new_addr,new_len,
                  flags & VKI_MREMAP_MAYMOVE ? "MAYMOVE" : "",
                  flags & VKI_MREMAP_FIXED ? "FIXED" : "");
   if (0)
      VG_(am_show_nsegments)(0, "do_remap: before");

   if (flags & ~(VKI_MREMAP_FIXED | VKI_MREMAP_MAYMOVE))
      goto eINVAL;

   if (!VG_IS_PAGE_ALIGNED(old_addr))
      goto eINVAL;

   old_len = VG_PGROUNDUP(old_len);
   new_len = VG_PGROUNDUP(new_len);

   if (new_len == 0)
      goto eINVAL;

   /* kernel doesn't reject this, but we do. */
   if (old_len == 0)
      goto eINVAL;

   /* reject wraparounds (a+b < b iff unsigned a+b overflowed) */
   if (old_addr + old_len < old_addr)
      goto eINVAL;
   if (f_fixed == True && new_addr + new_len < new_len)
      goto eINVAL;

   /* kernel rejects all fixed, no-move requests (which are
      meaningless). */
   if (f_fixed == True && f_maymove == False)
      goto eINVAL;

   /* Stay away from non-client areas. */
   if (!ML_(valid_client_addr)(old_addr, old_len, tid, "mremap(old_addr)"))
      goto eINVAL;

   /* In all remaining cases, if the old range does not fall within a
      single segment, fail. */
   old_seg = VG_(am_find_nsegment)( old_addr );
   if (old_seg == NULL)
      goto eINVAL;   /* defensively: no segment covers old_addr */
   if (old_addr < old_seg->start || old_addr+old_len-1 > old_seg->end)
      goto eINVAL;
   if (old_seg->kind != SkAnonC && old_seg->kind != SkFileC)
      goto eINVAL;

   vg_assert(old_len > 0);
   vg_assert(new_len > 0);
   vg_assert(VG_IS_PAGE_ALIGNED(old_len));
   vg_assert(VG_IS_PAGE_ALIGNED(new_len));
   vg_assert(VG_IS_PAGE_ALIGNED(old_addr));

   /* There are 3 remaining cases:

      * maymove == False

        new space has to be at old address, so:
            - shrink    -> unmap end
            - same size -> do nothing
            - grow      -> if can grow in-place, do so, else fail

      * maymove == True, fixed == False

        new space can be anywhere, so:
            - shrink    -> unmap end
            - same size -> do nothing
            - grow      -> if can grow in-place, do so, else
                           move to anywhere large enough, else fail

      * maymove == True, fixed == True

        new space must be at new address, so:

            - if new address is not page aligned, fail
            - if new address range overlaps old one, fail
            - if new address range cannot be allocated, fail
            - else move to new address range with new size
            - else fail
   */

   if (f_maymove == False) {
      /* new space has to be at old address */
      if (new_len < old_len)
         goto shrink_in_place;
      if (new_len > old_len)
         goto grow_in_place_or_fail;
      goto same_in_place;
   }

   if (f_maymove == True && f_fixed == False) {
      /* new space can be anywhere */
      if (new_len < old_len)
         goto shrink_in_place;
      if (new_len > old_len)
         goto grow_in_place_or_move_anywhere_or_fail;
      goto same_in_place;
   }

   if (f_maymove == True && f_fixed == True) {
      /* new space can only be at the new address */
      if (!VG_IS_PAGE_ALIGNED(new_addr))
         goto eINVAL;
      if (new_addr+new_len-1 < old_addr || new_addr > old_addr+old_len-1) {
         /* no overlap */
      } else {
         goto eINVAL;
      }
      if (new_addr == 0)
         goto eINVAL;
         /* VG_(am_get_advisory_client_simple) interprets zero to mean
            non-fixed, which is not what we want */
      advised = VG_(am_get_advisory_client_simple)(new_addr, new_len, &ok);
      if (!ok || advised != new_addr)
         goto eNOMEM;
      ok = VG_(am_relocate_nooverlap_client)
              ( &d, old_addr, old_len, new_addr, new_len );
      if (ok) {
         /* Tell the tool: contents copied, any excess is new memory
            with the old segment's permissions, old range is gone. */
         VG_TRACK( copy_mem_remap, old_addr, new_addr,
                                   MIN_SIZET(old_len,new_len) );
         if (new_len > old_len)
            VG_TRACK( new_mem_mmap, new_addr+old_len, new_len-old_len,
                      old_seg->hasR, old_seg->hasW, old_seg->hasX,
                      0/*di_handle*/ );
         VG_TRACK(die_mem_munmap, old_addr, old_len);
         if (d) {
            VG_(discard_translations)( old_addr, old_len, "do_remap(1)" );
            VG_(discard_translations)( new_addr, new_len, "do_remap(2)" );
         }
         return VG_(mk_SysRes_Success)( new_addr );
      }
      goto eNOMEM;
   }

   /* end of the 3 cases */
   /*NOTREACHED*/ vg_assert(0);

  grow_in_place_or_move_anywhere_or_fail:
   {
   /* try growing it in-place */
   Addr   needA = old_addr + old_len;
   SSizeT needL = new_len - old_len;

   vg_assert(needL > 0);
   if (needA == 0)
      goto eINVAL;
      /* VG_(am_get_advisory_client_simple) interprets zero to mean
         non-fixed, which is not what we want */
   advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
   if (ok) {
      /* VG_(am_get_advisory_client_simple) (first arg == 0, meaning
         this-or-nothing) is too lenient, and may allow us to trash
         the next segment along.  So make very sure that the proposed
         new area really is free.  This is perhaps overly
         conservative, but it fixes #129866. */
      NSegment const* segLo = VG_(am_find_nsegment)( needA );
      NSegment const* segHi = VG_(am_find_nsegment)( needA + needL - 1 );
      if (segLo == NULL || segHi == NULL
          || segLo != segHi || segLo->kind != SkFree)
         ok = False;
   }
   if (ok && advised == needA) {
      ok = VG_(am_extend_map_client)( &d, (NSegment*)old_seg, needL );
      if (ok) {
         VG_TRACK( new_mem_mmap, needA, needL,
                                 old_seg->hasR,
                                 old_seg->hasW, old_seg->hasX,
                                 0/*di_handle*/ );
         if (d)
            VG_(discard_translations)( needA, needL, "do_remap(3)" );
         return VG_(mk_SysRes_Success)( old_addr );
      }
   }

   /* that failed.  Look elsewhere. */
   advised = VG_(am_get_advisory_client_simple)( 0, new_len, &ok );
   if (ok) {
      Bool oldR = old_seg->hasR;
      Bool oldW = old_seg->hasW;
      Bool oldX = old_seg->hasX;
      /* assert new area does not overlap old */
      vg_assert(advised+new_len-1 < old_addr
                || advised > old_addr+old_len-1);
      ok = VG_(am_relocate_nooverlap_client)
              ( &d, old_addr, old_len, advised, new_len );
      if (ok) {
         VG_TRACK( copy_mem_remap, old_addr, advised,
                                   MIN_SIZET(old_len,new_len) );
         if (new_len > old_len)
            VG_TRACK( new_mem_mmap, advised+old_len, new_len-old_len,
                      oldR, oldW, oldX, 0/*di_handle*/ );
         VG_TRACK(die_mem_munmap, old_addr, old_len);
         if (d) {
            VG_(discard_translations)( old_addr, old_len, "do_remap(4)" );
            VG_(discard_translations)( advised, new_len, "do_remap(5)" );
         }
         return VG_(mk_SysRes_Success)( advised );
      }
   }
   goto eNOMEM;
   }
   /*NOTREACHED*/ vg_assert(0);

  grow_in_place_or_fail:
   {
   Addr  needA = old_addr + old_len;
   SizeT needL = new_len - old_len;
   if (needA == 0)
      goto eINVAL;
      /* VG_(am_get_advisory_client_simple) interprets zero to mean
         non-fixed, which is not what we want */
   advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
   if (ok) {
      /* VG_(am_get_advisory_client_simple) (first arg == 0, meaning
         this-or-nothing) is too lenient, and may allow us to trash
         the next segment along.  So make very sure that the proposed
         new area really is free. */
      NSegment const* segLo = VG_(am_find_nsegment)( needA );
      NSegment const* segHi = VG_(am_find_nsegment)( needA + needL - 1 );
      if (segLo == NULL || segHi == NULL
          || segLo != segHi || segLo->kind != SkFree)
         ok = False;
   }
   if (!ok || advised != needA)
      goto eNOMEM;
   ok = VG_(am_extend_map_client)( &d, (NSegment*)old_seg, needL );
   if (!ok)
      goto eNOMEM;
   VG_TRACK( new_mem_mmap, needA, needL,
                           old_seg->hasR, old_seg->hasW, old_seg->hasX,
                           0/*di_handle*/ );
   if (d)
      VG_(discard_translations)( needA, needL, "do_remap(6)" );
   return VG_(mk_SysRes_Success)( old_addr );
   }
   /*NOTREACHED*/ vg_assert(0);

  shrink_in_place:
   {
   SysRes sres = VG_(am_munmap_client)( &d, old_addr+new_len, old_len-new_len );
   if (sr_isError(sres))
      return sres;
   VG_TRACK( die_mem_munmap, old_addr+new_len, old_len-new_len );
   if (d)
      VG_(discard_translations)( old_addr+new_len, old_len-new_len,
                                 "do_remap(7)" );
   return VG_(mk_SysRes_Success)( old_addr );
   }
   /*NOTREACHED*/ vg_assert(0);

  same_in_place:
   return VG_(mk_SysRes_Success)( old_addr );
   /*NOTREACHED*/ vg_assert(0);

  eINVAL:
   return VG_(mk_SysRes_Error)( VKI_EINVAL );
  eNOMEM:
   return VG_(mk_SysRes_Error)( VKI_ENOMEM );

#  undef MIN_SIZET
}
#endif /* HAVE_MREMAP */
526 
527 
528 /* ---------------------------------------------------------------------
529    File-descriptor tracking
530    ------------------------------------------------------------------ */
531 
/* One of these is allocated for each open file descriptor.  */
typedef struct OpenFd
{
   Int fd;                        /* The file descriptor */
   Char *pathname;                /* NULL if not a regular file or unknown */
   ExeContext *where;             /* NULL if inherited from parent */
   struct OpenFd *next, *prev;    /* Links in the doubly-linked fd list */
} OpenFd;

/* Doubly-linked list of recorded (tracked) file descriptors. */
static OpenFd *allocated_fds = NULL;

/* Count of open file descriptors currently in the list. */
static Int fd_count = 0;
546 
547 
548 /* Note the fact that a file descriptor was just closed. */
549 static
record_fd_close(Int fd)550 void record_fd_close(Int fd)
551 {
552    OpenFd *i = allocated_fds;
553 
554    if (fd >= VG_(fd_hard_limit))
555       return;			/* Valgrind internal */
556 
557    while(i) {
558       if(i->fd == fd) {
559          if(i->prev)
560             i->prev->next = i->next;
561          else
562             allocated_fds = i->next;
563          if(i->next)
564             i->next->prev = i->prev;
565          if(i->pathname)
566             VG_(arena_free) (VG_AR_CORE, i->pathname);
567          VG_(arena_free) (VG_AR_CORE, i);
568          fd_count--;
569          break;
570       }
571       i = i->next;
572    }
573 }
574 
575 /* Note the fact that a file descriptor was just opened.  If the
576    tid is -1, this indicates an inherited fd.  If the pathname is NULL,
577    this either indicates a non-standard file (i.e. a pipe or socket or
578    some such thing) or that we don't know the filename.  If the fd is
579    already open, then we're probably doing a dup2() to an existing fd,
580    so just overwrite the existing one. */
ML_(record_fd_open_with_given_name)581 void ML_(record_fd_open_with_given_name)(ThreadId tid, Int fd, char *pathname)
582 {
583    OpenFd *i;
584 
585    if (fd >= VG_(fd_hard_limit))
586       return;			/* Valgrind internal */
587 
588    /* Check to see if this fd is already open. */
589    i = allocated_fds;
590    while (i) {
591       if (i->fd == fd) {
592          if (i->pathname) VG_(arena_free)(VG_AR_CORE, i->pathname);
593          break;
594       }
595       i = i->next;
596    }
597 
598    /* Not already one: allocate an OpenFd */
599    if (i == NULL) {
600       i = VG_(arena_malloc)(VG_AR_CORE, "syswrap.rfdowgn.1", sizeof(OpenFd));
601 
602       i->prev = NULL;
603       i->next = allocated_fds;
604       if(allocated_fds) allocated_fds->prev = i;
605       allocated_fds = i;
606       fd_count++;
607    }
608 
609    i->fd = fd;
610    i->pathname = VG_(arena_strdup)(VG_AR_CORE, "syswrap.rfdowgn.2", pathname);
611    i->where = (tid == -1) ? NULL : VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
612 }
613 
614 // Record opening of an fd, and find its name.
ML_(record_fd_open_named)615 void ML_(record_fd_open_named)(ThreadId tid, Int fd)
616 {
617    static HChar buf[VKI_PATH_MAX];
618    Char* name;
619    if (VG_(resolve_filename)(fd, buf, VKI_PATH_MAX))
620       name = buf;
621    else
622       name = NULL;
623 
624    ML_(record_fd_open_with_given_name)(tid, fd, name);
625 }
626 
627 // Record opening of a nameless fd.
ML_(record_fd_open_nameless)628 void ML_(record_fd_open_nameless)(ThreadId tid, Int fd)
629 {
630    ML_(record_fd_open_with_given_name)(tid, fd, NULL);
631 }
632 
633 static
unix2name(struct vki_sockaddr_un * sa,UInt len,Char * name)634 Char *unix2name(struct vki_sockaddr_un *sa, UInt len, Char *name)
635 {
636    if (sa == NULL || len == 0 || sa->sun_path[0] == '\0') {
637       VG_(sprintf)(name, "<unknown>");
638    } else {
639       VG_(sprintf)(name, "%s", sa->sun_path);
640    }
641 
642    return name;
643 }
644 
645 static
inet2name(struct vki_sockaddr_in * sa,UInt len,Char * name)646 Char *inet2name(struct vki_sockaddr_in *sa, UInt len, Char *name)
647 {
648    if (sa == NULL || len == 0) {
649       VG_(sprintf)(name, "<unknown>");
650    } else {
651       UInt addr = VG_(ntohl)(sa->sin_addr.s_addr);
652       if (addr == 0) {
653          VG_(sprintf)(name, "<unbound>");
654       } else {
655          VG_(sprintf)(name, "%u.%u.%u.%u:%u",
656                       (addr>>24) & 0xFF, (addr>>16) & 0xFF,
657                       (addr>>8) & 0xFF, addr & 0xFF,
658                       VG_(ntohs)(sa->sin_port));
659       }
660    }
661 
662    return name;
663 }
664 
665 /*
666  * Try get some details about a socket.
667  */
668 static void
getsockdetails(Int fd)669 getsockdetails(Int fd)
670 {
671    union u {
672       struct vki_sockaddr a;
673       struct vki_sockaddr_in in;
674       struct vki_sockaddr_un un;
675    } laddr;
676    UInt llen;
677 
678    llen = sizeof(laddr);
679    VG_(memset)(&laddr, 0, llen);
680 
681    if(VG_(getsockname)(fd, (struct vki_sockaddr *)&(laddr.a), &llen) != -1) {
682       switch(laddr.a.sa_family) {
683       case VKI_AF_INET: {
684          static char lname[32];
685          static char pname[32];
686          struct vki_sockaddr_in paddr;
687          UInt plen = sizeof(struct vki_sockaddr_in);
688 
689          if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
690             VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> %s\n", fd,
691                          inet2name(&(laddr.in), llen, lname),
692                          inet2name(&paddr, plen, pname));
693          } else {
694             VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> unbound\n",
695                          fd, inet2name(&(laddr.in), llen, lname));
696          }
697          return;
698          }
699       case VKI_AF_UNIX: {
700          static char lname[256];
701          VG_(message)(Vg_UserMsg, "Open AF_UNIX socket %d: %s\n", fd,
702                       unix2name(&(laddr.un), llen, lname));
703          return;
704          }
705       default:
706          VG_(message)(Vg_UserMsg, "Open pf-%d socket %d:\n",
707                       laddr.a.sa_family, fd);
708          return;
709       }
710    }
711 
712    VG_(message)(Vg_UserMsg, "Open socket %d:\n", fd);
713 }
714 
715 
716 /* Dump out a summary, and a more detailed list, of open file descriptors. */
VG_(show_open_fds)717 void VG_(show_open_fds) (void)
718 {
719    OpenFd *i = allocated_fds;
720 
721    VG_(message)(Vg_UserMsg, "FILE DESCRIPTORS: %d open at exit.\n", fd_count);
722 
723    while (i) {
724       if (i->pathname) {
725          VG_(message)(Vg_UserMsg, "Open file descriptor %d: %s\n", i->fd,
726                       i->pathname);
727       } else {
728          Int val;
729          UInt len = sizeof(val);
730 
731          if (VG_(getsockopt)(i->fd, VKI_SOL_SOCKET, VKI_SO_TYPE, &val, &len)
732              == -1) {
733             VG_(message)(Vg_UserMsg, "Open file descriptor %d:\n", i->fd);
734          } else {
735             getsockdetails(i->fd);
736          }
737       }
738 
739       if(i->where) {
740          VG_(pp_ExeContext)(i->where);
741          VG_(message)(Vg_UserMsg, "\n");
742       } else {
743          VG_(message)(Vg_UserMsg, "   <inherited from parent>\n");
744          VG_(message)(Vg_UserMsg, "\n");
745       }
746 
747       i = i->next;
748    }
749 
750    VG_(message)(Vg_UserMsg, "\n");
751 }
752 
753 /* If /proc/self/fd doesn't exist (e.g. you've got a Linux kernel that doesn't
754    have /proc support compiled in, or a non-Linux kernel), then we need to
755    find out what file descriptors we inherited from our parent process the
756    hard way - by checking each fd in turn. */
757 static
init_preopened_fds_without_proc_self_fd(void)758 void init_preopened_fds_without_proc_self_fd(void)
759 {
760    struct vki_rlimit lim;
761    UInt count;
762    Int i;
763 
764    if (VG_(getrlimit) (VKI_RLIMIT_NOFILE, &lim) == -1) {
765       /* Hmm.  getrlimit() failed.  Now we're screwed, so just choose
766          an arbitrarily high number.  1024 happens to be the limit in
767          the 2.4 Linux kernels. */
768       count = 1024;
769    } else {
770       count = lim.rlim_cur;
771    }
772 
773    for (i = 0; i < count; i++)
774       if (VG_(fcntl)(i, VKI_F_GETFL, 0) != -1)
775          ML_(record_fd_open_named)(-1, i);
776 }
777 
/* Initialize the list of open file descriptors with the file descriptors
   we inherited from out parent process.  On Linux, enumerate
   /proc/self/fd; if that can't be opened (or on Darwin), fall back to
   probing each fd individually. */

void VG_(init_preopened_fds)(void)
{
// DDD: should probably use HAVE_PROC here or similar, instead.
#if defined(VGO_linux)
   Int ret;
   struct vki_dirent d;
   SysRes f;

   f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
   if (sr_isError(f)) {
      init_preopened_fds_without_proc_self_fd();
      return;
   }

   /* Read one dirent per getdents call; the explicit lseek to d.d_off
      at the bottom of the loop advances to the next entry. */
   while ((ret = VG_(getdents)(sr_Res(f), &d, sizeof(d))) != 0) {
      if (ret == -1)
         goto out;

      /* Skip the "." and ".." entries. */
      if (VG_(strcmp)(d.d_name, ".") && VG_(strcmp)(d.d_name, "..")) {
         Char* s;
         Int fno = VG_(strtoll10)(d.d_name, &s);
         if (*s == '\0') {
            /* Don't record the fd we ourselves opened on the directory. */
            if (fno != sr_Res(f))
               if (VG_(clo_track_fds))
                  ML_(record_fd_open_named)(-1, fno);
         } else {
            VG_(message)(Vg_DebugMsg,
               "Warning: invalid file name in /proc/self/fd: %s\n",
               d.d_name);
         }
      }

      VG_(lseek)(sr_Res(f), d.d_off, VKI_SEEK_SET);
   }

  out:
   VG_(close)(sr_Res(f));

#elif defined(VGO_darwin)
   /* No /proc on Darwin: probe each fd in turn. */
   init_preopened_fds_without_proc_self_fd();

#else
#  error Unknown OS
#endif
}
826 
827 static
strdupcat(HChar * cc,const Char * s1,const Char * s2,ArenaId aid)828 Char *strdupcat ( HChar* cc, const Char *s1, const Char *s2, ArenaId aid )
829 {
830    UInt len = VG_(strlen) ( s1 ) + VG_(strlen) ( s2 ) + 1;
831    Char *result = VG_(arena_malloc) ( aid, cc, len );
832    VG_(strcpy) ( result, s1 );
833    VG_(strcat) ( result, s2 );
834    return result;
835 }
836 
837 static
pre_mem_read_sendmsg(ThreadId tid,Bool read,Char * msg,Addr base,SizeT size)838 void pre_mem_read_sendmsg ( ThreadId tid, Bool read,
839                             Char *msg, Addr base, SizeT size )
840 {
841    Char *outmsg = strdupcat ( "di.syswrap.pmrs.1",
842                               "socketcall.sendmsg", msg, VG_AR_CORE );
843    PRE_MEM_READ( outmsg, base, size );
844    VG_(arena_free) ( VG_AR_CORE, outmsg );
845 }
846 
847 static
pre_mem_write_recvmsg(ThreadId tid,Bool read,Char * msg,Addr base,SizeT size)848 void pre_mem_write_recvmsg ( ThreadId tid, Bool read,
849                              Char *msg, Addr base, SizeT size )
850 {
851    Char *outmsg = strdupcat ( "di.syswrap.pmwr.1",
852                               "socketcall.recvmsg", msg, VG_AR_CORE );
853    if ( read )
854       PRE_MEM_READ( outmsg, base, size );
855    else
856       PRE_MEM_WRITE( outmsg, base, size );
857    VG_(arena_free) ( VG_AR_CORE, outmsg );
858 }
859 
860 static
post_mem_write_recvmsg(ThreadId tid,Bool read,Char * fieldName,Addr base,SizeT size)861 void post_mem_write_recvmsg ( ThreadId tid, Bool read,
862                               Char *fieldName, Addr base, SizeT size )
863 {
864    if ( !read )
865       POST_MEM_WRITE( base, size );
866 }
867 
/* Apply foreach_func to every field of *msg, and to the buffers those
   fields point at.  The Bool passed to foreach_func is the 'read'
   flag the callbacks above interpret: True means the kernel reads the
   range, False means it may write it. */
static
void msghdr_foreachfield (
        ThreadId tid,
        struct vki_msghdr *msg,
        void (*foreach_func)( ThreadId, Bool, Char *, Addr, SizeT )
     )
{
   if ( !msg )
      return;

   /* The msghdr members themselves are all read by the kernel, except
      msg_flags which it writes. */
   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_name, sizeof( msg->msg_name ) );
   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_namelen, sizeof( msg->msg_namelen ) );
   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_iov, sizeof( msg->msg_iov ) );
   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_iovlen, sizeof( msg->msg_iovlen ) );
   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_control, sizeof( msg->msg_control ) );
   foreach_func ( tid, True, "(msg)", (Addr)&msg->msg_controllen, sizeof( msg->msg_controllen ) );
   foreach_func ( tid, False, "(msg)", (Addr)&msg->msg_flags, sizeof( msg->msg_flags ) );

   if ( msg->msg_name )
      foreach_func ( tid, False,
                     "(msg.msg_name)",
                     (Addr)msg->msg_name, msg->msg_namelen );

   if ( msg->msg_iov ) {
      struct vki_iovec *iov = msg->msg_iov;
      UInt i;

      /* The iovec array itself is read; whether the buffers it points
         at are read or written depends on the call, hence False. */
      foreach_func ( tid, True,
                     "(msg.msg_iov)",
                     (Addr)iov, msg->msg_iovlen * sizeof( struct vki_iovec ) );

      for ( i = 0; i < msg->msg_iovlen; ++i, ++iov )
         foreach_func ( tid, False,
                        "(msg.msg_iov[i])",
                        (Addr)iov->iov_base, iov->iov_len );
   }

   if ( msg->msg_control )
      foreach_func ( tid, False,
                     "(msg.msg_control)",
                     (Addr)msg->msg_control, msg->msg_controllen );
}
910 
static void check_cmsg_for_fds(ThreadId tid, struct vki_msghdr *msg)
{
   /* Walk the control-message chain of a received msghdr; any
      SCM_RIGHTS message carries newly-received file descriptors,
      which must be registered with the fd tracker. */
   struct vki_cmsghdr *cm;

   for (cm = VKI_CMSG_FIRSTHDR(msg); cm; cm = VKI_CMSG_NXTHDR(msg, cm)) {
      if (cm->cmsg_level != VKI_SOL_SOCKET
          || cm->cmsg_type != VKI_SCM_RIGHTS)
         continue;

      if (VG_(clo_track_fds)) {
         Int *fds = (Int *) VKI_CMSG_DATA(cm);
         /* number of fds = payload bytes / sizeof(int) */
         Int  fdc = (cm->cmsg_len - VKI_CMSG_ALIGN(sizeof(struct vki_cmsghdr)))
                        / sizeof(int);
         Int  i;
         // XXX: must we check the range on these fds with
         //      ML_(fd_allowed)()?
         for (i = 0; i < fdc; i++)
            ML_(record_fd_open_named)(tid, fds[i]);
      }
   }
}
933 
934 /* GrP kernel ignores sa_len (at least on Darwin); this checks the rest */
935 static
pre_mem_read_sockaddr(ThreadId tid,Char * description,struct vki_sockaddr * sa,UInt salen)936 void pre_mem_read_sockaddr ( ThreadId tid,
937                              Char *description,
938                              struct vki_sockaddr *sa, UInt salen )
939 {
940    Char *outmsg;
941    struct vki_sockaddr_un*  sun  = (struct vki_sockaddr_un *)sa;
942    struct vki_sockaddr_in*  sin  = (struct vki_sockaddr_in *)sa;
943    struct vki_sockaddr_in6* sin6 = (struct vki_sockaddr_in6 *)sa;
944 
945    /* NULL/zero-length sockaddrs are legal */
946    if ( sa == NULL || salen == 0 ) return;
947 
948    outmsg = VG_(arena_malloc) ( VG_AR_CORE, "di.syswrap.pmr_sockaddr.1",
949                                 VG_(strlen)( description ) + 30 );
950 
951    VG_(sprintf) ( outmsg, description, "sa_family" );
952    PRE_MEM_READ( outmsg, (Addr) &sa->sa_family, sizeof(vki_sa_family_t));
953 
954    switch (sa->sa_family) {
955 
956       case VKI_AF_UNIX:
957          VG_(sprintf) ( outmsg, description, "sun_path" );
958          PRE_MEM_RASCIIZ( outmsg, (Addr) sun->sun_path );
959          // GrP fixme max of sun_len-2? what about nul char?
960          break;
961 
962       case VKI_AF_INET:
963          VG_(sprintf) ( outmsg, description, "sin_port" );
964          PRE_MEM_READ( outmsg, (Addr) &sin->sin_port, sizeof (sin->sin_port) );
965          VG_(sprintf) ( outmsg, description, "sin_addr" );
966          PRE_MEM_READ( outmsg, (Addr) &sin->sin_addr, sizeof (sin->sin_addr) );
967          break;
968 
969       case VKI_AF_INET6:
970          VG_(sprintf) ( outmsg, description, "sin6_port" );
971          PRE_MEM_READ( outmsg,
972             (Addr) &sin6->sin6_port, sizeof (sin6->sin6_port) );
973          VG_(sprintf) ( outmsg, description, "sin6_flowinfo" );
974          PRE_MEM_READ( outmsg,
975             (Addr) &sin6->sin6_flowinfo, sizeof (sin6->sin6_flowinfo) );
976          VG_(sprintf) ( outmsg, description, "sin6_addr" );
977          PRE_MEM_READ( outmsg,
978             (Addr) &sin6->sin6_addr, sizeof (sin6->sin6_addr) );
979          VG_(sprintf) ( outmsg, description, "sin6_scope_id" );
980          PRE_MEM_READ( outmsg,
981             (Addr) &sin6->sin6_scope_id, sizeof (sin6->sin6_scope_id) );
982          break;
983 
984       default:
985          VG_(sprintf) ( outmsg, description, "" );
986          PRE_MEM_READ( outmsg, (Addr) sa, salen );
987          break;
988    }
989 
990    VG_(arena_free) ( VG_AR_CORE, outmsg );
991 }
992 
993 /* Dereference a pointer to a UInt. */
deref_UInt(ThreadId tid,Addr a,Char * s)994 static UInt deref_UInt ( ThreadId tid, Addr a, Char* s )
995 {
996    UInt* a_p = (UInt*)a;
997    PRE_MEM_READ( s, (Addr)a_p, sizeof(UInt) );
998    if (a_p == NULL)
999       return 0;
1000    else
1001       return *a_p;
1002 }
1003 
ML_(buf_and_len_pre_check)1004 void ML_(buf_and_len_pre_check) ( ThreadId tid, Addr buf_p, Addr buflen_p,
1005                                   Char* buf_s, Char* buflen_s )
1006 {
1007    if (VG_(tdict).track_pre_mem_write) {
1008       UInt buflen_in = deref_UInt( tid, buflen_p, buflen_s);
1009       if (buflen_in > 0) {
1010          VG_(tdict).track_pre_mem_write(
1011             Vg_CoreSysCall, tid, buf_s, buf_p, buflen_in );
1012       }
1013    }
1014 }
1015 
ML_(buf_and_len_post_check)1016 void ML_(buf_and_len_post_check) ( ThreadId tid, SysRes res,
1017                                    Addr buf_p, Addr buflen_p, Char* s )
1018 {
1019    if (!sr_isError(res) && VG_(tdict).track_post_mem_write) {
1020       UInt buflen_out = deref_UInt( tid, buflen_p, s);
1021       if (buflen_out > 0 && buf_p != (Addr)NULL) {
1022          VG_(tdict).track_post_mem_write( Vg_CoreSysCall, tid, buf_p, buflen_out );
1023       }
1024    }
1025 }
1026 
1027 /* ---------------------------------------------------------------------
1028    Data seg end, for brk()
1029    ------------------------------------------------------------------ */
1030 
1031 /*   +--------+------------+
1032      | anon   |    resvn   |
1033      +--------+------------+
1034 
1035      ^     ^  ^
1036      |     |  boundary is page aligned
1037      |     VG_(brk_limit) -- no alignment constraint
1038      VG_(brk_base) -- page aligned -- does not move
1039 
1040      Both the anon part and the reservation part are always at least
1041      one page.
1042 */
1043 
1044 /* Set the new data segment end to NEWBRK.  If this succeeds, return
1045    NEWBRK, else return the current data segment end. */
1046 
static Addr do_brk ( Addr newbrk )
{
   /* Move the client's data segment end to 'newbrk'.  Returns newbrk
      on success, or the unchanged VG_(brk_limit) on failure.  The
      layout invariant (see diagram above): an SkAnonC segment
      starting at VG_(brk_base), immediately followed by an SkResvn
      reservation, both at least one page. */
   NSegment const* anon;    /* SkAnonC segment holding the brk area  */
   NSegment const* resvn;   /* SkResvn reservation that follows it   */
   Addr  newbrk_pg;
   SizeT delta;
   Bool  extended;
   Bool  dbg = False;

   if (dbg)
      VG_(printf)("\ndo_brk: brk_base=%#lx brk_limit=%#lx newbrk=%#lx\n",
                  VG_(brk_base), VG_(brk_limit), newbrk);

#  if 0
   if (0) show_segments("in_brk");
#  endif

   /* Below the segment base: clearly impossible. */
   if (newbrk < VG_(brk_base))
      goto bad;

   if (newbrk >= VG_(brk_base) && newbrk < VG_(brk_limit)) {
      /* Shrinking.  Be lazy: keep the excess pages mapped rather
         than munmap them. */
      NSegment const * seg = VG_(am_find_nsegment)(newbrk);
      if (seg && seg->hasT)
         VG_(discard_translations)( newbrk, VG_(brk_limit) - newbrk,
                                    "do_brk(shrink)" );
      /* Because the pages stay mapped, zero the abandoned range so a
         later re-extension observes fresh zeroes, exactly as a real
         unmap/remap cycle would give.  Be paranoid about the write:
         only do it when both ends of the range lie in the same
         writable segment, so it cannot fault. */
      if (seg) {
         /* pre: newbrk < VG_(brk_limit)  =>  newbrk <= VG_(brk_limit)-1 */
         NSegment const * seg2;
         vg_assert(newbrk < VG_(brk_limit));
         seg2 = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
         if (seg2 && seg == seg2 && seg->hasW)
            VG_(memset)( (void*)newbrk, 0, VG_(brk_limit) - newbrk );
      }

      VG_(brk_limit) = newbrk;
      return newbrk;
   }

   /* Growing.  Find the anon segment containing the current limit,
      and the reservation right after it. */
   if (VG_(brk_limit) > VG_(brk_base))
      anon = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
   else
      anon = VG_(am_find_nsegment)( VG_(brk_limit) );
   resvn = VG_(am_next_nsegment)( (NSegment*)anon, True/*forwards*/ );

   /* setup_client_dataseg in m_main guarantees this layout. */
   vg_assert(anon);
   vg_assert(resvn);
   vg_assert(anon->kind == SkAnonC);
   vg_assert(resvn->kind == SkResvn);
   vg_assert(anon->end+1 == resvn->start);

   vg_assert(newbrk >= VG_(brk_base));
   if (newbrk <= resvn->start) {
      /* Still fits inside the anon segment; no remapping needed. */
      VG_(brk_limit) = newbrk;
      return newbrk;
   }

   if (newbrk > resvn->end+1 - VKI_PAGE_SIZE) {
      /* Too large: the reservation would fall below one page, which
         is not allowed. */
      goto bad;
   }

   /* Take whole pages from the front of the reservation. */
   newbrk_pg = VG_PGROUNDUP(newbrk);
   vg_assert(newbrk_pg > resvn->start
             && newbrk_pg <= resvn->end+1 - VKI_PAGE_SIZE);
   delta = newbrk_pg - resvn->start;
   vg_assert(delta > 0);
   vg_assert(VG_IS_PAGE_ALIGNED(delta));

   extended = VG_(am_extend_into_adjacent_reservation_client)
                 ( (NSegment*)anon, delta );
   if (!extended) goto bad;

   VG_(brk_limit) = newbrk;
   return newbrk;

  bad:
   return VG_(brk_limit);
}
1138 
1139 
1140 /* ---------------------------------------------------------------------
1141    Vet file descriptors for sanity
1142    ------------------------------------------------------------------ */
1143 /*
1144 > - what does the "Bool soft" parameter mean?
1145 
1146 (Tom Hughes, 3 Oct 05):
1147 
1148 Whether or not to consider a file descriptor invalid if it is above
1149 the current soft limit.
1150 
1151 Basically if we are testing whether a newly created file descriptor is
1152 valid (in a post handler) then we set soft to true, and if we are
1153 testing whether a file descriptor that is about to be used (in a pre
1154 handler) is valid [viz, an already-existing fd] then we set it to false.
1155 
1156 The point is that if the (virtual) soft limit is lowered then any
1157 existing descriptors can still be read/written/closed etc (so long as
1158 they are below the valgrind reserved descriptors) but no new
1159 descriptors can be created above the new soft limit.
1160 
1161 (jrs 4 Oct 05: in which case, I've renamed it "isNewFd")
1162 */
1163 
1164 /* Return true if we're allowed to use or create this fd */
ML_(fd_allowed)1165 Bool ML_(fd_allowed)(Int fd, const Char *syscallname, ThreadId tid, Bool isNewFd)
1166 {
1167    Bool allowed = True;
1168 
1169    /* hard limits always apply */
1170    if (fd < 0 || fd >= VG_(fd_hard_limit))
1171       allowed = False;
1172 
1173    /* hijacking the output fds is never allowed */
1174    if (fd == VG_(log_output_sink).fd || fd == VG_(xml_output_sink).fd)
1175       allowed = False;
1176 
1177    /* if creating a new fd (rather than using an existing one), the
1178       soft limit must also be observed */
1179    if (isNewFd && fd >= VG_(fd_soft_limit))
1180       allowed = False;
1181 
1182    /* this looks like it ought to be included, but causes problems: */
1183    /*
1184    if (fd == 2 && VG_(debugLog_getLevel)() > 0)
1185       allowed = False;
1186    */
1187    /* The difficulty is as follows: consider a program P which expects
1188       to be able to mess with (redirect) its own stderr (fd 2).
1189       Usually to deal with P we would issue command line flags to send
1190       logging somewhere other than stderr, so as not to disrupt P.
1191       The problem is that -d unilaterally hijacks stderr with no
1192       consultation with P.  And so, if this check is enabled, P will
1193       work OK normally but fail if -d is issued.
1194 
1195       Basically -d is a hack and you take your chances when using it.
1196       It's very useful for low level debugging -- particularly at
1197       startup -- and having its presence change the behaviour of the
1198       client is exactly what we don't want.  */
1199 
1200    /* croak? */
1201    if ((!allowed) && VG_(showing_core_errors)() ) {
1202       VG_(message)(Vg_UserMsg,
1203          "Warning: invalid file descriptor %d in syscall %s()\n",
1204          fd, syscallname);
1205       if (fd == VG_(log_output_sink).fd && VG_(log_output_sink).fd >= 0)
1206 	 VG_(message)(Vg_UserMsg,
1207             "   Use --log-fd=<number> to select an alternative log fd.\n");
1208       if (fd == VG_(xml_output_sink).fd && VG_(xml_output_sink).fd >= 0)
1209 	 VG_(message)(Vg_UserMsg,
1210             "   Use --xml-fd=<number> to select an alternative XML "
1211             "output fd.\n");
1212       // DDD: consider always printing this stack trace, it's useful.
1213       // Also consider also making this a proper core error, ie.
1214       // suppressible and all that.
1215       if (VG_(clo_verbosity) > 1) {
1216          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1217       }
1218    }
1219 
1220    return allowed;
1221 }
1222 
1223 
1224 /* ---------------------------------------------------------------------
1225    Deal with a bunch of socket-related syscalls
1226    ------------------------------------------------------------------ */
1227 
1228 /* ------ */
1229 
1230 void
ML_(generic_PRE_sys_socketpair)1231 ML_(generic_PRE_sys_socketpair) ( ThreadId tid,
1232                                   UWord arg0, UWord arg1,
1233                                   UWord arg2, UWord arg3 )
1234 {
1235    /* int socketpair(int d, int type, int protocol, int sv[2]); */
1236    PRE_MEM_WRITE( "socketcall.socketpair(sv)",
1237                   arg3, 2*sizeof(int) );
1238 }
1239 
1240 SysRes
ML_(generic_POST_sys_socketpair)1241 ML_(generic_POST_sys_socketpair) ( ThreadId tid,
1242                                    SysRes res,
1243                                    UWord arg0, UWord arg1,
1244                                    UWord arg2, UWord arg3 )
1245 {
1246    SysRes r = res;
1247    Int fd1 = ((Int*)arg3)[0];
1248    Int fd2 = ((Int*)arg3)[1];
1249    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1250    POST_MEM_WRITE( arg3, 2*sizeof(int) );
1251    if (!ML_(fd_allowed)(fd1, "socketcall.socketpair", tid, True) ||
1252        !ML_(fd_allowed)(fd2, "socketcall.socketpair", tid, True)) {
1253       VG_(close)(fd1);
1254       VG_(close)(fd2);
1255       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1256    } else {
1257       POST_MEM_WRITE( arg3, 2*sizeof(int) );
1258       if (VG_(clo_track_fds)) {
1259          ML_(record_fd_open_nameless)(tid, fd1);
1260          ML_(record_fd_open_nameless)(tid, fd2);
1261       }
1262    }
1263    return r;
1264 }
1265 
1266 /* ------ */
1267 
1268 SysRes
ML_(generic_POST_sys_socket)1269 ML_(generic_POST_sys_socket) ( ThreadId tid, SysRes res )
1270 {
1271    SysRes r = res;
1272    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1273    if (!ML_(fd_allowed)(sr_Res(res), "socket", tid, True)) {
1274       VG_(close)(sr_Res(res));
1275       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1276    } else {
1277       if (VG_(clo_track_fds))
1278          ML_(record_fd_open_nameless)(tid, sr_Res(res));
1279    }
1280    return r;
1281 }
1282 
1283 /* ------ */
1284 
1285 void
ML_(generic_PRE_sys_bind)1286 ML_(generic_PRE_sys_bind) ( ThreadId tid,
1287                             UWord arg0, UWord arg1, UWord arg2 )
1288 {
1289    /* int bind(int sockfd, struct sockaddr *my_addr,
1290                int addrlen); */
1291    pre_mem_read_sockaddr(
1292       tid, "socketcall.bind(my_addr.%s)",
1293       (struct vki_sockaddr *) arg1, arg2
1294    );
1295 }
1296 
1297 /* ------ */
1298 
1299 void
ML_(generic_PRE_sys_accept)1300 ML_(generic_PRE_sys_accept) ( ThreadId tid,
1301                               UWord arg0, UWord arg1, UWord arg2 )
1302 {
1303    /* int accept(int s, struct sockaddr *addr, int *addrlen); */
1304    Addr addr_p     = arg1;
1305    Addr addrlen_p  = arg2;
1306    if (addr_p != (Addr)NULL)
1307       ML_(buf_and_len_pre_check) ( tid, addr_p, addrlen_p,
1308                                    "socketcall.accept(addr)",
1309                                    "socketcall.accept(addrlen_in)" );
1310 }
1311 
1312 SysRes
ML_(generic_POST_sys_accept)1313 ML_(generic_POST_sys_accept) ( ThreadId tid,
1314                                SysRes res,
1315                                UWord arg0, UWord arg1, UWord arg2 )
1316 {
1317    SysRes r = res;
1318    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1319    if (!ML_(fd_allowed)(sr_Res(res), "accept", tid, True)) {
1320       VG_(close)(sr_Res(res));
1321       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1322    } else {
1323       Addr addr_p     = arg1;
1324       Addr addrlen_p  = arg2;
1325       if (addr_p != (Addr)NULL)
1326          ML_(buf_and_len_post_check) ( tid, res, addr_p, addrlen_p,
1327                                        "socketcall.accept(addrlen_out)" );
1328       if (VG_(clo_track_fds))
1329           ML_(record_fd_open_nameless)(tid, sr_Res(res));
1330    }
1331    return r;
1332 }
1333 
1334 /* ------ */
1335 
1336 void
ML_(generic_PRE_sys_sendto)1337 ML_(generic_PRE_sys_sendto) ( ThreadId tid,
1338                               UWord arg0, UWord arg1, UWord arg2,
1339                               UWord arg3, UWord arg4, UWord arg5 )
1340 {
1341    /* int sendto(int s, const void *msg, int len,
1342                  unsigned int flags,
1343                  const struct sockaddr *to, int tolen); */
1344    PRE_MEM_READ( "socketcall.sendto(msg)",
1345                  arg1, /* msg */
1346                  arg2  /* len */ );
1347    pre_mem_read_sockaddr(
1348       tid, "socketcall.sendto(to.%s)",
1349       (struct vki_sockaddr *) arg4, arg5
1350    );
1351 }
1352 
1353 /* ------ */
1354 
1355 void
ML_(generic_PRE_sys_send)1356 ML_(generic_PRE_sys_send) ( ThreadId tid,
1357                             UWord arg0, UWord arg1, UWord arg2 )
1358 {
1359    /* int send(int s, const void *msg, size_t len, int flags); */
1360    PRE_MEM_READ( "socketcall.send(msg)",
1361                   arg1, /* msg */
1362                   arg2  /* len */ );
1363 
1364 }
1365 
1366 /* ------ */
1367 
1368 void
ML_(generic_PRE_sys_recvfrom)1369 ML_(generic_PRE_sys_recvfrom) ( ThreadId tid,
1370                                 UWord arg0, UWord arg1, UWord arg2,
1371                                 UWord arg3, UWord arg4, UWord arg5 )
1372 {
1373    /* int recvfrom(int s, void *buf, int len, unsigned int flags,
1374                    struct sockaddr *from, int *fromlen); */
1375    Addr buf_p      = arg1;
1376    Int  len        = arg2;
1377    Addr from_p     = arg4;
1378    Addr fromlen_p  = arg5;
1379    PRE_MEM_WRITE( "socketcall.recvfrom(buf)", buf_p, len );
1380    if (from_p != (Addr)NULL)
1381       ML_(buf_and_len_pre_check) ( tid, from_p, fromlen_p,
1382                                    "socketcall.recvfrom(from)",
1383                                    "socketcall.recvfrom(fromlen_in)" );
1384 }
1385 
1386 void
ML_(generic_POST_sys_recvfrom)1387 ML_(generic_POST_sys_recvfrom) ( ThreadId tid,
1388                                  SysRes res,
1389                                  UWord arg0, UWord arg1, UWord arg2,
1390                                  UWord arg3, UWord arg4, UWord arg5 )
1391 {
1392    Addr buf_p      = arg1;
1393    Int  len        = arg2;
1394    Addr from_p     = arg4;
1395    Addr fromlen_p  = arg5;
1396 
1397    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1398    if (from_p != (Addr)NULL)
1399       ML_(buf_and_len_post_check) ( tid, res, from_p, fromlen_p,
1400                                     "socketcall.recvfrom(fromlen_out)" );
1401    POST_MEM_WRITE( buf_p, len );
1402 }
1403 
1404 /* ------ */
1405 
1406 void
ML_(generic_PRE_sys_recv)1407 ML_(generic_PRE_sys_recv) ( ThreadId tid,
1408                             UWord arg0, UWord arg1, UWord arg2 )
1409 {
1410    /* int recv(int s, void *buf, int len, unsigned int flags); */
1411    /* man 2 recv says:
1412       The  recv call is normally used only on a connected socket
1413       (see connect(2)) and is identical to recvfrom with a  NULL
1414       from parameter.
1415    */
1416    PRE_MEM_WRITE( "socketcall.recv(buf)",
1417                   arg1, /* buf */
1418                   arg2  /* len */ );
1419 }
1420 
1421 void
ML_(generic_POST_sys_recv)1422 ML_(generic_POST_sys_recv) ( ThreadId tid,
1423                              UWord res,
1424                              UWord arg0, UWord arg1, UWord arg2 )
1425 {
1426    if (res >= 0 && arg1 != 0) {
1427       POST_MEM_WRITE( arg1, /* buf */
1428                       arg2  /* len */ );
1429    }
1430 }
1431 
1432 /* ------ */
1433 
1434 void
ML_(generic_PRE_sys_connect)1435 ML_(generic_PRE_sys_connect) ( ThreadId tid,
1436                                UWord arg0, UWord arg1, UWord arg2 )
1437 {
1438    /* int connect(int sockfd,
1439                   struct sockaddr *serv_addr, int addrlen ); */
1440    pre_mem_read_sockaddr( tid,
1441                           "socketcall.connect(serv_addr.%s)",
1442                           (struct vki_sockaddr *) arg1, arg2);
1443 }
1444 
1445 /* ------ */
1446 
1447 void
ML_(generic_PRE_sys_setsockopt)1448 ML_(generic_PRE_sys_setsockopt) ( ThreadId tid,
1449                                   UWord arg0, UWord arg1, UWord arg2,
1450                                   UWord arg3, UWord arg4 )
1451 {
1452    /* int setsockopt(int s, int level, int optname,
1453                      const void *optval, int optlen); */
1454    PRE_MEM_READ( "socketcall.setsockopt(optval)",
1455                  arg3, /* optval */
1456                  arg4  /* optlen */ );
1457 }
1458 
1459 /* ------ */
1460 
1461 void
ML_(generic_PRE_sys_getsockname)1462 ML_(generic_PRE_sys_getsockname) ( ThreadId tid,
1463                                    UWord arg0, UWord arg1, UWord arg2 )
1464 {
1465    /* int getsockname(int s, struct sockaddr* name, int* namelen) */
1466    Addr name_p     = arg1;
1467    Addr namelen_p  = arg2;
1468    /* Nb: name_p cannot be NULL */
1469    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1470                                 "socketcall.getsockname(name)",
1471                                 "socketcall.getsockname(namelen_in)" );
1472 }
1473 
1474 void
ML_(generic_POST_sys_getsockname)1475 ML_(generic_POST_sys_getsockname) ( ThreadId tid,
1476                                     SysRes res,
1477                                     UWord arg0, UWord arg1, UWord arg2 )
1478 {
1479    Addr name_p     = arg1;
1480    Addr namelen_p  = arg2;
1481    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1482    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1483                                  "socketcall.getsockname(namelen_out)" );
1484 }
1485 
1486 /* ------ */
1487 
1488 void
ML_(generic_PRE_sys_getpeername)1489 ML_(generic_PRE_sys_getpeername) ( ThreadId tid,
1490                                    UWord arg0, UWord arg1, UWord arg2 )
1491 {
1492    /* int getpeername(int s, struct sockaddr* name, int* namelen) */
1493    Addr name_p     = arg1;
1494    Addr namelen_p  = arg2;
1495    /* Nb: name_p cannot be NULL */
1496    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1497                                 "socketcall.getpeername(name)",
1498                                 "socketcall.getpeername(namelen_in)" );
1499 }
1500 
1501 void
ML_(generic_POST_sys_getpeername)1502 ML_(generic_POST_sys_getpeername) ( ThreadId tid,
1503                                     SysRes res,
1504                                     UWord arg0, UWord arg1, UWord arg2 )
1505 {
1506    Addr name_p     = arg1;
1507    Addr namelen_p  = arg2;
1508    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1509    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1510                                  "socketcall.getpeername(namelen_out)" );
1511 }
1512 
1513 /* ------ */
1514 
1515 void
ML_(generic_PRE_sys_sendmsg)1516 ML_(generic_PRE_sys_sendmsg) ( ThreadId tid,
1517                                UWord arg0, UWord arg1 )
1518 {
1519    /* int sendmsg(int s, const struct msghdr *msg, int flags); */
1520    struct vki_msghdr *msg = (struct vki_msghdr *)arg1;
1521    msghdr_foreachfield ( tid, msg, pre_mem_read_sendmsg );
1522 }
1523 
1524 /* ------ */
1525 
1526 void
ML_(generic_PRE_sys_recvmsg)1527 ML_(generic_PRE_sys_recvmsg) ( ThreadId tid,
1528                                UWord arg0, UWord arg1 )
1529 {
1530    /* int recvmsg(int s, struct msghdr *msg, int flags); */
1531    struct vki_msghdr *msg = (struct vki_msghdr *)arg1;
1532    msghdr_foreachfield ( tid, msg, pre_mem_write_recvmsg );
1533 }
1534 
1535 void
ML_(generic_POST_sys_recvmsg)1536 ML_(generic_POST_sys_recvmsg) ( ThreadId tid,
1537                                 UWord arg0, UWord arg1 )
1538 {
1539    struct vki_msghdr *msg = (struct vki_msghdr *)arg1;
1540    msghdr_foreachfield( tid, msg, post_mem_write_recvmsg );
1541    check_cmsg_for_fds( tid, msg );
1542 }
1543 
1544 
1545 /* ---------------------------------------------------------------------
1546    Deal with a bunch of IPC related syscalls
1547    ------------------------------------------------------------------ */
1548 
1549 /* ------ */
1550 
1551 void
ML_(generic_PRE_sys_semop)1552 ML_(generic_PRE_sys_semop) ( ThreadId tid,
1553                              UWord arg0, UWord arg1, UWord arg2 )
1554 {
1555    /* int semop(int semid, struct sembuf *sops, unsigned nsops); */
1556    PRE_MEM_READ( "semop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
1557 }
1558 
1559 /* ------ */
1560 
1561 void
ML_(generic_PRE_sys_semtimedop)1562 ML_(generic_PRE_sys_semtimedop) ( ThreadId tid,
1563                                   UWord arg0, UWord arg1,
1564                                   UWord arg2, UWord arg3 )
1565 {
1566    /* int semtimedop(int semid, struct sembuf *sops, unsigned nsops,
1567                      struct timespec *timeout); */
1568    PRE_MEM_READ( "semtimedop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
1569    if (arg3 != 0)
1570       PRE_MEM_READ( "semtimedop(timeout)", arg3, sizeof(struct vki_timespec) );
1571 }
1572 
1573 /* ------ */
1574 
1575 static
get_sem_count(Int semid)1576 UInt get_sem_count( Int semid )
1577 {
1578    struct vki_semid_ds buf;
1579    union vki_semun arg;
1580    SysRes res;
1581 
1582    /* Doesn't actually seem to be necessary, but gcc-4.4.0 20081017
1583       (experimental) otherwise complains that the use in the return
1584       statement below is uninitialised. */
1585    buf.sem_nsems = 0;
1586 
1587    arg.buf = &buf;
1588 
1589 #  ifdef __NR_semctl
1590    res = VG_(do_syscall4)(__NR_semctl, semid, 0, VKI_IPC_STAT, *(UWord *)&arg);
1591 #  else
1592    res = VG_(do_syscall5)(__NR_ipc, 3 /* IPCOP_semctl */, semid, 0,
1593                           VKI_IPC_STAT, (UWord)&arg);
1594 #  endif
1595    if (sr_isError(res))
1596       return 0;
1597 
1598    return buf.sem_nsems;
1599 }
1600 
1601 void
ML_(generic_PRE_sys_semctl)1602 ML_(generic_PRE_sys_semctl) ( ThreadId tid,
1603                               UWord arg0, UWord arg1,
1604                               UWord arg2, UWord arg3 )
1605 {
1606    /* int semctl(int semid, int semnum, int cmd, ...); */
1607    union vki_semun arg = *(union vki_semun *)&arg3;
1608    UInt nsems;
1609    switch (arg2 /* cmd */) {
1610 #if defined(VKI_IPC_INFO)
1611    case VKI_IPC_INFO:
1612    case VKI_SEM_INFO:
1613    case VKI_IPC_INFO|VKI_IPC_64:
1614    case VKI_SEM_INFO|VKI_IPC_64:
1615       PRE_MEM_WRITE( "semctl(IPC_INFO, arg.buf)",
1616                      (Addr)arg.buf, sizeof(struct vki_seminfo) );
1617       break;
1618 #endif
1619 
1620    case VKI_IPC_STAT:
1621 #if defined(VKI_SEM_STAT)
1622    case VKI_SEM_STAT:
1623 #endif
1624       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
1625                      (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1626       break;
1627 
1628 #if defined(VKI_IPC_64)
1629    case VKI_IPC_STAT|VKI_IPC_64:
1630 #if defined(VKI_SEM_STAT)
1631    case VKI_SEM_STAT|VKI_IPC_64:
1632 #endif
1633       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
1634                      (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1635       break;
1636 #endif
1637 
1638    case VKI_IPC_SET:
1639       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
1640                     (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1641       break;
1642 
1643 #if defined(VKI_IPC_64)
1644    case VKI_IPC_SET|VKI_IPC_64:
1645       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
1646                     (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1647       break;
1648 #endif
1649 
1650    case VKI_GETALL:
1651 #if defined(VKI_IPC_64)
1652    case VKI_GETALL|VKI_IPC_64:
1653 #endif
1654       nsems = get_sem_count( arg0 );
1655       PRE_MEM_WRITE( "semctl(IPC_GETALL, arg.array)",
1656                      (Addr)arg.array, sizeof(unsigned short) * nsems );
1657       break;
1658 
1659    case VKI_SETALL:
1660 #if defined(VKI_IPC_64)
1661    case VKI_SETALL|VKI_IPC_64:
1662 #endif
1663       nsems = get_sem_count( arg0 );
1664       PRE_MEM_READ( "semctl(IPC_SETALL, arg.array)",
1665                     (Addr)arg.array, sizeof(unsigned short) * nsems );
1666       break;
1667    }
1668 }
1669 
1670 void
ML_(generic_POST_sys_semctl)1671 ML_(generic_POST_sys_semctl) ( ThreadId tid,
1672                                UWord res,
1673                                UWord arg0, UWord arg1,
1674                                UWord arg2, UWord arg3 )
1675 {
1676    union vki_semun arg = *(union vki_semun *)&arg3;
1677    UInt nsems;
1678    switch (arg2 /* cmd */) {
1679 #if defined(VKI_IPC_INFO)
1680    case VKI_IPC_INFO:
1681    case VKI_SEM_INFO:
1682    case VKI_IPC_INFO|VKI_IPC_64:
1683    case VKI_SEM_INFO|VKI_IPC_64:
1684       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_seminfo) );
1685       break;
1686 #endif
1687 
1688    case VKI_IPC_STAT:
1689 #if defined(VKI_SEM_STAT)
1690    case VKI_SEM_STAT:
1691 #endif
1692       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1693       break;
1694 
1695 #if defined(VKI_IPC_64)
1696    case VKI_IPC_STAT|VKI_IPC_64:
1697    case VKI_SEM_STAT|VKI_IPC_64:
1698       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1699       break;
1700 #endif
1701 
1702    case VKI_GETALL:
1703 #if defined(VKI_IPC_64)
1704    case VKI_GETALL|VKI_IPC_64:
1705 #endif
1706       nsems = get_sem_count( arg0 );
1707       POST_MEM_WRITE( (Addr)arg.array, sizeof(unsigned short) * nsems );
1708       break;
1709    }
1710 }
1711 
1712 /* ------ */
1713 
1714 /* ------ */
1715 
/* Ask the kernel for the size (shm_segsz) of shared-memory segment
   'shmid', using whichever shmctl flavour this platform provides:
   a direct shmctl syscall (with or without IPC_64) or the multiplexed
   ipc() syscall.  Returns 0 if the query fails. */
static
UInt get_shm_size ( Int shmid )
{
#ifdef __NR_shmctl
#  ifdef VKI_IPC_64
   struct vki_shmid64_ds buf;
#    ifdef VGP_amd64_linux
     /* See bug 222545 comment 7 */
     /* amd64 kernels reject the IPC_64 flag here; plain IPC_STAT
        already yields the 64-bit layout. */
     SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
                                     VKI_IPC_STAT, (UWord)&buf);
#    else
     SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
                                     VKI_IPC_STAT|VKI_IPC_64, (UWord)&buf);
#    endif
#  else /* !def VKI_IPC_64 */
   struct vki_shmid_ds buf;
   SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid, VKI_IPC_STAT, (UWord)&buf);
#  endif /* def VKI_IPC_64 */
#else
   /* No dedicated shmctl syscall: go through the ipc() multiplexer. */
   struct vki_shmid_ds buf;
   SysRes __res = VG_(do_syscall5)(__NR_ipc, 24 /* IPCOP_shmctl */, shmid,
                                 VKI_IPC_STAT, 0, (UWord)&buf);
#endif
   if (sr_isError(__res))
      return 0;

   return buf.shm_segsz;
}
1744 
1745 UWord
ML_(generic_PRE_sys_shmat)1746 ML_(generic_PRE_sys_shmat) ( ThreadId tid,
1747                              UWord arg0, UWord arg1, UWord arg2 )
1748 {
1749    /* void *shmat(int shmid, const void *shmaddr, int shmflg); */
1750    UInt  segmentSize = get_shm_size ( arg0 );
1751    UWord tmp;
1752    Bool  ok;
1753    if (arg1 == 0) {
1754       /* arm-linux only: work around the fact that
1755          VG_(am_get_advisory_client_simple) produces something that is
1756          VKI_PAGE_SIZE aligned, whereas what we want is something
1757          VKI_SHMLBA aligned, and VKI_SHMLBA >= VKI_PAGE_SIZE.  Hence
1758          increase the request size by VKI_SHMLBA - VKI_PAGE_SIZE and
1759          then round the result up to the next VKI_SHMLBA boundary.
1760          See bug 222545 comment 15.  So far, arm-linux is the only
1761          platform where this is known to be necessary. */
1762       vg_assert(VKI_SHMLBA >= VKI_PAGE_SIZE);
1763       if (VKI_SHMLBA > VKI_PAGE_SIZE) {
1764          segmentSize += VKI_SHMLBA - VKI_PAGE_SIZE;
1765       }
1766       tmp = VG_(am_get_advisory_client_simple)(0, segmentSize, &ok);
1767       if (ok) {
1768          if (VKI_SHMLBA > VKI_PAGE_SIZE) {
1769             arg1 = VG_ROUNDUP(tmp, VKI_SHMLBA);
1770          } else {
1771             arg1 = tmp;
1772          }
1773       }
1774    }
1775    else if (!ML_(valid_client_addr)(arg1, segmentSize, tid, "shmat"))
1776       arg1 = 0;
1777    return arg1;
1778 }
1779 
/* POST handler for shmat().  'res' is the address the kernel actually
   attached the segment at.  Register the new mapping with aspacem and
   the tool, and discard any translations it shadows. */
void
ML_(generic_POST_sys_shmat) ( ThreadId tid,
                              UWord res,
                              UWord arg0, UWord arg1, UWord arg2 )
{
   UInt segmentSize = VG_PGROUNDUP(get_shm_size(arg0));
   if ( segmentSize > 0 ) {
      UInt prot = VKI_PROT_READ|VKI_PROT_WRITE;
      Bool d;

      /* SHM_RDONLY attachments are not writable. */
      if (arg2 & VKI_SHM_RDONLY)
         prot &= ~VKI_PROT_WRITE;
      /* It isn't exactly correct to pass 0 for the fd and offset
         here.  The kernel seems to think the corresponding section
         does have dev/ino numbers:

         04e52000-04ec8000 rw-s 00000000 00:06 1966090  /SYSV00000000 (deleted)

         However there is no obvious way to find them.  In order to
         cope with the discrepancy, aspacem's sync checker omits the
         dev/ino correspondence check in cases where V does not know
         the dev/ino. */
      d = VG_(am_notify_client_shmat)( res, segmentSize, prot );

      /* we don't distinguish whether it's read-only or
       * read-write -- it doesn't matter really. */
      VG_TRACK( new_mem_mmap, res, segmentSize, True, True, False,
                              0/*di_handle*/ );
      if (d)
         /* NOTE(review): segmentSize was already page-rounded above,
            so this second VG_PGROUNDUP is a no-op. */
         VG_(discard_translations)( (Addr64)res,
                                    (ULong)VG_PGROUNDUP(segmentSize),
                                    "ML_(generic_POST_sys_shmat)" );
   }
}
1814 
1815 /* ------ */
1816 
1817 Bool
ML_(generic_PRE_sys_shmdt)1818 ML_(generic_PRE_sys_shmdt) ( ThreadId tid, UWord arg0 )
1819 {
1820    /* int shmdt(const void *shmaddr); */
1821    return ML_(valid_client_addr)(arg0, 1, tid, "shmdt");
1822 }
1823 
/* POST handler for shmdt(): the segment starting at 'arg0' has been
   detached, so unregister it with aspacem and the tool, and discard
   translations covering it. */
void
ML_(generic_POST_sys_shmdt) ( ThreadId tid, UWord res, UWord arg0 )
{
   NSegment const* s = VG_(am_find_nsegment)(arg0);

   if (s != NULL) {
      /* Capture start/length before the segment record is invalidated
         by the munmap notification below. */
      Addr  s_start = s->start;
      SizeT s_len   = s->end+1 - s->start;
      Bool  d;

      vg_assert(s->kind == SkShmC);
      vg_assert(s->start == arg0);

      d = VG_(am_notify_munmap)(s_start, s_len);
      s = NULL; /* s is now invalid */
      VG_TRACK( die_mem_munmap, s_start, s_len );
      if (d)
         VG_(discard_translations)( (Addr64)s_start,
                                    (ULong)s_len,
                                    "ML_(generic_POST_sys_shmdt)" );
   }
}
1846 /* ------ */
1847 
1848 void
ML_(generic_PRE_sys_shmctl)1849 ML_(generic_PRE_sys_shmctl) ( ThreadId tid,
1850                               UWord arg0, UWord arg1, UWord arg2 )
1851 {
1852    /* int shmctl(int shmid, int cmd, struct shmid_ds *buf); */
1853    switch (arg1 /* cmd */) {
1854 #if defined(VKI_IPC_INFO)
1855    case VKI_IPC_INFO:
1856       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
1857                      arg2, sizeof(struct vki_shminfo) );
1858       break;
1859 #if defined(VKI_IPC_64)
1860    case VKI_IPC_INFO|VKI_IPC_64:
1861       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
1862                      arg2, sizeof(struct vki_shminfo64) );
1863       break;
1864 #endif
1865 #endif
1866 
1867 #if defined(VKI_SHM_INFO)
1868    case VKI_SHM_INFO:
1869 #if defined(VKI_IPC_64)
1870    case VKI_SHM_INFO|VKI_IPC_64:
1871 #endif
1872       PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)",
1873                      arg2, sizeof(struct vki_shm_info) );
1874       break;
1875 #endif
1876 
1877    case VKI_IPC_STAT:
1878 #if defined(VKI_SHM_STAT)
1879    case VKI_SHM_STAT:
1880 #endif
1881       PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)",
1882                      arg2, sizeof(struct vki_shmid_ds) );
1883       break;
1884 
1885 #if defined(VKI_IPC_64)
1886    case VKI_IPC_STAT|VKI_IPC_64:
1887    case VKI_SHM_STAT|VKI_IPC_64:
1888       PRE_MEM_WRITE( "shmctl(IPC_STAT, arg.buf)",
1889                      arg2, sizeof(struct vki_shmid64_ds) );
1890       break;
1891 #endif
1892 
1893    case VKI_IPC_SET:
1894       PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
1895                     arg2, sizeof(struct vki_shmid_ds) );
1896       break;
1897 
1898 #if defined(VKI_IPC_64)
1899    case VKI_IPC_SET|VKI_IPC_64:
1900       PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
1901                     arg2, sizeof(struct vki_shmid64_ds) );
1902       break;
1903 #endif
1904    }
1905 }
1906 
1907 void
ML_(generic_POST_sys_shmctl)1908 ML_(generic_POST_sys_shmctl) ( ThreadId tid,
1909                                UWord res,
1910                                UWord arg0, UWord arg1, UWord arg2 )
1911 {
1912    switch (arg1 /* cmd */) {
1913 #if defined(VKI_IPC_INFO)
1914    case VKI_IPC_INFO:
1915       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo) );
1916       break;
1917    case VKI_IPC_INFO|VKI_IPC_64:
1918       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo64) );
1919       break;
1920 #endif
1921 
1922 #if defined(VKI_SHM_INFO)
1923    case VKI_SHM_INFO:
1924    case VKI_SHM_INFO|VKI_IPC_64:
1925       POST_MEM_WRITE( arg2, sizeof(struct vki_shm_info) );
1926       break;
1927 #endif
1928 
1929    case VKI_IPC_STAT:
1930 #if defined(VKI_SHM_STAT)
1931    case VKI_SHM_STAT:
1932 #endif
1933       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid_ds) );
1934       break;
1935 
1936 #if defined(VKI_IPC_64)
1937    case VKI_IPC_STAT|VKI_IPC_64:
1938    case VKI_SHM_STAT|VKI_IPC_64:
1939       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid64_ds) );
1940       break;
1941 #endif
1942 
1943 
1944    }
1945 }
1946 
1947 
1948 /* ---------------------------------------------------------------------
1949    Generic handler for mmap
1950    ------------------------------------------------------------------ */
1951 
1952 /*
1953  * Although mmap is specified by POSIX and the argument are generally
1954  * consistent across platforms the precise details of the low level
1955  * argument passing conventions differ. For example:
1956  *
1957  * - On x86-linux there is mmap (aka old_mmap) which takes the
1958  *   arguments in a memory block and the offset in bytes; and
1959  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
1960  *   way and the offset in pages.
1961  *
1962  * - On ppc32-linux there is mmap (aka sys_mmap) which takes the
1963  *   arguments in the normal way and the offset in bytes; and
1964  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
1965  *   way and the offset in pages.
1966  *
1967  * - On amd64-linux everything is simple and there is just the one
1968  *   call, mmap (aka sys_mmap)  which takes the arguments in the
1969  *   normal way and the offset in bytes.
1970  *
1971  * - On s390x-linux there is mmap (aka old_mmap) which takes the
1972  *   arguments in a memory block and the offset in bytes. mmap2
1973  *   is also available (but not exported via unistd.h) with
1974  *   arguments in a memory block and the offset in pages.
1975  *
1976  * To cope with all this we provide a generic handler function here
1977  * and then each platform implements one or more system call handlers
1978  * which call this generic routine after extracting and normalising
1979  * the arguments.
1980  */
1981 
/* Generic mmap handler.  Platform wrappers normalise their argument
   conventions (see the comment above) and call this with
   arg1=addr, arg2=length, arg3=prot, arg4=flags, arg5=fd,
   arg6=offset-in-bytes.  We choose an address via aspacem, perform
   the map ourselves (forcing MAP_FIXED at the advised address), and
   on success notify aspacem, debuginfo and the tool.  Returns the
   SysRes the syscall should appear to have produced. */
SysRes
ML_(generic_PRE_sys_mmap) ( ThreadId tid,
                            UWord arg1, UWord arg2, UWord arg3,
                            UWord arg4, UWord arg5, Off64T arg6 )
{
   Addr       advised;
   SysRes     sres;
   MapRequest mreq;
   Bool       mreq_ok;

#if defined(VGO_darwin)
   // Nb: we can't use this on Darwin, it has races:
   // * needs to RETRY if advisory succeeds but map fails
   //   (could have been some other thread in a nonblocking call)
   // * needs to not use fixed-position mmap() on Darwin
   //   (mmap will cheerfully smash whatever's already there, which might
   //   be a new mapping from some other thread in a nonblocking call)
   VG_(core_panic)("can't use ML_(generic_PRE_sys_mmap) on Darwin");
#endif

   if (arg2 == 0) {
      /* SuSV3 says: If len is zero, mmap() shall fail and no mapping
         shall be established. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   if (!VG_IS_PAGE_ALIGNED(arg1)) {
      /* zap any misaligned addresses. */
      /* SuSV3 says misaligned addresses only cause the MAP_FIXED case
         to fail.   Here, we catch them all. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   if (!VG_IS_PAGE_ALIGNED(arg6)) {
      /* zap any misaligned offsets. */
      /* SuSV3 says: The off argument is constrained to be aligned and
         sized according to the value returned by sysconf() when
         passed _SC_PAGESIZE or _SC_PAGE_SIZE. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   /* Figure out what kind of allocation constraints there are
      (fixed/hint/any), and ask aspacem what we should do. */
   mreq.start = arg1;
   mreq.len   = arg2;
   if (arg4 & VKI_MAP_FIXED) {
      mreq.rkind = MFixed;
   } else
   if (arg1 != 0) {
      mreq.rkind = MHint;
   } else {
      mreq.rkind = MAny;
   }

   /* Enquire ... */
   advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
   if (!mreq_ok) {
      /* Our request was bounced, so we'd better fail. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   /* Otherwise we're OK (so far).  Install aspacem's choice of
      address, and let the mmap go through.  */
   sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
                                    arg4 | VKI_MAP_FIXED,
                                    arg5, arg6);

   /* A refinement: it may be that the kernel refused aspacem's choice
      of address.  If we were originally asked for a hinted mapping,
      there is still a last chance: try again at any address.
      Hence: */
   if (mreq.rkind == MHint && sr_isError(sres)) {
      mreq.start = 0;
      mreq.len   = arg2;
      mreq.rkind = MAny;
      advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
      if (!mreq_ok) {
         /* Our request was bounced, so we'd better fail. */
         return VG_(mk_SysRes_Error)( VKI_EINVAL );
      }
      /* and try again with the kernel */
      sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
                                       arg4 | VKI_MAP_FIXED,
                                       arg5, arg6);
   }

   if (!sr_isError(sres)) {
      ULong di_handle;
      /* Notify aspacem. */
      notify_core_of_mmap(
         (Addr)sr_Res(sres), /* addr kernel actually assigned */
         arg2, /* length */
         arg3, /* prot */
         arg4, /* the original flags value */
         arg5, /* fd */
         arg6  /* offset */
      );
      /* Load symbols? */
      di_handle = VG_(di_notify_mmap)( (Addr)sr_Res(sres),
                                       False/*allow_SkFileV*/, (Int)arg5 );
      /* Notify the tool. */
      notify_tool_of_mmap(
         (Addr)sr_Res(sres), /* addr kernel actually assigned */
         arg2, /* length */
         arg3, /* prot */
         di_handle /* so the tool can refer to the read debuginfo later,
                      if it wants. */
      );
   }

   /* Stay sane */
   if (!sr_isError(sres) && (arg4 & VKI_MAP_FIXED))
      vg_assert(sr_Res(sres) == arg1);

   return sres;
}
2098 
2099 
2100 /* ---------------------------------------------------------------------
2101    The Main Entertainment ... syscall wrappers
2102    ------------------------------------------------------------------ */
2103 
2104 /* Note: the PRE() and POST() wrappers are for the actual functions
2105    implementing the system calls in the OS kernel.  These mostly have
2106    names like sys_write();  a few have names like old_mmap().  See the
2107    comment for ML_(syscall_table)[] for important info about the __NR_foo
2108    constants and their relationship to the sys_foo() functions.
2109 
2110    Some notes about names used for syscalls and args:
2111    - For the --trace-syscalls=yes output, we use the sys_foo() name to avoid
2112      ambiguity.
2113 
2114    - For error messages, we generally use a somewhat generic name
2115      for the syscall (eg. "write" rather than "sys_write").  This should be
2116      good enough for the average user to understand what is happening,
2117      without confusing them with names like "sys_write".
2118 
2119    - Also, for error messages the arg names are mostly taken from the man
2120      pages (even though many of those man pages are really for glibc
2121      functions of the same name), rather than from the OS kernel source,
2122      for the same reason -- a user presented with a "bogus foo(bar)" arg
2123      will most likely look at the "foo" man page to see which is the "bar"
2124      arg.
2125 
2126    Note that we use our own vki_* types.  The one exception is in
2127    PRE_REG_READn calls, where pointer types haven't been changed, because
2128    they don't need to be -- eg. for "foo*" to be used, the type foo need not
2129    be visible.
2130 
2131    XXX: some of these are arch-specific, and should be factored out.
2132 */
2133 
/* Shorthand for declaring the generic PRE/POST wrapper functions. */
#define PRE(name)      DEFN_PRE_TEMPLATE(generic, name)
#define POST(name)     DEFN_POST_TEMPLATE(generic, name)

// Macros to support 64-bit syscall args split into two 32 bit values
//
// Note the deliberately different parameter order in the two variants:
// on little-endian the FIRST macro argument is the low half, on
// big-endian it is the high half.  Either way the expansion is
// lo | (hi << 32), so a call like MERGE64(ARG4,ARG5) reassembles two
// consecutive register args correctly on both endiannesses, and
// MERGE64_FIRST/MERGE64_SECOND name the halves accordingly for
// PRE_REG_READ argument labels.
#if defined(VG_LITTLEENDIAN)
#define MERGE64(lo,hi)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
#define MERGE64_FIRST(name) name##_low
#define MERGE64_SECOND(name) name##_high
#elif defined(VG_BIGENDIAN)
#define MERGE64(hi,lo)   ( ((ULong)(lo)) | (((ULong)(hi)) << 32) )
#define MERGE64_FIRST(name) name##_high
#define MERGE64_SECOND(name) name##_low
#else
#error Unknown endianness
#endif
2149 
/* exit(status): never handed to the kernel.  Mark this thread as
   exiting and pretend the syscall succeeded; the scheduler tears the
   thread down. */
PRE(sys_exit)
{
   ThreadState* tst;
   /* simple; just make this thread exit */
   PRINT("exit( %ld )", ARG1);
   PRE_REG_READ1(void, "exit", int, status);
   tst = VG_(get_ThreadState)(tid);
   /* Set the thread's status to be exiting, then claim that the
      syscall succeeded. */
   tst->exitreason = VgSrc_ExitThread;
   tst->os_state.exitcode = ARG1;
   SET_STATUS_Success(0);
}
2163 
/* Catch-all for syscalls the kernel does not implement: report and
   fail with ENOSYS without entering the kernel. */
PRE(sys_ni_syscall)
{
   PRINT("unimplemented (by the kernel) syscall: %s! (ni_syscall)\n",
      VG_SYSNUM_STRING(SYSNO));
   PRE_REG_READ0(long, "ni_syscall");
   SET_STATUS_Failure( VKI_ENOSYS );
}
2171 
/* iopl(level): no memory arguments; just record the register read. */
PRE(sys_iopl)
{
   PRINT("sys_iopl ( %ld )", ARG1);
   PRE_REG_READ1(long, "iopl", unsigned long, level);
}
2177 
/* fsync(fd): may block waiting for disk I/O (SfMayBlock). */
PRE(sys_fsync)
{
   *flags |= SfMayBlock;
   PRINT("sys_fsync ( %ld )", ARG1);
   PRE_REG_READ1(long, "fsync", unsigned int, fd);
}
2184 
/* fdatasync(fd): may block waiting for disk I/O (SfMayBlock). */
PRE(sys_fdatasync)
{
   *flags |= SfMayBlock;
   PRINT("sys_fdatasync ( %ld )", ARG1);
   PRE_REG_READ1(long, "fdatasync", unsigned int, fd);
}
2191 
/* msync(start, length, flags): may block; the [start, start+length)
   range must be addressable (checked as a read). */
PRE(sys_msync)
{
   *flags |= SfMayBlock;
   PRINT("sys_msync ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
   PRE_REG_READ3(long, "msync",
                 unsigned long, start, vki_size_t, length, int, flags);
   PRE_MEM_READ( "msync(start)", ARG1, ARG2 );
}
2200 
2201 // Nb: getpmsg() and putpmsg() are special additional syscalls used in early
2202 // versions of LiS (Linux Streams).  They are not part of the kernel.
2203 // Therefore, we have to provide this type ourself, rather than getting it
2204 // from the kernel sources.
/* strbuf layout used by the LiS getpmsg/putpmsg calls below. */
struct vki_pmsg_strbuf {
   int     maxlen;         /* no. of bytes in buffer */
   int     len;            /* no. of bytes returned */
   vki_caddr_t buf;        /* pointer to data */
};
/* getpmsg(fd, ctrl, data, bandp, flagsp): mark the ctrl/data payload
   buffers and the two int outputs as about-to-be-written.
   NOTE(review): ctrl/data strbufs are dereferenced here without a
   prior readability check on the structs themselves. */
PRE(sys_getpmsg)
{
   /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
   struct vki_pmsg_strbuf *ctrl;
   struct vki_pmsg_strbuf *data;
   *flags |= SfMayBlock;
   PRINT("sys_getpmsg ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
   PRE_REG_READ5(int, "getpmsg",
                 int, fd, struct strbuf *, ctrl, struct strbuf *, data,
                 int *, bandp, int *, flagsp);
   ctrl = (struct vki_pmsg_strbuf *)ARG2;
   data = (struct vki_pmsg_strbuf *)ARG3;
   if (ctrl && ctrl->maxlen > 0)
      PRE_MEM_WRITE( "getpmsg(ctrl)", (Addr)ctrl->buf, ctrl->maxlen);
   if (data && data->maxlen > 0)
      PRE_MEM_WRITE( "getpmsg(data)", (Addr)data->buf, data->maxlen);
   if (ARG4)
      PRE_MEM_WRITE( "getpmsg(bandp)", (Addr)ARG4, sizeof(int));
   if (ARG5)
      PRE_MEM_WRITE( "getpmsg(flagsp)", (Addr)ARG5, sizeof(int));
}
/* getpmsg POST: on full success (RES == 0) the kernel filled ctrl->buf
   and data->buf up to the returned 'len' fields. */
POST(sys_getpmsg)
{
   struct vki_pmsg_strbuf *ctrl;
   struct vki_pmsg_strbuf *data;
   vg_assert(SUCCESS);
   ctrl = (struct vki_pmsg_strbuf *)ARG2;
   data = (struct vki_pmsg_strbuf *)ARG3;
   if (RES == 0 && ctrl && ctrl->len > 0) {
      POST_MEM_WRITE( (Addr)ctrl->buf, ctrl->len);
   }
   if (RES == 0 && data && data->len > 0) {
      POST_MEM_WRITE( (Addr)data->buf, data->len);
   }
}
2245 
/* putpmsg(fd, ctrl, data, band, flags): the ctrl/data payload buffers
   must be readable up to their 'len' fields. */
PRE(sys_putpmsg)
{
   /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
   struct vki_pmsg_strbuf *ctrl;
   struct vki_pmsg_strbuf *data;
   *flags |= SfMayBlock;
   PRINT("sys_putpmsg ( %ld, %#lx, %#lx, %ld, %ld )", ARG1,ARG2,ARG3,ARG4,ARG5);
   PRE_REG_READ5(int, "putpmsg",
                 int, fd, struct strbuf *, ctrl, struct strbuf *, data,
                 int, band, int, flags);
   ctrl = (struct vki_pmsg_strbuf *)ARG2;
   data = (struct vki_pmsg_strbuf *)ARG3;
   if (ctrl && ctrl->len > 0)
      PRE_MEM_READ( "putpmsg(ctrl)", (Addr)ctrl->buf, ctrl->len);
   if (data && data->len > 0)
      PRE_MEM_READ( "putpmsg(data)", (Addr)data->buf, data->len);
}
2263 
/* getitimer(which, value): both timevals inside *value will be
   written by the kernel.
   NOTE(review): unlike sys_setitimer below, ARG2 is not checked for
   NULL before the &value->... address computations — presumably a
   NULL here is meant to surface as an invalid-write report; confirm
   intended. */
PRE(sys_getitimer)
{
   struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
   PRINT("sys_getitimer ( %ld, %#lx )", ARG1, ARG2);
   PRE_REG_READ2(long, "getitimer", int, which, struct itimerval *, value);

   PRE_timeval_WRITE( "getitimer(&value->it_interval)", &(value->it_interval));
   PRE_timeval_WRITE( "getitimer(&value->it_value)",    &(value->it_value));
}
2273 
/* getitimer POST: mark both returned timevals as defined. */
POST(sys_getitimer)
{
   if (ARG2 != (Addr)NULL) {
      struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
      POST_timeval_WRITE( &(value->it_interval) );
      POST_timeval_WRITE( &(value->it_value) );
   }
}
2282 
/* setitimer(which, value, ovalue): *value is read, *ovalue (if
   supplied) receives the previous setting. */
PRE(sys_setitimer)
{
   PRINT("sys_setitimer ( %ld, %#lx, %#lx )", ARG1,ARG2,ARG3);
   PRE_REG_READ3(long, "setitimer",
                 int, which,
                 struct itimerval *, value, struct itimerval *, ovalue);
   if (ARG2 != (Addr)NULL) {
      struct vki_itimerval *value = (struct vki_itimerval*)ARG2;
      PRE_timeval_READ( "setitimer(&value->it_interval)",
                         &(value->it_interval));
      PRE_timeval_READ( "setitimer(&value->it_value)",
                         &(value->it_value));
   }
   if (ARG3 != (Addr)NULL) {
      struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
      PRE_timeval_WRITE( "setitimer(&ovalue->it_interval)",
                         &(ovalue->it_interval));
      PRE_timeval_WRITE( "setitimer(&ovalue->it_value)",
                         &(ovalue->it_value));
   }
}
2304 
/* setitimer POST: if an ovalue buffer was given, the kernel filled it. */
POST(sys_setitimer)
{
   if (ARG3 != (Addr)NULL) {
      struct vki_itimerval *ovalue = (struct vki_itimerval*)ARG3;
      POST_timeval_WRITE( &(ovalue->it_interval) );
      POST_timeval_WRITE( &(ovalue->it_value) );
   }
}
2313 
/* chroot(path): path must be a readable NUL-terminated string. */
PRE(sys_chroot)
{
   PRINT("sys_chroot ( %#lx )", ARG1);
   PRE_REG_READ1(long, "chroot", const char *, path);
   PRE_MEM_RASCIIZ( "chroot(path)", ARG1 );
}
2320 
/* madvise(start, length, advice): advisory only; no memory checks
   on the target range. */
PRE(sys_madvise)
{
   *flags |= SfMayBlock;
   PRINT("sys_madvise ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
   PRE_REG_READ3(long, "madvise",
                 unsigned long, start, vki_size_t, length, int, advice);
}
2328 
#if HAVE_MREMAP
/* mremap: fully handled here in the PRE (do_mremap performs the work
   and the SysRes it returns becomes the syscall's result — the kernel
   is never entered for this call). */
PRE(sys_mremap)
{
   // Nb: this is different to the glibc version described in the man pages,
   // which lacks the fifth 'new_address' argument.
   if (ARG4 & VKI_MREMAP_FIXED) {
      PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx, %#lx )",
            ARG1, (ULong)ARG2, ARG3, ARG4, ARG5);
      PRE_REG_READ5(unsigned long, "mremap",
                    unsigned long, old_addr, unsigned long, old_size,
                    unsigned long, new_size, unsigned long, flags,
                    unsigned long, new_addr);
   } else {
      PRINT("sys_mremap ( %#lx, %llu, %ld, 0x%lx )",
            ARG1, (ULong)ARG2, ARG3, ARG4);
      PRE_REG_READ4(unsigned long, "mremap",
                    unsigned long, old_addr, unsigned long, old_size,
                    unsigned long, new_size, unsigned long, flags);
   }
   SET_STATUS_from_SysRes(
      do_mremap((Addr)ARG1, ARG2, (Addr)ARG5, ARG3, ARG4, tid)
   );
}
#endif /* HAVE_MREMAP */
2353 
/* nice(inc): no memory arguments. */
PRE(sys_nice)
{
   PRINT("sys_nice ( %ld )", ARG1);
   PRE_REG_READ1(long, "nice", int, inc);
}
2359 
/* mlock(addr, len): may block; no memory checks on the range. */
PRE(sys_mlock)
{
   *flags |= SfMayBlock;
   PRINT("sys_mlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
   PRE_REG_READ2(long, "mlock", unsigned long, addr, vki_size_t, len);
}
2366 
/* munlock(addr, len): may block; no memory checks on the range. */
PRE(sys_munlock)
{
   *flags |= SfMayBlock;
   PRINT("sys_munlock ( %#lx, %llu )", ARG1, (ULong)ARG2);
   PRE_REG_READ2(long, "munlock", unsigned long, addr, vki_size_t, len);
}
2373 
/* mlockall(flags): may block; flags printed in hex. */
PRE(sys_mlockall)
{
   *flags |= SfMayBlock;
   PRINT("sys_mlockall ( %lx )", ARG1);
   PRE_REG_READ1(long, "mlockall", int, flags);
}
2380 
/* setpriority(which, who, prio): no memory arguments. */
PRE(sys_setpriority)
{
   PRINT("sys_setpriority ( %ld, %ld, %ld )", ARG1, ARG2, ARG3);
   PRE_REG_READ3(long, "setpriority", int, which, int, who, int, prio);
}
2386 
/* getpriority(which, who): no memory arguments. */
PRE(sys_getpriority)
{
   PRINT("sys_getpriority ( %ld, %ld )", ARG1, ARG2);
   PRE_REG_READ2(long, "getpriority", int, which, int, who);
}
2392 
/* pwrite64(fd, buf, count, offset): on 32-bit platforms the 64-bit
   offset arrives as two register halves, reassembled with MERGE64;
   buf[0..count) must be readable. */
PRE(sys_pwrite64)
{
   *flags |= SfMayBlock;
#if VG_WORDSIZE == 4
   PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
         ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
   PRE_REG_READ5(ssize_t, "pwrite64",
                 unsigned int, fd, const char *, buf, vki_size_t, count,
                 vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
#elif VG_WORDSIZE == 8
   PRINT("sys_pwrite64 ( %ld, %#lx, %llu, %lld )",
         ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
   PRE_REG_READ4(ssize_t, "pwrite64",
                 unsigned int, fd, const char *, buf, vki_size_t, count,
                 Word, offset);
#else
#  error Unexpected word size
#endif
   PRE_MEM_READ( "pwrite64(buf)", ARG2, ARG3 );
}
2413 
/* sync(): no arguments; may block on disk I/O. */
PRE(sys_sync)
{
   *flags |= SfMayBlock;
   PRINT("sys_sync ( )");
   PRE_REG_READ0(long, "sync");
}
2420 
/* fstatfs(fd, buf): *buf will be written by the kernel. */
PRE(sys_fstatfs)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_fstatfs ( %ld, %#lx )",ARG1,ARG2);
   PRE_REG_READ2(long, "fstatfs",
                 unsigned int, fd, struct statfs *, buf);
   PRE_MEM_WRITE( "fstatfs(buf)", ARG2, sizeof(struct vki_statfs) );
}
2429 
/* fstatfs POST: the whole statfs buffer is now defined. */
POST(sys_fstatfs)
{
   POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
}
2434 
/* fstatfs64(fd, size, buf): caller passes the structure size (ARG2)
   explicitly; that many bytes of buf will be written. */
PRE(sys_fstatfs64)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_fstatfs64 ( %ld, %llu, %#lx )",ARG1,(ULong)ARG2,ARG3);
   PRE_REG_READ3(long, "fstatfs64",
                 unsigned int, fd, vki_size_t, size, struct statfs64 *, buf);
   PRE_MEM_WRITE( "fstatfs64(buf)", ARG3, ARG2 );
}
/* fstatfs64 POST: buf (ARG3) is defined for the caller-given size. */
POST(sys_fstatfs64)
{
   POST_MEM_WRITE( ARG3, ARG2 );
}
2447 
/* getsid(pid): no memory arguments. */
PRE(sys_getsid)
{
   PRINT("sys_getsid ( %ld )", ARG1);
   PRE_REG_READ1(long, "getsid", vki_pid_t, pid);
}
2453 
/* pread64(fd, buf, count, offset): on 32-bit platforms the 64-bit
   offset arrives as two register halves (MERGE64); buf[0..count)
   will be written. */
PRE(sys_pread64)
{
   *flags |= SfMayBlock;
#if VG_WORDSIZE == 4
   PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
         ARG1, ARG2, (ULong)ARG3, MERGE64(ARG4,ARG5));
   PRE_REG_READ5(ssize_t, "pread64",
                 unsigned int, fd, char *, buf, vki_size_t, count,
                 vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
#elif VG_WORDSIZE == 8
   PRINT("sys_pread64 ( %ld, %#lx, %llu, %lld )",
         ARG1, ARG2, (ULong)ARG3, (Long)ARG4);
   PRE_REG_READ4(ssize_t, "pread64",
                 unsigned int, fd, char *, buf, vki_size_t, count,
                 Word, offset);
#else
#  error Unexpected word size
#endif
   PRE_MEM_WRITE( "pread64(buf)", ARG2, ARG3 );
}
/* pread64 POST: only the RES bytes actually read are defined. */
POST(sys_pread64)
{
   vg_assert(SUCCESS);
   if (RES > 0) {
      POST_MEM_WRITE( ARG2, RES );
   }
}
2481 
/* mknod(pathname, mode, dev): pathname must be a readable
   NUL-terminated string. */
PRE(sys_mknod)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_mknod ( %#lx(%s), 0x%lx, 0x%lx )", ARG1, (char*)ARG1, ARG2, ARG3 );
   PRE_REG_READ3(long, "mknod",
                 const char *, pathname, int, mode, unsigned, dev);
   PRE_MEM_RASCIIZ( "mknod(pathname)", ARG1 );
}
2490 
/* flock(fd, operation): may block waiting for the lock. */
PRE(sys_flock)
{
   *flags |= SfMayBlock;
   PRINT("sys_flock ( %ld, %ld )", ARG1, ARG2 );
   PRE_REG_READ2(long, "flock", unsigned int, fd, unsigned int, operation);
}
2497 
2498 // Pre_read a char** argument.
pre_argv_envp(Addr a,ThreadId tid,Char * s1,Char * s2)2499 static void pre_argv_envp(Addr a, ThreadId tid, Char* s1, Char* s2)
2500 {
2501    while (True) {
2502       Addr a_deref;
2503       Addr* a_p = (Addr*)a;
2504       PRE_MEM_READ( s1, (Addr)a_p, sizeof(Addr) );
2505       a_deref = *a_p;
2506       if (0 == a_deref)
2507          break;
2508       PRE_MEM_RASCIIZ( s2, a_deref );
2509       a += sizeof(char*);
2510    }
2511 }
2512 
i_am_the_only_thread(void)2513 static Bool i_am_the_only_thread ( void )
2514 {
2515    Int c = VG_(count_living_threads)();
2516    vg_assert(c >= 1); /* stay sane */
2517    return c == 1;
2518 }
2519 
/* Wait until all other threads disappear.  Spins, yielding the CPU
   and polling signals on behalf of 'self', until this is the sole
   living thread. */
void VG_(reap_threads)(ThreadId self)
{
   while (!i_am_the_only_thread()) {
      /* Let other thread(s) run */
      VG_(vg_yield)();
      VG_(poll_signals)(self);
   }
   vg_assert(i_am_the_only_thread());
}
2530 
2531 // XXX: prototype here seemingly doesn't match the prototype for i386-linux,
2532 // but it seems to work nonetheless...
/* Wrapper for execve(2).  Because a failed exec after teardown is
   unrecoverable, this does as much up-front validation as possible;
   past the "can't recover" point, any failure aborts Valgrind.  When
   tracing the child, the exec is redirected to the Valgrind launcher
   with a rebuilt argv; otherwise the client's exec is done directly
   (and the tool exits with the client). */
PRE(sys_execve)
{
   Char*        path = NULL;       /* path to executable */
   Char**       envp = NULL;
   Char**       argv = NULL;
   Char**       arg2copy;
   Char*        launcher_basename = NULL;
   ThreadState* tst;
   Int          i, j, tot_args;
   SysRes       res;
   Bool         setuid_allowed, trace_this_child;

   PRINT("sys_execve ( %#lx(%s), %#lx, %#lx )", ARG1, (char*)ARG1, ARG2, ARG3);
   PRE_REG_READ3(vki_off_t, "execve",
                 char *, filename, char **, argv, char **, envp);
   PRE_MEM_RASCIIZ( "execve(filename)", ARG1 );
   /* argv/envp may legitimately be NULL; only validate when present. */
   if (ARG2 != 0)
      pre_argv_envp( ARG2, tid, "execve(argv)", "execve(argv[i])" );
   if (ARG3 != 0)
      pre_argv_envp( ARG3, tid, "execve(envp)", "execve(envp[i])" );

   vg_assert(VG_(is_valid_tid)(tid));
   tst = VG_(get_ThreadState)(tid);

   /* Erk.  If the exec fails, then the following will have made a
      mess of things which makes it hard for us to continue.  The
      right thing to do is piece everything together again in
      POST(execve), but that's close to impossible.  Instead, we make
      an effort to check that the execve will work before actually
      doing it. */

   /* Check that the name at least begins in client-accessible storage. */
   if (ARG1 == 0 /* obviously bogus */
       || !VG_(am_is_valid_for_client)( ARG1, 1, VKI_PROT_READ )) {
      SET_STATUS_Failure( VKI_EFAULT );
      return;
   }

   // debug-only printing
   if (0) {
      VG_(printf)("ARG1 = %p(%s)\n", (void*)ARG1, (HChar*)ARG1);
      if (ARG2) {
         VG_(printf)("ARG2 = ");
         Int q;
         HChar** vec = (HChar**)ARG2;
         for (q = 0; vec[q]; q++)
            VG_(printf)("%p(%s) ", vec[q], vec[q]);
         VG_(printf)("\n");
      } else {
         VG_(printf)("ARG2 = null\n");
      }
   }

   // Decide whether or not we want to follow along
   { // Make 'child_argv' be a pointer to the child's arg vector
     // (skipping the exe name)
     HChar** child_argv = (HChar**)ARG2;
     if (child_argv && child_argv[0] == NULL)
        child_argv = NULL;
     trace_this_child = VG_(should_we_trace_this_child)( (HChar*)ARG1, child_argv );
   }

   // Do the important checks:  it is a file, is executable, permissions are
   // ok, etc.  We allow setuid executables to run only in the case when
   // we are not simulating them, that is, they to be run natively.
   setuid_allowed = trace_this_child  ? False  : True;
   res = VG_(pre_exec_check)((const Char*)ARG1, NULL, setuid_allowed);
   if (sr_isError(res)) {
      SET_STATUS_Failure( sr_Err(res) );
      return;
   }

   /* If we're tracing the child, and the launcher name looks bogus
      (possibly because launcher.c couldn't figure it out, see
      comments therein) then we have no option but to fail. */
   if (trace_this_child
       && (VG_(name_of_launcher) == NULL
           || VG_(name_of_launcher)[0] != '/')) {
      SET_STATUS_Failure( VKI_ECHILD ); /* "No child processes" */
      return;
   }

   /* After this point, we can't recover if the execve fails. */
   VG_(debugLog)(1, "syswrap", "Exec of %s\n", (Char*)ARG1);


   // Terminate gdbserver if it is active.
   if (VG_(clo_vgdb)  != Vg_VgdbNo) {
      // If the child will not be traced, we need to terminate gdbserver
      // to cleanup the gdbserver resources (e.g. the FIFO files).
      // If child will be traced, we also terminate gdbserver: the new
      // Valgrind will start a fresh gdbserver after exec.
      VG_(gdbserver) (0);
   }

   /* Resistance is futile.  Nuke all other threads.  POSIX mandates
      this. (Really, nuke them all, since the new process will make
      its own new thread.) */
   VG_(nuke_all_threads_except)( tid, VgSrc_ExitThread );
   VG_(reap_threads)(tid);

   // Set up the child's exe path.
   //
   if (trace_this_child) {

      // We want to exec the launcher.  Get its pre-remembered path.
      path = VG_(name_of_launcher);
      // VG_(name_of_launcher) should have been acquired by m_main at
      // startup.
      vg_assert(path);

      launcher_basename = VG_(strrchr)(path, '/');
      if (launcher_basename == NULL || launcher_basename[1] == 0) {
         launcher_basename = path;  // hmm, tres dubious
      } else {
         launcher_basename++;
      }

   } else {
      path = (Char*)ARG1;
      if (VG_(clo_xml)) {
        VG_(printf_xml)("\n<execv/>\n\n</valgrindoutput>\n\n");
      } else {
        VG_(umsg)("execv called - the tool will now quit\n");
      }
   }

   // Set up the child's environment.
   //
   // Remove the valgrind-specific stuff from the environment so the
   // child doesn't get vgpreload_core.so, vgpreload_<tool>.so, etc.
   // This is done unconditionally, since if we are tracing the child,
   // the child valgrind will set up the appropriate client environment.
   // Nb: we make a copy of the environment before trying to mangle it
   // as it might be in read-only memory (this was bug #101881).
   //
   // Then, if tracing the child, set VALGRIND_LIB for it.
   //
   if (ARG3 == 0) {
      envp = NULL;
   } else {
      envp = VG_(env_clone)( (Char**)ARG3 );
      if (envp == NULL) goto hosed;
      VG_(env_remove_valgrind_env_stuff)( envp );
   }

   if (trace_this_child) {
      // Set VALGRIND_LIB in ARG3 (the environment)
      VG_(env_setenv)( &envp, VALGRIND_LIB, VG_(libdir));
   }

   // Set up the child's args.  If not tracing it, they are
   // simply ARG2.  Otherwise, they are
   //
   // [launcher_basename] ++ VG_(args_for_valgrind) ++ [ARG1] ++ ARG2[1..]
   //
   // except that the first VG_(args_for_valgrind_noexecpass) args
   // are omitted.
   //
   if (!trace_this_child) {
      argv = (Char**)ARG2;
   } else {
      vg_assert( VG_(args_for_valgrind) );
      vg_assert( VG_(args_for_valgrind_noexecpass) >= 0 );
      vg_assert( VG_(args_for_valgrind_noexecpass)
                   <= VG_(sizeXA)( VG_(args_for_valgrind) ) );
      /* how many args in total will there be? */
      // launcher basename
      tot_args = 1;
      // V's args
      tot_args += VG_(sizeXA)( VG_(args_for_valgrind) );
      tot_args -= VG_(args_for_valgrind_noexecpass);
      // name of client exe
      tot_args++;
      // args for client exe, skipping [0]
      arg2copy = (Char**)ARG2;
      if (arg2copy && arg2copy[0]) {
         for (i = 1; arg2copy[i]; i++)
            tot_args++;
      }
      // allocate
      argv = VG_(malloc)( "di.syswrap.pre_sys_execve.1",
                          (tot_args+1) * sizeof(HChar*) );
      if (argv == 0) goto hosed;
      // copy
      j = 0;
      argv[j++] = launcher_basename;
      for (i = 0; i < VG_(sizeXA)( VG_(args_for_valgrind) ); i++) {
         if (i < VG_(args_for_valgrind_noexecpass))
            continue;
         argv[j++] = * (HChar**) VG_(indexXA)( VG_(args_for_valgrind), i );
      }
      argv[j++] = (Char*)ARG1;
      if (arg2copy && arg2copy[0])
         for (i = 1; arg2copy[i]; i++)
            argv[j++] = arg2copy[i];
      argv[j++] = NULL;
      // check
      vg_assert(j == tot_args+1);
   }

   /* restore the DATA rlimit for the child */
   VG_(setrlimit)(VKI_RLIMIT_DATA, &VG_(client_rlimit_data));

   /*
      Set the signal state up for exec.

      We need to set the real signal state to make sure the exec'd
      process gets SIG_IGN properly.

      Also set our real sigmask to match the client's sigmask so that
      the exec'd child will get the right mask.  First we need to
      clear out any pending signals so they they don't get delivered,
      which would confuse things.

      XXX This is a bug - the signals should remain pending, and be
      delivered to the new process after exec.  There's also a
      race-condition, since if someone delivers us a signal between
      the sigprocmask and the execve, we'll still get the signal. Oh
      well.
   */
   {
      vki_sigset_t allsigs;
      vki_siginfo_t info;

      /* What this loop does: it queries SCSS (the signal state that
         the client _thinks_ the kernel is in) by calling
         VG_(do_sys_sigaction), and modifies the real kernel signal
         state accordingly. */
      for (i = 1; i < VG_(max_signal); i++) {
         vki_sigaction_fromK_t sa_f;
         vki_sigaction_toK_t   sa_t;
         VG_(do_sys_sigaction)(i, NULL, &sa_f);
         VG_(convert_sigaction_fromK_to_toK)(&sa_f, &sa_t);
         if (sa_t.ksa_handler == VKI_SIG_IGN)
            VG_(sigaction)(i, &sa_t, NULL);
         else {
            /* Any client handler is meaningless after exec; reset to
               the default disposition. */
            sa_t.ksa_handler = VKI_SIG_DFL;
            VG_(sigaction)(i, &sa_t, NULL);
         }
      }

      /* Drain any pending signals so they aren't delivered to us. */
      VG_(sigfillset)(&allsigs);
      while(VG_(sigtimedwait_zero)(&allsigs, &info) > 0)
         ;

      VG_(sigprocmask)(VKI_SIG_SETMASK, &tst->sig_mask, NULL);
   }

   // debug-only dump of the final exec parameters
   if (0) {
      Char **cpp;
      VG_(printf)("exec: %s\n", path);
      for (cpp = argv; cpp && *cpp; cpp++)
         VG_(printf)("argv: %s\n", *cpp);
      if (0)
         for (cpp = envp; cpp && *cpp; cpp++)
            VG_(printf)("env: %s\n", *cpp);
   }

   SET_STATUS_from_SysRes(
      VG_(do_syscall3)(__NR_execve, (UWord)path, (UWord)argv, (UWord)envp)
   );

   /* If we got here, then the execve failed.  We've already made way
      too much of a mess to continue, so we have to abort. */
  hosed:
   vg_assert(FAILURE);
   VG_(message)(Vg_UserMsg, "execve(%#lx(%s), %#lx, %#lx) failed, errno %ld\n",
                ARG1, (char*)ARG1, ARG2, ARG3, ERR);
   VG_(message)(Vg_UserMsg, "EXEC FAILED: I can't recover from "
                            "execve() failing, so I'm dying.\n");
   VG_(message)(Vg_UserMsg, "Add more stringent tests in PRE(sys_execve), "
                            "or work out how to recover.\n");
   VG_(exit)(101);
}
2807 
/* access(2): only the pathname string needs to be addressable. */
PRE(sys_access)
{
   PRINT("sys_access ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "access", const char *, pathname, int, mode);
   PRE_MEM_RASCIIZ( "access(pathname)", ARG1 );
}
2814 
/* alarm(2): takes only an integer; no memory to check. */
PRE(sys_alarm)
{
   PRINT("sys_alarm ( %ld )", ARG1);
   PRE_REG_READ1(unsigned long, "alarm", unsigned int, seconds);
}
2820 
PRE(sys_brk)2821 PRE(sys_brk)
2822 {
2823    Addr brk_limit = VG_(brk_limit);
2824    Addr brk_new;
2825 
2826    /* libc   says: int   brk(void *end_data_segment);
2827       kernel says: void* brk(void* end_data_segment);  (more or less)
2828 
2829       libc returns 0 on success, and -1 (and sets errno) on failure.
2830       Nb: if you ask to shrink the dataseg end below what it
2831       currently is, that always succeeds, even if the dataseg end
2832       doesn't actually change (eg. brk(0)).  Unless it seg faults.
2833 
2834       Kernel returns the new dataseg end.  If the brk() failed, this
2835       will be unchanged from the old one.  That's why calling (kernel)
2836       brk(0) gives the current dataseg end (libc brk() just returns
2837       zero in that case).
2838 
2839       Both will seg fault if you shrink it back into a text segment.
2840    */
2841    PRINT("sys_brk ( %#lx )", ARG1);
2842    PRE_REG_READ1(unsigned long, "brk", unsigned long, end_data_segment);
2843 
2844    brk_new = do_brk(ARG1);
2845    SET_STATUS_Success( brk_new );
2846 
2847    if (brk_new == ARG1) {
2848       /* brk() succeeded */
2849       if (brk_new < brk_limit) {
2850          /* successfully shrunk the data segment. */
2851          VG_TRACK( die_mem_brk, (Addr)ARG1,
2852 		   brk_limit-ARG1 );
2853       } else
2854       if (brk_new > brk_limit) {
2855          /* successfully grew the data segment */
2856          VG_TRACK( new_mem_brk, brk_limit,
2857                    ARG1-brk_limit, tid );
2858       }
2859    } else {
2860       /* brk() failed */
2861       vg_assert(brk_limit == brk_new);
2862    }
2863 }
2864 
/* chdir(2): may block on FUSE filesystems; path must be readable. */
PRE(sys_chdir)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_chdir ( %#lx(%s) )", ARG1,(char*)ARG1);
   PRE_REG_READ1(long, "chdir", const char *, path);
   PRE_MEM_RASCIIZ( "chdir(path)", ARG1 );
}
2872 
/* chmod(2): may block on FUSE filesystems; path must be readable. */
PRE(sys_chmod)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_chmod ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "chmod", const char *, path, vki_mode_t, mode);
   PRE_MEM_RASCIIZ( "chmod(path)", ARG1 );
}
2880 
/* chown(2): may block on FUSE filesystems; path must be readable. */
PRE(sys_chown)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_chown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
   PRE_REG_READ3(long, "chown",
                 const char *, path, vki_uid_t, owner, vki_gid_t, group);
   PRE_MEM_RASCIIZ( "chown(path)", ARG1 );
}
2889 
/* lchown(2): like chown but does not follow symlinks. */
PRE(sys_lchown)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_lchown ( %#lx(%s), 0x%lx, 0x%lx )", ARG1,(char*)ARG1,ARG2,ARG3);
   PRE_REG_READ3(long, "lchown",
                 const char *, path, vki_uid_t, owner, vki_gid_t, group);
   PRE_MEM_RASCIIZ( "lchown(path)", ARG1 );
}
2898 
/* close(2): refuse (EBADF) attempts to close fds Valgrind itself is
   using, notably the tool's log fd and stderr when -d logging is on. */
PRE(sys_close)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_close ( %ld )", ARG1);
   PRE_REG_READ1(long, "close", unsigned int, fd);

   /* Detect and negate attempts by the client to close Valgrind's log fd */
   if ( (!ML_(fd_allowed)(ARG1, "close", tid, False))
        /* If doing -d style logging (which is to fd=2), don't
           allow that to be closed either. */
        || (ARG1 == 2/*stderr*/ && VG_(debugLog_getLevel)() > 0) )
      SET_STATUS_Failure( VKI_EBADF );
}
2912 
/* On successful close, drop the fd from the tracked-fd table. */
POST(sys_close)
{
   if (VG_(clo_track_fds)) record_fd_close(ARG1);
}
2917 
/* dup(2): nothing to check beforehand; the new fd is vetted in POST. */
PRE(sys_dup)
{
   PRINT("sys_dup ( %ld )", ARG1);
   PRE_REG_READ1(long, "dup", unsigned int, oldfd);
}
2923 
POST(sys_dup)2924 POST(sys_dup)
2925 {
2926    vg_assert(SUCCESS);
2927    if (!ML_(fd_allowed)(RES, "dup", tid, True)) {
2928       VG_(close)(RES);
2929       SET_STATUS_Failure( VKI_EMFILE );
2930    } else {
2931       if (VG_(clo_track_fds))
2932          ML_(record_fd_open_named)(tid, RES);
2933    }
2934 }
2935 
/* dup2(2): reject up front if newfd lands in Valgrind's reserved range. */
PRE(sys_dup2)
{
   PRINT("sys_dup2 ( %ld, %ld )", ARG1,ARG2);
   PRE_REG_READ2(long, "dup2", unsigned int, oldfd, unsigned int, newfd);
   if (!ML_(fd_allowed)(ARG2, "dup2", tid, True))
      SET_STATUS_Failure( VKI_EBADF );
}
2943 
/* Record the duplicated fd; it was already validated in PRE. */
POST(sys_dup2)
{
   vg_assert(SUCCESS);
   if (VG_(clo_track_fds))
      ML_(record_fd_open_named)(tid, RES);
}
2950 
/* fchdir(2): fd-only variant of chdir; may block on FUSE. */
PRE(sys_fchdir)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_fchdir ( %ld )", ARG1);
   PRE_REG_READ1(long, "fchdir", unsigned int, fd);
}
2957 
/* fchown(2): fd-only variant of chown; may block on FUSE. */
PRE(sys_fchown)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_fchown ( %ld, %ld, %ld )", ARG1,ARG2,ARG3);
   PRE_REG_READ3(long, "fchown",
                 unsigned int, fd, vki_uid_t, owner, vki_gid_t, group);
}
2965 
/* fchmod(2): fd-only variant of chmod; may block on FUSE. */
PRE(sys_fchmod)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_fchmod ( %ld, %ld )", ARG1,ARG2);
   PRE_REG_READ2(long, "fchmod", unsigned int, fildes, vki_mode_t, mode);
}
2972 
/* fstat(2) ("new" struct stat layout): buf must be writable. */
PRE(sys_newfstat)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_newfstat ( %ld, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "fstat", unsigned int, fd, struct stat *, buf);
   PRE_MEM_WRITE( "fstat(buf)", ARG2, sizeof(struct vki_stat) );
}
2980 
/* The kernel filled the whole struct stat. */
POST(sys_newfstat)
{
   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
}
2985 
2986 static vki_sigset_t fork_saved_mask;
2987 
2988 // In Linux, the sys_fork() function varies across architectures, but we
2989 // ignore the various args it gets, and so it looks arch-neutral.  Hmm.
// In Linux, the sys_fork() function varies across architectures, but we
// ignore the various args it gets, and so it looks arch-neutral.  Hmm.
/* fork(2): performs the real fork with all signals blocked, then runs
   the registered atfork handlers on both sides and restores the
   client's signal mask.  The parent/child distinction is OS-specific
   (see the #if below). */
PRE(sys_fork)
{
   Bool is_child;
   Int child_pid;
   vki_sigset_t mask;

   PRINT("sys_fork ( )");
   PRE_REG_READ0(long, "fork");

   /* Block all signals during fork, so that we can fix things up in
      the child without being interrupted. */
   VG_(sigfillset)(&mask);
   VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, &fork_saved_mask);

   SET_STATUS_from_SysRes( VG_(do_syscall0)(__NR_fork) );

   /* NB: on failure the saved mask is NOT restored here -- matches
      upstream behaviour; the mask was only changed in this process. */
   if (!SUCCESS) return;

#if defined(VGO_linux)
   // RES is 0 for child, non-0 (the child's PID) for parent.
   is_child = ( RES == 0 ? True : False );
   child_pid = ( is_child ? -1 : RES );
#elif defined(VGO_darwin)
   // RES is the child's pid.  RESHI is 1 for child, 0 for parent.
   is_child = RESHI;
   child_pid = RES;
#else
#  error Unknown OS
#endif

   VG_(do_atfork_pre)(tid);

   if (is_child) {
      VG_(do_atfork_child)(tid);

      /* restore signal mask */
      VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);

      /* If --child-silent-after-fork=yes was specified, set the
         output file descriptors to 'impossible' values.  This is
         noticed by send_bytes_to_logging_sink in m_libcprint.c, which
         duly stops writing any further output. */
      if (VG_(clo_child_silent_after_fork)) {
         if (!VG_(log_output_sink).is_socket)
            VG_(log_output_sink).fd = -1;
         if (!VG_(xml_output_sink).is_socket)
            VG_(xml_output_sink).fd = -1;
      }

   } else {
      VG_(do_atfork_parent)(tid);

      PRINT("   fork: process %d created child %d\n", VG_(getpid)(), child_pid);

      /* restore signal mask */
      VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
   }
}
3048 
/* ftruncate(2): may block (e.g. on network filesystems). */
PRE(sys_ftruncate)
{
   *flags |= SfMayBlock;
   PRINT("sys_ftruncate ( %ld, %ld )", ARG1,ARG2);
   PRE_REG_READ2(long, "ftruncate", unsigned int, fd, unsigned long, length);
}
3055 
/* truncate(2): may block; path must be a readable NUL-terminated string. */
PRE(sys_truncate)
{
   *flags |= SfMayBlock;
   PRINT("sys_truncate ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "truncate",
                 const char *, path, unsigned long, length);
   PRE_MEM_RASCIIZ( "truncate(path)", ARG1 );
}
3064 
/* ftruncate64(2): on 32-bit targets the 64-bit length is split across
   two registers (MERGE64); on 64-bit targets it is a single arg. */
PRE(sys_ftruncate64)
{
   *flags |= SfMayBlock;
#if VG_WORDSIZE == 4
   PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, MERGE64(ARG2,ARG3));
   PRE_REG_READ3(long, "ftruncate64",
                 unsigned int, fd,
                 UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
#else
   PRINT("sys_ftruncate64 ( %ld, %lld )", ARG1, (Long)ARG2);
   PRE_REG_READ2(long, "ftruncate64",
                 unsigned int,fd, UWord,length);
#endif
}
3079 
/* truncate64(2): same register-splitting scheme as ftruncate64. */
PRE(sys_truncate64)
{
   *flags |= SfMayBlock;
#if VG_WORDSIZE == 4
   PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)MERGE64(ARG2, ARG3));
   PRE_REG_READ3(long, "truncate64",
                 const char *, path,
                 UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
#else
   PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)ARG2);
   PRE_REG_READ2(long, "truncate64",
                 const char *,path, UWord,length);
#endif
   PRE_MEM_RASCIIZ( "truncate64(path)", ARG1 );
}
3095 
/* getdents(2): kernel writes up to 'count' bytes of dirents into dirp. */
PRE(sys_getdents)
{
   *flags |= SfMayBlock;
   PRINT("sys_getdents ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
   PRE_REG_READ3(long, "getdents",
                 unsigned int, fd, struct linux_dirent *, dirp,
                 unsigned int, count);
   PRE_MEM_WRITE( "getdents(dirp)", ARG2, ARG3 );
}
3105 
/* RES is the number of bytes actually written into dirp (0 at EOF). */
POST(sys_getdents)
{
   vg_assert(SUCCESS);
   if (RES > 0)
      POST_MEM_WRITE( ARG2, RES );
}
3112 
/* getdents64(2): identical handling to getdents, wider dirent struct. */
PRE(sys_getdents64)
{
   *flags |= SfMayBlock;
   PRINT("sys_getdents64 ( %ld, %#lx, %ld )",ARG1,ARG2,ARG3);
   PRE_REG_READ3(long, "getdents64",
                 unsigned int, fd, struct linux_dirent64 *, dirp,
                 unsigned int, count);
   PRE_MEM_WRITE( "getdents64(dirp)", ARG2, ARG3 );
}
3122 
/* RES is the number of bytes actually written into dirp (0 at EOF). */
POST(sys_getdents64)
{
   vg_assert(SUCCESS);
   if (RES > 0)
      POST_MEM_WRITE( ARG2, RES );
}
3129 
/* getgroups(2): with size > 0, kernel may write up to 'size' gids. */
PRE(sys_getgroups)
{
   PRINT("sys_getgroups ( %ld, %#lx )", ARG1, ARG2);
   PRE_REG_READ2(long, "getgroups", int, size, vki_gid_t *, list);
   if (ARG1 > 0)
      PRE_MEM_WRITE( "getgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
}
3137 
/* RES is the number of gids actually stored in the list. */
POST(sys_getgroups)
{
   vg_assert(SUCCESS);
   if (ARG1 > 0 && RES > 0)
      POST_MEM_WRITE( ARG2, RES * sizeof(vki_gid_t) );
}
3144 
/* getcwd(2): buffer of 'size' bytes must be writable. */
PRE(sys_getcwd)
{
   // Comment from linux/fs/dcache.c:
   //   NOTE! The user-level library version returns a character pointer.
   //   The kernel system call just returns the length of the buffer filled
   //   (which includes the ending '\0' character), or a negative error
   //   value.
   // Is this Linux-specific?  If so it should be moved to syswrap-linux.c.
   PRINT("sys_getcwd ( %#lx, %llu )", ARG1,(ULong)ARG2);
   PRE_REG_READ2(long, "getcwd", char *, buf, unsigned long, size);
   PRE_MEM_WRITE( "getcwd(buf)", ARG1, ARG2 );
}
3157 
/* RES is the number of bytes written into buf, including the NUL. */
POST(sys_getcwd)
{
   vg_assert(SUCCESS);
   if (RES != (Addr)NULL)
      POST_MEM_WRITE( ARG1, RES );
}
3164 
/* geteuid(2): no arguments, no memory. */
PRE(sys_geteuid)
{
   PRINT("sys_geteuid ( )");
   PRE_REG_READ0(long, "geteuid");
}
3170 
/* getegid(2): no arguments, no memory. */
PRE(sys_getegid)
{
   PRINT("sys_getegid ( )");
   PRE_REG_READ0(long, "getegid");
}
3176 
/* getgid(2): no arguments, no memory. */
PRE(sys_getgid)
{
   PRINT("sys_getgid ( )");
   PRE_REG_READ0(long, "getgid");
}
3182 
/* getpid(2): no arguments, no memory. */
PRE(sys_getpid)
{
   PRINT("sys_getpid ()");
   PRE_REG_READ0(long, "getpid");
}
3188 
/* getpgid(2): takes a pid only; no memory to check. */
PRE(sys_getpgid)
{
   PRINT("sys_getpgid ( %ld )", ARG1);
   PRE_REG_READ1(long, "getpgid", vki_pid_t, pid);
}
3194 
/* getpgrp(2): no arguments, no memory. */
PRE(sys_getpgrp)
{
   PRINT("sys_getpgrp ()");
   PRE_REG_READ0(long, "getpgrp");
}
3200 
/* getppid(2): no arguments, no memory. */
PRE(sys_getppid)
{
   PRINT("sys_getppid ()");
   PRE_REG_READ0(long, "getppid");
}
3206 
/* Shared POST handling for getrlimit-style syscalls.  Marks the
   output struct as written, then overwrites selected limits with the
   values Valgrind wants the client to observe (fd limits shrunk by
   Valgrind's reserved descriptors; data/stack limits as saved at
   startup). */
static void common_post_getrlimit(ThreadId tid, UWord a1, UWord a2)
{
   struct vki_rlimit* rlim = (struct vki_rlimit*)a2;

   POST_MEM_WRITE( a2, sizeof(struct vki_rlimit) );

#ifdef _RLIMIT_POSIX_FLAG
   // Darwin will sometimes set _RLIMIT_POSIX_FLAG on getrlimit calls.
   // Unset it here to make the switch case below work correctly.
   a1 &= ~_RLIMIT_POSIX_FLAG;
#endif

   switch (a1) {
      case VKI_RLIMIT_NOFILE:
         rlim->rlim_cur = VG_(fd_soft_limit);
         rlim->rlim_max = VG_(fd_hard_limit);
         break;
      case VKI_RLIMIT_DATA:
         *rlim = VG_(client_rlimit_data);
         break;
      case VKI_RLIMIT_STACK:
         *rlim = VG_(client_rlimit_stack);
         break;
      default:
         /* all other resources: pass the kernel's answer through */
         break;
   }
}
3232 
/* old_getrlimit(2): legacy variant; output struct must be writable. */
PRE(sys_old_getrlimit)
{
   PRINT("sys_old_getrlimit ( %ld, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "old_getrlimit",
                 unsigned int, resource, struct rlimit *, rlim);
   PRE_MEM_WRITE( "old_getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
}
3240 
/* Delegate to the shared getrlimit fixup. */
POST(sys_old_getrlimit)
{
   common_post_getrlimit(tid, ARG1, ARG2);
}
3245 
/* getrlimit(2): output struct must be writable. */
PRE(sys_getrlimit)
{
   PRINT("sys_getrlimit ( %ld, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "getrlimit",
                 unsigned int, resource, struct rlimit *, rlim);
   PRE_MEM_WRITE( "getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
}
3253 
/* Delegate to the shared getrlimit fixup. */
POST(sys_getrlimit)
{
   common_post_getrlimit(tid, ARG1, ARG2);
}
3258 
/* getrusage(2): output struct must be writable. */
PRE(sys_getrusage)
{
   PRINT("sys_getrusage ( %ld, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "getrusage", int, who, struct rusage *, usage);
   PRE_MEM_WRITE( "getrusage(usage)", ARG2, sizeof(struct vki_rusage) );
}
3265 
/* On success (RES == 0) the whole struct rusage was filled in. */
POST(sys_getrusage)
{
   vg_assert(SUCCESS);
   if (RES == 0)
      POST_MEM_WRITE( ARG2, sizeof(struct vki_rusage) );
}
3272 
/* gettimeofday(2): either pointer may be NULL; check only those given. */
PRE(sys_gettimeofday)
{
   PRINT("sys_gettimeofday ( %#lx, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "gettimeofday",
                 struct timeval *, tv, struct timezone *, tz);
   // GrP fixme does darwin write to *tz anymore?
   if (ARG1 != 0)
      PRE_timeval_WRITE( "gettimeofday(tv)", ARG1 );
   if (ARG2 != 0)
      PRE_MEM_WRITE( "gettimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
}
3284 
/* On success, mark whichever of tv/tz were supplied as written. */
POST(sys_gettimeofday)
{
   vg_assert(SUCCESS);
   if (RES == 0) {
      if (ARG1 != 0)
         POST_timeval_WRITE( ARG1 );
      if (ARG2 != 0)
         POST_MEM_WRITE( ARG2, sizeof(struct vki_timezone) );
   }
}
3295 
/* settimeofday(2): inputs only; check readability of supplied structs. */
PRE(sys_settimeofday)
{
   PRINT("sys_settimeofday ( %#lx, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "settimeofday",
                 struct timeval *, tv, struct timezone *, tz);
   if (ARG1 != 0)
      PRE_timeval_READ( "settimeofday(tv)", ARG1 );
   if (ARG2 != 0) {
      PRE_MEM_READ( "settimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
      /* maybe should warn if tz->tz_dsttime is non-zero? */
   }
}
3308 
/* getuid(2): no arguments, no memory. */
PRE(sys_getuid)
{
   PRINT("sys_getuid ( )");
   PRE_REG_READ0(long, "getuid");
}
3314 
/* Generic PRE handling for an ioctl we have no specific wrapper for:
   derive read/write checks from the direction and size bits encoded
   in the request number.  With the "lax-ioctls" hint, skip the checks
   entirely; with no direction bits, just warn (up to 3 times). */
void ML_(PRE_unknown_ioctl)(ThreadId tid, UWord request, UWord arg)
{
   /* We don't have any specific information on it, so
      try to do something reasonable based on direction and
      size bits.  The encoding scheme is described in
      /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .

      According to Simon Hausmann, _IOC_READ means the kernel
      writes a value to the ioctl value passed from the user
      space and the other way around with _IOC_WRITE. */

   UInt dir  = _VKI_IOC_DIR(request);
   UInt size = _VKI_IOC_SIZE(request);
   if (VG_(strstr)(VG_(clo_sim_hints), "lax-ioctls") != NULL) {
      /*
       * Be very lax about ioctl handling; the only
       * assumption is that the size is correct. Doesn't
       * require the full buffer to be initialized when
       * writing.  Without this, using some device
       * drivers with a large number of strange ioctl
       * commands becomes very tiresome.
       */
   } else if (/* size == 0 || */ dir == _VKI_IOC_NONE) {
      //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
      //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
      /* Rate-limited warning: process-lifetime counter. */
      static Int moans = 3;
      if (moans > 0 && !VG_(clo_xml)) {
         moans--;
         VG_(umsg)("Warning: noted but unhandled ioctl 0x%lx"
                   " with no size/direction hints\n", request);
         VG_(umsg)("   This could cause spurious value errors to appear.\n");
         VG_(umsg)("   See README_MISSING_SYSCALL_OR_IOCTL for "
                   "guidance on writing a proper wrapper.\n" );
      }
   } else {
      //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
      //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
      /* _IOC_WRITE: kernel reads from user buffer; _IOC_READ: kernel
         writes to it (counterintuitive, but that's the encoding). */
      if ((dir & _VKI_IOC_WRITE) && size > 0)
         PRE_MEM_READ( "ioctl(generic)", arg, size);
      if ((dir & _VKI_IOC_READ) && size > 0)
         PRE_MEM_WRITE( "ioctl(generic)", arg, size);
   }
}
3358 
ML_(POST_unknown_ioctl)3359 void ML_(POST_unknown_ioctl)(ThreadId tid, UInt res, UWord request, UWord arg)
3360 {
3361    /* We don't have any specific information on it, so
3362       try to do something reasonable based on direction and
3363       size bits.  The encoding scheme is described in
3364       /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
3365 
3366       According to Simon Hausmann, _IOC_READ means the kernel
3367       writes a value to the ioctl value passed from the user
3368       space and the other way around with _IOC_WRITE. */
3369 
3370    UInt dir  = _VKI_IOC_DIR(request);
3371    UInt size = _VKI_IOC_SIZE(request);
3372    if (size > 0 && (dir & _VKI_IOC_READ)
3373        && res == 0
3374        && arg != (Addr)NULL)
3375    {
3376       POST_MEM_WRITE(arg, size);
3377    }
3378 }
3379 
3380 /*
3381    If we're sending a SIGKILL to one of our own threads, then simulate
3382    it rather than really sending the signal, so that the target thread
3383    gets a chance to clean up.  Returns True if we did the killing (or
3384    no killing is necessary), and False if the caller should use the
3385    normal kill syscall.
3386 
3387    "pid" is any pid argument which can be passed to kill; group kills
3388    (< -1, 0), and owner kills (-1) are ignored, on the grounds that
3389    they'll most likely hit all the threads and we won't need to worry
3390    about cleanup.  In truth, we can't fully emulate these multicast
3391    kills.
3392 
3393    "tgid" is a thread group id.  If it is not -1, then the target
3394    thread must be in that thread group.
3395  */
ML_(do_sigkill)3396 Bool ML_(do_sigkill)(Int pid, Int tgid)
3397 {
3398    ThreadState *tst;
3399    ThreadId tid;
3400 
3401    if (pid <= 0)
3402       return False;
3403 
3404    tid = VG_(lwpid_to_vgtid)(pid);
3405    if (tid == VG_INVALID_THREADID)
3406       return False;		/* none of our threads */
3407 
3408    tst = VG_(get_ThreadState)(tid);
3409    if (tst == NULL || tst->status == VgTs_Empty)
3410       return False;		/* hm, shouldn't happen */
3411 
3412    if (tgid != -1 && tst->os_state.threadgroup != tgid)
3413       return False;		/* not the right thread group */
3414 
3415    /* Check to see that the target isn't already exiting. */
3416    if (!VG_(is_exiting)(tid)) {
3417       if (VG_(clo_trace_signals))
3418 	 VG_(message)(Vg_DebugMsg,
3419                       "Thread %d being killed with SIGKILL\n",
3420                       tst->tid);
3421 
3422       tst->exitreason = VgSrc_FatalSig;
3423       tst->os_state.fatalsig = VKI_SIGKILL;
3424 
3425       if (!VG_(is_running_thread)(tid))
3426 	 VG_(get_thread_out_of_syscall)(tid);
3427    }
3428 
3429    return True;
3430 }
3431 
/* kill(2): validate the signal number, emulate SIGKILL aimed at our
   own threads, otherwise forward to the kernel.  Always requests a
   signal poll afterwards since the kill may target ourselves. */
PRE(sys_kill)
{
   PRINT("sys_kill ( %ld, %ld )", ARG1,ARG2);
   PRE_REG_READ2(long, "kill", int, pid, int, sig);
   if (!ML_(client_signal_OK)(ARG2)) {
      SET_STATUS_Failure( VKI_EINVAL );
      return;
   }

   /* If we're sending SIGKILL, check to see if the target is one of
      our threads and handle it specially. */
   if (ARG2 == VKI_SIGKILL && ML_(do_sigkill)(ARG1, -1))
      SET_STATUS_Success(0);
   else
      /* re syscall3: Darwin has a 3rd arg, which is a flag (boolean)
         affecting how posix-compliant the call is.  I guess it is
         harmless to pass the 3rd arg on other platforms; hence pass
         it on all. */
      SET_STATUS_from_SysRes( VG_(do_syscall3)(SYSNO, ARG1, ARG2, ARG3) );

   if (VG_(clo_trace_signals))
      VG_(message)(Vg_DebugMsg, "kill: sent signal %ld to pid %ld\n",
                   ARG2, ARG1);

   /* This kill might have given us a pending signal.  Ask for a check once
      the syscall is done. */
   *flags |= SfPollAfter;
}
3460 
/* link(2): create a hard link.  Both paths must be readable,
   NUL-terminated strings in client memory. */
PRE(sys_link)
{
   *flags |= SfMayBlock;
   PRINT("sys_link ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   PRE_REG_READ2(long, "link", const char *, oldpath, const char *, newpath);
   PRE_MEM_RASCIIZ( "link(oldpath)", ARG1);
   PRE_MEM_RASCIIZ( "link(newpath)", ARG2);
}
3469 
/* lstat(2) ("new" struct-stat flavour): file_name is read as a string;
   buf is written with a struct stat on success. */
PRE(sys_newlstat)
{
   PRINT("sys_newlstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "lstat", char *, file_name, struct stat *, buf);
   PRE_MEM_RASCIIZ( "lstat(file_name)", ARG1 );
   PRE_MEM_WRITE( "lstat(buf)", ARG2, sizeof(struct vki_stat) );
}
3477 
/* On success the kernel has filled in the whole stat buffer. */
POST(sys_newlstat)
{
   vg_assert(SUCCESS);
   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
}
3483 
/* mkdir(2): pathname is read as a string; mode is a plain scalar. */
PRE(sys_mkdir)
{
   *flags |= SfMayBlock;
   PRINT("sys_mkdir ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "mkdir", const char *, pathname, int, mode);
   PRE_MEM_RASCIIZ( "mkdir(pathname)", ARG1 );
}
3491 
/* mprotect(2).  Rejects ranges outside the client address space, and
   (on Linux) emulates the kernel's PROT_GROWSDOWN/PROT_GROWSUP
   address-rounding by rewriting ARG1/ARG2/ARG3 in place before the
   syscall is made. */
PRE(sys_mprotect)
{
   PRINT("sys_mprotect ( %#lx, %llu, %ld )", ARG1,(ULong)ARG2,ARG3);
   PRE_REG_READ3(long, "mprotect",
                 unsigned long, addr, vki_size_t, len, unsigned long, prot);

   if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "mprotect")) {
      SET_STATUS_Failure( VKI_ENOMEM );
   }
#if defined(VKI_PROT_GROWSDOWN)
   else
   if (ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP)) {
      /* Deal with mprotects on growable stack areas.

         The critical files to understand all this are mm/mprotect.c
         in the kernel and sysdeps/unix/sysv/linux/dl-execstack.c in
         glibc.

         The kernel provides PROT_GROWSDOWN and PROT_GROWSUP which
         round the start/end address of mprotect to the start/end of
         the underlying vma and glibc uses that as an easy way to
         change the protection of the stack by calling mprotect on the
         last page of the stack with PROT_GROWSDOWN set.

         The sanity check provided by the kernel is that the vma must
         have the VM_GROWSDOWN/VM_GROWSUP flag set as appropriate.  */
      UInt grows = ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP);
      NSegment const *aseg = VG_(am_find_nsegment)(ARG1);
      NSegment const *rseg;

      vg_assert(aseg);

      if (grows == VKI_PROT_GROWSDOWN) {
         /* The segment below must be the reservation guarding the
            stack's downward growth; if so, extend the mprotect range
            down to the start of the mapped segment and strip the
            GROWSDOWN flag before the real syscall is made. */
         rseg = VG_(am_next_nsegment)( (NSegment*)aseg, False/*backwards*/ );
         if (rseg &&
             rseg->kind == SkResvn &&
             rseg->smode == SmUpper &&
             rseg->end+1 == aseg->start) {
            Addr end = ARG1 + ARG2;
            ARG1 = aseg->start;
            ARG2 = end - aseg->start;
            ARG3 &= ~VKI_PROT_GROWSDOWN;
         } else {
            SET_STATUS_Failure( VKI_EINVAL );
         }
      } else if (grows == VKI_PROT_GROWSUP) {
         /* Mirror-image case: reservation above, extend range up to
            the end of the mapped segment. */
         rseg = VG_(am_next_nsegment)( (NSegment*)aseg, True/*forwards*/ );
         if (rseg &&
             rseg->kind == SkResvn &&
             rseg->smode == SmLower &&
             aseg->end+1 == rseg->start) {
            ARG2 = aseg->end - ARG1 + 1;
            ARG3 &= ~VKI_PROT_GROWSUP;
         } else {
            SET_STATUS_Failure( VKI_EINVAL );
         }
      } else {
         /* both GROWSUP and GROWSDOWN */
         SET_STATUS_Failure( VKI_EINVAL );
      }
   }
#endif   // defined(VKI_PROT_GROWSDOWN)
}
3555 
/* Tell the address-space manager and the tool about the (possibly
   PRE-adjusted) protection change so their views stay in sync. */
POST(sys_mprotect)
{
   Addr a    = ARG1;
   SizeT len = ARG2;
   Int  prot = ARG3;

   ML_(notify_core_and_tool_of_mprotect)(a, len, prot);
}
3564 
/* munmap(2): refuse attempts to unmap memory outside the client's
   allowed address range (e.g. Valgrind's own mappings). */
PRE(sys_munmap)
{
   if (0) VG_(printf)("  munmap( %#lx )\n", ARG1);  // debugging aid, disabled
   PRINT("sys_munmap ( %#lx, %llu )", ARG1,(ULong)ARG2);
   PRE_REG_READ2(long, "munmap", unsigned long, start, vki_size_t, length);

   if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "munmap"))
      SET_STATUS_Failure( VKI_EINVAL );
}
3574 
/* Propagate the successful unmap to the core and the tool. */
POST(sys_munmap)
{
   Addr  a   = ARG1;
   SizeT len = ARG2;

   ML_(notify_core_and_tool_of_munmap)( (Addr64)a, (ULong)len );
}
3582 
/* mincore(2): the kernel writes one byte into vec per page covered by
   [start, start+length), hence the page-rounded size calculation. */
PRE(sys_mincore)
{
   PRINT("sys_mincore ( %#lx, %llu, %#lx )", ARG1,(ULong)ARG2,ARG3);
   PRE_REG_READ3(long, "mincore",
                 unsigned long, start, vki_size_t, length,
                 unsigned char *, vec);
   PRE_MEM_WRITE( "mincore(vec)", ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
}
/* Mark the per-page result vector as written (one byte per page). */
POST(sys_mincore)
{
   POST_MEM_WRITE( ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
}
3595 
/* nanosleep(2): req is always read; rem is only written when non-NULL.
   SfPostOnFail is needed because rem is filled in precisely on the
   EINTR failure path (see the POST handler). */
PRE(sys_nanosleep)
{
   *flags |= SfMayBlock|SfPostOnFail;
   PRINT("sys_nanosleep ( %#lx, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "nanosleep",
                 struct timespec *, req, struct timespec *, rem);
   PRE_MEM_READ( "nanosleep(req)", ARG1, sizeof(struct vki_timespec) );
   if (ARG2 != 0)
      PRE_MEM_WRITE( "nanosleep(rem)", ARG2, sizeof(struct vki_timespec) );
}
3606 
/* The kernel only fills in rem when the sleep was interrupted, i.e. on
   the EINTR failure path -- hence this runs for failures too. */
POST(sys_nanosleep)
{
   vg_assert(SUCCESS || FAILURE);
   if (ARG2 != 0 && FAILURE && ERR == VKI_EINTR)
      POST_MEM_WRITE( ARG2, sizeof(struct vki_timespec) );
}
3613 
/* open(2).  The mode argument only exists (and is only checked) when
   O_CREAT is set.  On Linux, opens of /proc/<our-pid>/cmdline or
   /proc/self/cmdline are redirected to a pre-cooked fake file, since
   the real one would show Valgrind's own command line. */
PRE(sys_open)
{
   if (ARG2 & VKI_O_CREAT) {
      // 3-arg version
      PRINT("sys_open ( %#lx(%s), %ld, %ld )",ARG1,(char*)ARG1,ARG2,ARG3);
      PRE_REG_READ3(long, "open",
                    const char *, filename, int, flags, int, mode);
   } else {
      // 2-arg version
      PRINT("sys_open ( %#lx(%s), %ld )",ARG1,(char*)ARG1,ARG2);
      PRE_REG_READ2(long, "open",
                    const char *, filename, int, flags);
   }
   PRE_MEM_RASCIIZ( "open(filename)", ARG1 );

#if defined(VGO_linux)
   /* Handle the case where the open is of /proc/self/cmdline or
      /proc/<pid>/cmdline, and just give it a copy of the fd for the
      fake file we cooked up at startup (in m_main).  Also, seek the
      cloned fd back to the start. */
   {
      HChar  name[30];
      Char*  arg1s = (Char*) ARG1;
      SysRes sres;

      VG_(sprintf)(name, "/proc/%d/cmdline", VG_(getpid)());
      /* safe_to_deref guards against comparing an unmapped filename
         pointer; the real syscall below would fault-check it anyway. */
      if (ML_(safe_to_deref)( arg1s, 1 ) &&
          (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/cmdline"))
         )
      {
         sres = VG_(dup)( VG_(cl_cmdline_fd) );
         SET_STATUS_from_SysRes( sres );
         if (!sr_isError(sres)) {
            /* Rewind so the client reads from the start of the fake file. */
            OffT off = VG_(lseek)( sr_Res(sres), 0, VKI_SEEK_SET );
            if (off < 0)
               SET_STATUS_Failure( VKI_EMFILE );
         }
         return;
      }
   }
#endif // defined(VGO_linux)

   /* Otherwise handle normally */
   *flags |= SfMayBlock;
}
3659 
/* Reject fds in the range Valgrind reserves for itself; otherwise
   record the new fd for --track-fds. */
POST(sys_open)
{
   vg_assert(SUCCESS);
   if (!ML_(fd_allowed)(RES, "open", tid, True)) {
      VG_(close)(RES);
      SET_STATUS_Failure( VKI_EMFILE );
   } else {
      if (VG_(clo_track_fds))
         ML_(record_fd_open_with_given_name)(tid, RES, (Char*)ARG1);
   }
}
3671 
/* read(2): reject Valgrind-reserved fds; otherwise mark the whole
   buffer as writable (POST narrows this to the bytes actually read). */
PRE(sys_read)
{
   *flags |= SfMayBlock;
   PRINT("sys_read ( %ld, %#lx, %llu )", ARG1, ARG2, (ULong)ARG3);
   PRE_REG_READ3(ssize_t, "read",
                 unsigned int, fd, char *, buf, vki_size_t, count);

   if (!ML_(fd_allowed)(ARG1, "read", tid, False))
      SET_STATUS_Failure( VKI_EBADF );
   else
      PRE_MEM_WRITE( "read(buf)", ARG2, ARG3 );
}
3684 
/* Only the RES bytes actually read become defined. */
POST(sys_read)
{
   vg_assert(SUCCESS);
   POST_MEM_WRITE( ARG2, RES );
}
3690 
/* write(2): normally reject Valgrind-reserved fds, but writes to
   stderr are exempted under --sim-hints=enable-outer so an outer
   Valgrind self-hosting an inner one can still report errors. */
PRE(sys_write)
{
   Bool ok;
   *flags |= SfMayBlock;
   PRINT("sys_write ( %ld, %#lx, %llu )", ARG1, ARG2, (ULong)ARG3);
   PRE_REG_READ3(ssize_t, "write",
                 unsigned int, fd, const char *, buf, vki_size_t, count);
   /* check to see if it is allowed.  If not, try for an exemption from
      --sim-hints=enable-outer (used for self hosting). */
   ok = ML_(fd_allowed)(ARG1, "write", tid, False);
   if (!ok && ARG1 == 2/*stderr*/
           && VG_(strstr)(VG_(clo_sim_hints),"enable-outer"))
      ok = True;
   if (!ok)
      SET_STATUS_Failure( VKI_EBADF );
   else
      PRE_MEM_READ( "write(buf)", ARG2, ARG3 );
}
3709 
/* creat(2): pathname read as a string; equivalent to
   open(pathname, O_CREAT|O_WRONLY|O_TRUNC, mode). */
PRE(sys_creat)
{
   *flags |= SfMayBlock;
   PRINT("sys_creat ( %#lx(%s), %ld )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "creat", const char *, pathname, int, mode);
   PRE_MEM_RASCIIZ( "creat(pathname)", ARG1 );
}
3717 
/* Same fd policing and --track-fds bookkeeping as POST(sys_open). */
POST(sys_creat)
{
   vg_assert(SUCCESS);
   if (!ML_(fd_allowed)(RES, "creat", tid, True)) {
      VG_(close)(RES);
      SET_STATUS_Failure( VKI_EMFILE );
   } else {
      if (VG_(clo_track_fds))
         ML_(record_fd_open_with_given_name)(tid, RES, (Char*)ARG1);
   }
}
3729 
PRE(sys_poll)3730 PRE(sys_poll)
3731 {
3732    /* struct pollfd {
3733         int fd;           -- file descriptor
3734         short events;     -- requested events
3735         short revents;    -- returned events
3736       };
3737       int poll(struct pollfd *ufds, unsigned int nfds, int timeout)
3738    */
3739    UInt i;
3740    struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
3741    *flags |= SfMayBlock;
3742    PRINT("sys_poll ( %#lx, %ld, %ld )\n", ARG1,ARG2,ARG3);
3743    PRE_REG_READ3(long, "poll",
3744                  struct vki_pollfd *, ufds, unsigned int, nfds, long, timeout);
3745 
3746    for (i = 0; i < ARG2; i++) {
3747       PRE_MEM_READ( "poll(ufds.fd)",
3748                     (Addr)(&ufds[i].fd), sizeof(ufds[i].fd) );
3749       PRE_MEM_READ( "poll(ufds.events)",
3750                     (Addr)(&ufds[i].events), sizeof(ufds[i].events) );
3751       PRE_MEM_WRITE( "poll(ufds.reventss)",
3752                      (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
3753    }
3754 }
3755 
/* On success (or zero-timeout return) the kernel has updated every
   entry's revents field, even for fds with no events pending. */
POST(sys_poll)
{
   if (RES >= 0) {
      UInt i;
      struct vki_pollfd* ufds = (struct vki_pollfd *)ARG1;
      for (i = 0; i < ARG2; i++)
         POST_MEM_WRITE( (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
   }
}
3765 
/* readlink(2).  Performs the syscall itself (in PRE) so that, on
   Linux, reads of /proc/self/exe or /proc/<our-pid>/exe can be
   redirected to the client executable's fd rather than revealing the
   Valgrind launcher. */
PRE(sys_readlink)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   Word saved = SYSNO;

   PRINT("sys_readlink ( %#lx(%s), %#lx, %llu )", ARG1,(char*)ARG1,ARG2,(ULong)ARG3);
   PRE_REG_READ3(long, "readlink",
                 const char *, path, char *, buf, int, bufsiz);
   PRE_MEM_RASCIIZ( "readlink(path)", ARG1 );
   PRE_MEM_WRITE( "readlink(buf)", ARG2,ARG3 );

   {
#if defined(VGO_linux)
      /*
       * Handle the case where readlink is looking at /proc/self/exe or
       * /proc/<pid>/exe.
       */
      HChar name[25];
      Char* arg1s = (Char*) ARG1;
      VG_(sprintf)(name, "/proc/%d/exe", VG_(getpid)());
      if (ML_(safe_to_deref)(arg1s, 1) &&
          (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/exe"))
         )
      {
         /* Resolve via our own fd onto the client executable instead. */
         VG_(sprintf)(name, "/proc/self/fd/%d", VG_(cl_exec_fd));
         SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, (UWord)name,
                                                         ARG2, ARG3));
      } else
#endif // defined(VGO_linux)
      {
         /* Normal case */
         SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, ARG1, ARG2, ARG3));
      }
   }

   /* Syscall already done above, so do the POST-style marking here. */
   if (SUCCESS && RES > 0)
      POST_MEM_WRITE( ARG2, RES );
}
3804 
/* readv(2): the iovec array itself is read; each iov_base buffer is
   a write target. */
PRE(sys_readv)
{
   Int i;
   struct vki_iovec * vec;
   *flags |= SfMayBlock;
   PRINT("sys_readv ( %ld, %#lx, %llu )",ARG1,ARG2,(ULong)ARG3);
   PRE_REG_READ3(ssize_t, "readv",
                 unsigned long, fd, const struct iovec *, vector,
                 unsigned long, count);
   if (!ML_(fd_allowed)(ARG1, "readv", tid, False)) {
      SET_STATUS_Failure( VKI_EBADF );
   } else {
      PRE_MEM_READ( "readv(vector)", ARG2, ARG3 * sizeof(struct vki_iovec) );

      if (ARG2 != 0) {
         /* ToDo: don't do any of the following if the vector is invalid */
         vec = (struct vki_iovec *)ARG2;
         for (i = 0; i < (Int)ARG3; i++)
            PRE_MEM_WRITE( "readv(vector[...])",
                           (Addr)vec[i].iov_base, vec[i].iov_len );
      }
   }
}
3828 
/* Distribute the RES bytes read across the iovec buffers in order,
   marking only the filled prefix of each buffer as written. */
POST(sys_readv)
{
   vg_assert(SUCCESS);
   if (RES > 0) {
      Int i;
      struct vki_iovec * vec = (struct vki_iovec *)ARG2;
      Int remains = RES;

      /* RES holds the number of bytes read. */
      for (i = 0; i < (Int)ARG3; i++) {
         Int nReadThisBuf = vec[i].iov_len;
         /* The last touched buffer may be only partially filled. */
         if (nReadThisBuf > remains) nReadThisBuf = remains;
         POST_MEM_WRITE( (Addr)vec[i].iov_base, nReadThisBuf );
         remains -= nReadThisBuf;
         if (remains < 0) VG_(core_panic)("readv: remains < 0");
      }
   }
}
3847 
/* rename(2): both paths read as strings. */
PRE(sys_rename)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_rename ( %#lx(%s), %#lx(%s) )", ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   PRE_REG_READ2(long, "rename", const char *, oldpath, const char *, newpath);
   PRE_MEM_RASCIIZ( "rename(oldpath)", ARG1 );
   PRE_MEM_RASCIIZ( "rename(newpath)", ARG2 );
}
3856 
/* rmdir(2): pathname read as a string. */
PRE(sys_rmdir)
{
   *flags |= SfMayBlock;
   PRINT("sys_rmdir ( %#lx(%s) )", ARG1,(char*)ARG1);
   PRE_REG_READ1(long, "rmdir", const char *, pathname);
   PRE_MEM_RASCIIZ( "rmdir(pathname)", ARG1 );
}
3864 
/* select(2): each non-NULL fd_set is read up to n/8 bytes (the bytes
   actually examined for n fds); timeout is read as a timeval. */
PRE(sys_select)
{
   *flags |= SfMayBlock;
   PRINT("sys_select ( %ld, %#lx, %#lx, %#lx, %#lx )", ARG1,ARG2,ARG3,ARG4,ARG5);
   PRE_REG_READ5(long, "select",
                 int, n, vki_fd_set *, readfds, vki_fd_set *, writefds,
                 vki_fd_set *, exceptfds, struct vki_timeval *, timeout);
   // XXX: this possibly understates how much memory is read.
   if (ARG2 != 0)
      PRE_MEM_READ( "select(readfds)",
                    ARG2, ARG1/8 /* __FD_SETSIZE/8 */ );
   if (ARG3 != 0)
      PRE_MEM_READ( "select(writefds)",
                    ARG3, ARG1/8 /* __FD_SETSIZE/8 */ );
   if (ARG4 != 0)
      PRE_MEM_READ( "select(exceptfds)",
                    ARG4, ARG1/8 /* __FD_SETSIZE/8 */ );
   if (ARG5 != 0)
      PRE_timeval_READ( "select(timeout)", ARG5 );
}
3885 
/* setgid(2): scalar argument only; nothing to check beyond the register. */
PRE(sys_setgid)
{
   PRINT("sys_setgid ( %ld )", ARG1);
   PRE_REG_READ1(long, "setgid", vki_gid_t, gid);
}
3891 
/* setsid(2): no arguments. */
PRE(sys_setsid)
{
   PRINT("sys_setsid ( )");
   PRE_REG_READ0(long, "setsid");
}
3897 
/* setgroups(2): list is read only when size > 0. */
PRE(sys_setgroups)
{
   PRINT("setgroups ( %llu, %#lx )", (ULong)ARG1, ARG2);
   PRE_REG_READ2(long, "setgroups", int, size, vki_gid_t *, list);
   if (ARG1 > 0)
      PRE_MEM_READ( "setgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
}
3905 
/* setpgid(2): scalar arguments only. */
PRE(sys_setpgid)
{
   PRINT("setpgid ( %ld, %ld )", ARG1, ARG2);
   PRE_REG_READ2(long, "setpgid", vki_pid_t, pid, vki_pid_t, pgid);
}
3911 
/* setregid(2): scalar arguments only. */
PRE(sys_setregid)
{
   PRINT("sys_setregid ( %ld, %ld )", ARG1, ARG2);
   PRE_REG_READ2(long, "setregid", vki_gid_t, rgid, vki_gid_t, egid);
}
3917 
/* setreuid(2): scalar arguments only. */
PRE(sys_setreuid)
{
   PRINT("sys_setreuid ( 0x%lx, 0x%lx )", ARG1, ARG2);
   PRE_REG_READ2(long, "setreuid", vki_uid_t, ruid, vki_uid_t, euid);
}
3923 
/* setrlimit(2).  RLIMIT_NOFILE, RLIMIT_DATA and (for the main thread)
   RLIMIT_STACK are emulated entirely inside Valgrind, since the real
   limits belong to the Valgrind process and must not be disturbed;
   other resources fall through to the kernel. */
PRE(sys_setrlimit)
{
   UWord arg1 = ARG1;
   PRINT("sys_setrlimit ( %ld, %#lx )", ARG1,ARG2);
   PRE_REG_READ2(long, "setrlimit",
                 unsigned int, resource, struct rlimit *, rlim);
   PRE_MEM_READ( "setrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );

#ifdef _RLIMIT_POSIX_FLAG
   // Darwin will sometimes set _RLIMIT_POSIX_FLAG on setrlimit calls.
   // Unset it here to make the if statements below work correctly.
   arg1 &= ~_RLIMIT_POSIX_FLAG;
#endif

   if (arg1 == VKI_RLIMIT_NOFILE) {
      /* Client may lower its soft fd limit up to the hard limit we
         chose for it, but cannot change the hard limit. */
      if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(fd_hard_limit) ||
          ((struct vki_rlimit *)ARG2)->rlim_max != VG_(fd_hard_limit)) {
         SET_STATUS_Failure( VKI_EPERM );
      }
      else {
         VG_(fd_soft_limit) = ((struct vki_rlimit *)ARG2)->rlim_cur;
         SET_STATUS_Success( 0 );
      }
   }
   else if (arg1 == VKI_RLIMIT_DATA) {
      if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_data).rlim_max ||
          ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_data).rlim_max) {
         SET_STATUS_Failure( VKI_EPERM );
      }
      else {
         VG_(client_rlimit_data) = *(struct vki_rlimit *)ARG2;
         SET_STATUS_Success( 0 );
      }
   }
   else if (arg1 == VKI_RLIMIT_STACK && tid == 1) {
      /* Only the main thread (tid 1) owns the process stack limit. */
      if (((struct vki_rlimit *)ARG2)->rlim_cur > VG_(client_rlimit_stack).rlim_max ||
          ((struct vki_rlimit *)ARG2)->rlim_max > VG_(client_rlimit_stack).rlim_max) {
         SET_STATUS_Failure( VKI_EPERM );
      }
      else {
         VG_(threads)[tid].client_stack_szB = ((struct vki_rlimit *)ARG2)->rlim_cur;
         VG_(client_rlimit_stack) = *(struct vki_rlimit *)ARG2;
         SET_STATUS_Success( 0 );
      }
   }
}
3970 
/* setuid(2): scalar argument only. */
PRE(sys_setuid)
{
   PRINT("sys_setuid ( %ld )", ARG1);
   PRE_REG_READ1(long, "setuid", vki_uid_t, uid);
}
3976 
/* stat(2) ("new" struct-stat flavour): file_name read as a string;
   buf written with a struct stat on success. */
PRE(sys_newstat)
{
   PRINT("sys_newstat ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "stat", char *, file_name, struct stat *, buf);
   PRE_MEM_RASCIIZ( "stat(file_name)", ARG1 );
   PRE_MEM_WRITE( "stat(buf)", ARG2, sizeof(struct vki_stat) );
}
3984 
/* Mark the stat buffer written.  NOTE(review): unlike
   POST(sys_newlstat) this does not assert SUCCESS -- presumably
   intentional, but worth confirming. */
POST(sys_newstat)
{
   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
}
3989 
/* statfs(2): path read as a string; buf written with a struct statfs. */
PRE(sys_statfs)
{
   PRINT("sys_statfs ( %#lx(%s), %#lx )",ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "statfs", const char *, path, struct statfs *, buf);
   PRE_MEM_RASCIIZ( "statfs(path)", ARG1 );
   PRE_MEM_WRITE( "statfs(buf)", ARG2, sizeof(struct vki_statfs) );
}
/* Mark the statfs buffer written. */
POST(sys_statfs)
{
   POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
}
4001 
/* statfs64(2): the caller supplies the buffer size explicitly (ARG2),
   so that is the amount marked writable, not sizeof(struct statfs64). */
PRE(sys_statfs64)
{
   PRINT("sys_statfs64 ( %#lx(%s), %llu, %#lx )",ARG1,(char*)ARG1,(ULong)ARG2,ARG3);
   PRE_REG_READ3(long, "statfs64",
                 const char *, path, vki_size_t, size, struct statfs64 *, buf);
   PRE_MEM_RASCIIZ( "statfs64(path)", ARG1 );
   PRE_MEM_WRITE( "statfs64(buf)", ARG3, ARG2 );
}
/* Mark the caller-sized buffer written. */
POST(sys_statfs64)
{
   POST_MEM_WRITE( ARG3, ARG2 );
}
4014 
/* symlink(2): both paths read as strings. */
PRE(sys_symlink)
{
   *flags |= SfMayBlock;
   PRINT("sys_symlink ( %#lx(%s), %#lx(%s) )",ARG1,(char*)ARG1,ARG2,(char*)ARG2);
   PRE_REG_READ2(long, "symlink", const char *, oldpath, const char *, newpath);
   PRE_MEM_RASCIIZ( "symlink(oldpath)", ARG1 );
   PRE_MEM_RASCIIZ( "symlink(newpath)", ARG2 );
}
4023 
/* time(2): t is optional; when non-NULL the kernel stores the result
   there as well as returning it. */
PRE(sys_time)
{
   /* time_t time(time_t *t); */
   PRINT("sys_time ( %#lx )",ARG1);
   PRE_REG_READ1(long, "time", int *, t);
   if (ARG1 != 0) {
      PRE_MEM_WRITE( "time(t)", ARG1, sizeof(vki_time_t) );
   }
}
4033 
/* Mark the optional out-parameter written. */
POST(sys_time)
{
   if (ARG1 != 0) {
      POST_MEM_WRITE( ARG1, sizeof(vki_time_t) );
   }
}
4040 
/* times(2): buf is optional (Linux permits NULL). */
PRE(sys_times)
{
   PRINT("sys_times ( %#lx )", ARG1);
   PRE_REG_READ1(long, "times", struct tms *, buf);
   if (ARG1 != 0) {
      PRE_MEM_WRITE( "times(buf)", ARG1, sizeof(struct vki_tms) );
   }
}
4049 
/* Mark the optional tms buffer written. */
POST(sys_times)
{
   if (ARG1 != 0) {
      POST_MEM_WRITE( ARG1, sizeof(struct vki_tms) );
   }
}
4056 
/* umask(2): scalar argument only; always succeeds. */
PRE(sys_umask)
{
   PRINT("sys_umask ( %ld )", ARG1);
   PRE_REG_READ1(long, "umask", int, mask);
}
4062 
/* unlink(2): pathname read as a string. */
PRE(sys_unlink)
{
   *flags |= SfMayBlock;
   PRINT("sys_unlink ( %#lx(%s) )", ARG1,(char*)ARG1);
   PRE_REG_READ1(long, "unlink", const char *, pathname);
   PRE_MEM_RASCIIZ( "unlink(pathname)", ARG1 );
}
4070 
/* uname(2) ("new" utsname flavour): buf written with a new_utsname. */
PRE(sys_newuname)
{
   PRINT("sys_newuname ( %#lx )", ARG1);
   PRE_REG_READ1(long, "uname", struct new_utsname *, buf);
   PRE_MEM_WRITE( "uname(buf)", ARG1, sizeof(struct vki_new_utsname) );
}
4077 
/* Mark the utsname buffer written (guard against a NULL arg). */
POST(sys_newuname)
{
   if (ARG1 != 0) {
      POST_MEM_WRITE( ARG1, sizeof(struct vki_new_utsname) );
   }
}
4084 
/* waitpid(2): status is an optional int out-parameter. */
PRE(sys_waitpid)
{
   *flags |= SfMayBlock;
   PRINT("sys_waitpid ( %ld, %#lx, %ld )", ARG1,ARG2,ARG3);
   PRE_REG_READ3(long, "waitpid",
                 vki_pid_t, pid, unsigned int *, status, int, options);

   if (ARG2 != (Addr)NULL)
      PRE_MEM_WRITE( "waitpid(status)", ARG2, sizeof(int) );
}
4095 
/* Mark the optional status word written. */
POST(sys_waitpid)
{
   if (ARG2 != (Addr)NULL)
      POST_MEM_WRITE( ARG2, sizeof(int) );
}
4101 
/* wait4(2): status and rusage are both optional out-parameters. */
PRE(sys_wait4)
{
   *flags |= SfMayBlock;
   PRINT("sys_wait4 ( %ld, %#lx, %ld, %#lx )", ARG1,ARG2,ARG3,ARG4);

   PRE_REG_READ4(long, "wait4",
                 vki_pid_t, pid, unsigned int *, status, int, options,
                 struct rusage *, rusage);
   if (ARG2 != (Addr)NULL)
      PRE_MEM_WRITE( "wait4(status)", ARG2, sizeof(int) );
   if (ARG4 != (Addr)NULL)
      PRE_MEM_WRITE( "wait4(rusage)", ARG4, sizeof(struct vki_rusage) );
}
4115 
/* Mark the optional status word and rusage buffer written. */
POST(sys_wait4)
{
   if (ARG2 != (Addr)NULL)
      POST_MEM_WRITE( ARG2, sizeof(int) );
   if (ARG4 != (Addr)NULL)
      POST_MEM_WRITE( ARG4, sizeof(struct vki_rusage) );
}
4123 
/* writev(2): the iovec array and every iov_base buffer are inputs;
   nothing is written back, so there is no POST handler. */
PRE(sys_writev)
{
   Int i;
   struct vki_iovec * vec;
   *flags |= SfMayBlock;
   PRINT("sys_writev ( %ld, %#lx, %llu )",ARG1,ARG2,(ULong)ARG3);
   PRE_REG_READ3(ssize_t, "writev",
                 unsigned long, fd, const struct iovec *, vector,
                 unsigned long, count);
   if (!ML_(fd_allowed)(ARG1, "writev", tid, False)) {
      SET_STATUS_Failure( VKI_EBADF );
   } else {
      PRE_MEM_READ( "writev(vector)",
                    ARG2, ARG3 * sizeof(struct vki_iovec) );
      if (ARG2 != 0) {
         /* ToDo: don't do any of the following if the vector is invalid */
         vec = (struct vki_iovec *)ARG2;
         for (i = 0; i < (Int)ARG3; i++)
            PRE_MEM_READ( "writev(vector[...])",
                           (Addr)vec[i].iov_base, vec[i].iov_len );
      }
   }
}
4147 
/* utimes(2): tvp, when non-NULL, points at a 2-element timeval array
   (access time, modification time); each element is read separately. */
PRE(sys_utimes)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_utimes ( %#lx(%s), %#lx )", ARG1,(char*)ARG1,ARG2);
   PRE_REG_READ2(long, "utimes", char *, filename, struct timeval *, tvp);
   PRE_MEM_RASCIIZ( "utimes(filename)", ARG1 );
   if (ARG2 != 0) {
      PRE_timeval_READ( "utimes(tvp[0])", ARG2 );
      PRE_timeval_READ( "utimes(tvp[1])", ARG2+sizeof(struct vki_timeval) );
   }
}
4159 
/* acct(2): filename read as a string. */
PRE(sys_acct)
{
   PRINT("sys_acct ( %#lx(%s) )", ARG1,(char*)ARG1);
   PRE_REG_READ1(long, "acct", const char *, filename);
   PRE_MEM_RASCIIZ( "acct(filename)", ARG1 );
}
4166 
/* pause(2): no arguments; blocks until a signal arrives. */
PRE(sys_pause)
{
   *flags |= SfMayBlock;
   PRINT("sys_pause ( )");
   PRE_REG_READ0(long, "pause");
}
4173 
/* sigaltstack(2).  Handled entirely inside Valgrind (the real signal
   stack belongs to Valgrind itself): the memory checks run first, then
   VG_(do_sys_sigaltstack) performs the whole operation and sets the
   result directly. */
PRE(sys_sigaltstack)
{
   PRINT("sigaltstack ( %#lx, %#lx )",ARG1,ARG2);
   PRE_REG_READ2(int, "sigaltstack",
                 const vki_stack_t *, ss, vki_stack_t *, oss);
   if (ARG1 != 0) {
      /* Check the three fields individually rather than the whole
         struct, to avoid flagging any padding bytes. */
      const vki_stack_t *ss = (vki_stack_t *)ARG1;
      PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_sp, sizeof(ss->ss_sp) );
      PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_flags, sizeof(ss->ss_flags) );
      PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_size, sizeof(ss->ss_size) );
   }
   if (ARG2 != 0) {
      PRE_MEM_WRITE( "sigaltstack(oss)", ARG2, sizeof(vki_stack_t) );
   }

   SET_STATUS_from_SysRes(
      VG_(do_sys_sigaltstack) (tid, (vki_stack_t*)ARG1,
                              (vki_stack_t*)ARG2)
   );
}
/* On success, the old-stack out-parameter (if supplied) was filled in. */
POST(sys_sigaltstack)
{
   vg_assert(SUCCESS);
   if (RES == 0 && ARG2 != 0)
      POST_MEM_WRITE( ARG2, sizeof(vki_stack_t));
}
4200 
4201 #undef PRE
4202 #undef POST
4203 
4204 #endif // defined(VGO_linux) || defined(VGO_darwin)
4205 
4206 /*--------------------------------------------------------------------*/
4207 /*--- end                                                          ---*/
4208 /*--------------------------------------------------------------------*/
4209