• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- Thread scheduling.                               scheduler.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2000-2012 Julian Seward
11       jseward@acm.org
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 /*
32    Overview
33 
34    Valgrind tries to emulate the kernel's threading as closely as
35    possible.  The client does all threading via the normal syscalls
36    (on Linux: clone, etc).  Valgrind emulates this by creating exactly
37    the same process structure as would be created without Valgrind.
38    There are no extra threads.
39 
40    The main difference is that Valgrind only allows one client thread
41    to run at once.  This is controlled with the CPU Big Lock,
42    "the_BigLock".  Any time a thread wants to run client code or
43    manipulate any shared state (which is anything other than its own
44    ThreadState entry), it must hold the_BigLock.
45 
46    When a thread is about to block in a blocking syscall, it releases
47    the_BigLock, and re-takes it when it becomes runnable again (either
48    because the syscall finished, or we took a signal).
49 
50    VG_(scheduler) therefore runs in each thread.  It returns only when
51    the thread is exiting, either because it exited itself, or it was
52    told to exit by another thread.
53 
54    This file is almost entirely OS-independent.  The details of how
55    the OS handles threading and signalling are abstracted away and
56    implemented elsewhere.  [Some of the functions have worked their
57    way back for the moment, until we do an OS port in earnest...]
58 */
59 
60 
61 #include "pub_core_basics.h"
62 #include "pub_core_debuglog.h"
63 #include "pub_core_vki.h"
64 #include "pub_core_vkiscnums.h"    // __NR_sched_yield
65 #include "pub_core_libcsetjmp.h"   // to keep _threadstate.h happy
66 #include "pub_core_threadstate.h"
67 #include "pub_core_aspacemgr.h"
68 #include "pub_core_clreq.h"         // for VG_USERREQ__*
69 #include "pub_core_dispatch.h"
70 #include "pub_core_errormgr.h"      // For VG_(get_n_errs_found)()
71 #include "pub_core_gdbserver.h"     // for VG_(gdbserver) and VG_(gdbserver_activity)
72 #include "pub_core_libcbase.h"
73 #include "pub_core_libcassert.h"
74 #include "pub_core_libcprint.h"
75 #include "pub_core_libcproc.h"
76 #include "pub_core_libcsignal.h"
77 #if defined(VGO_darwin)
78 #include "pub_core_mach.h"
79 #endif
80 #include "pub_core_machine.h"
81 #include "pub_core_mallocfree.h"
82 #include "pub_core_options.h"
83 #include "pub_core_replacemalloc.h"
84 #include "pub_core_signals.h"
85 #include "pub_core_stacks.h"
86 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
87 #include "pub_core_syscall.h"
88 #include "pub_core_syswrap.h"
89 #include "pub_core_tooliface.h"
90 #include "pub_core_translate.h"     // For VG_(translate)()
91 #include "pub_core_transtab.h"
92 #include "pub_core_debuginfo.h"     // VG_(di_notify_pdb_debuginfo)
93 #include "priv_sched-lock.h"
94 #include "pub_core_scheduler.h"     // self
95 #include "pub_core_redir.h"
96 
97 
98 /* ---------------------------------------------------------------------
99    Types and globals for the scheduler.
100    ------------------------------------------------------------------ */
101 
102 /* ThreadId and ThreadState are defined elsewhere*/
103 
104 /* Defines the thread-scheduling timeslice, in terms of the number of
105    basic blocks we attempt to run each thread for.  Smaller values
106    give finer interleaving but much increased scheduling overheads. */
107 #define SCHEDULING_QUANTUM   100000
108 
109 /* If False, a fault is Valgrind-internal (ie, a bug) */
110 Bool VG_(in_generated_code) = False;
111 
112 /* 64-bit counter for the number of basic blocks done. */
113 static ULong bbs_done = 0;
114 
115 /* Counter to see if vgdb activity is to be verified.
116    When nr of bbs done reaches vgdb_next_poll, scheduler will
117    poll for gdbserver activity. VG_(force_vgdb_poll) and
118    VG_(disable_vgdb_poll) allows the valgrind core (e.g. m_gdbserver)
119    to control when the next poll will be done. */
120 static ULong vgdb_next_poll;
121 
122 /* Forwards */
123 static void do_client_request ( ThreadId tid );
124 static void scheduler_sanity ( ThreadId tid );
125 static void mostly_clear_thread_record ( ThreadId tid );
126 
127 /* Stats. */
128 static ULong n_scheduling_events_MINOR = 0;
129 static ULong n_scheduling_events_MAJOR = 0;
130 
131 /* Stats: number of XIndirs, and number that missed in the fast
132    cache. */
133 static ULong stats__n_xindirs = 0;
134 static ULong stats__n_xindir_misses = 0;
135 
136 /* And 32-bit temp bins for the above, so that 32-bit platforms don't
137    have to do 64 bit incs on the hot path through
138    VG_(cp_disp_xindir). */
139 /*global*/ UInt VG_(stats__n_xindirs_32) = 0;
140 /*global*/ UInt VG_(stats__n_xindir_misses_32) = 0;
141 
142 /* Sanity checking counts. */
143 static UInt sanity_fast_count = 0;
144 static UInt sanity_slow_count = 0;
145 
VG_(print_scheduler_stats)146 void VG_(print_scheduler_stats)(void)
147 {
148    VG_(message)(Vg_DebugMsg,
149       "scheduler: %'llu event checks.\n", bbs_done );
150    VG_(message)(Vg_DebugMsg,
151                 "scheduler: %'llu indir transfers, %'llu misses (1 in %llu)\n",
152                 stats__n_xindirs, stats__n_xindir_misses,
153                 stats__n_xindirs / (stats__n_xindir_misses
154                                     ? stats__n_xindir_misses : 1));
155    VG_(message)(Vg_DebugMsg,
156       "scheduler: %'llu/%'llu major/minor sched events.\n",
157       n_scheduling_events_MAJOR, n_scheduling_events_MINOR);
158    VG_(message)(Vg_DebugMsg,
159                 "   sanity: %d cheap, %d expensive checks.\n",
160                 sanity_fast_count, sanity_slow_count );
161 }
162 
163 /*
164  * Mutual exclusion object used to serialize threads.
165  */
166 static struct sched_lock *the_BigLock;
167 
168 
169 /* ---------------------------------------------------------------------
170    Helper functions for the scheduler.
171    ------------------------------------------------------------------ */
172 
173 static
print_sched_event(ThreadId tid,Char * what)174 void print_sched_event ( ThreadId tid, Char* what )
175 {
176    VG_(message)(Vg_DebugMsg, "  SCHED[%d]: %s\n", tid, what );
177 }
178 
179 /* For showing SB counts, if the user asks to see them. */
180 #define SHOW_SBCOUNT_EVERY (20ULL * 1000 * 1000)
181 static ULong bbs_done_lastcheck = 0;
182 
183 static
maybe_show_sb_counts(void)184 void maybe_show_sb_counts ( void )
185 {
186    Long delta = bbs_done - bbs_done_lastcheck;
187    vg_assert(delta >= 0);
188    if (UNLIKELY(delta >= SHOW_SBCOUNT_EVERY)) {
189       VG_(umsg)("%'lld superblocks executed\n", bbs_done);
190       bbs_done_lastcheck = bbs_done;
191    }
192 }
193 
194 static
name_of_sched_event(UInt event)195 HChar* name_of_sched_event ( UInt event )
196 {
197    switch (event) {
198       case VEX_TRC_JMP_TINVAL:         return "TINVAL";
199       case VEX_TRC_JMP_NOREDIR:        return "NOREDIR";
200       case VEX_TRC_JMP_SIGTRAP:        return "SIGTRAP";
201       case VEX_TRC_JMP_SIGSEGV:        return "SIGSEGV";
202       case VEX_TRC_JMP_SIGBUS:         return "SIGBUS";
203       case VEX_TRC_JMP_EMWARN:         return "EMWARN";
204       case VEX_TRC_JMP_EMFAIL:         return "EMFAIL";
205       case VEX_TRC_JMP_CLIENTREQ:      return "CLIENTREQ";
206       case VEX_TRC_JMP_YIELD:          return "YIELD";
207       case VEX_TRC_JMP_NODECODE:       return "NODECODE";
208       case VEX_TRC_JMP_MAPFAIL:        return "MAPFAIL";
209       case VEX_TRC_JMP_SYS_SYSCALL:    return "SYSCALL";
210       case VEX_TRC_JMP_SYS_INT32:      return "INT32";
211       case VEX_TRC_JMP_SYS_INT128:     return "INT128";
212       case VEX_TRC_JMP_SYS_INT129:     return "INT129";
213       case VEX_TRC_JMP_SYS_INT130:     return "INT130";
214       case VEX_TRC_JMP_SYS_SYSENTER:   return "SYSENTER";
215       case VEX_TRC_JMP_BORING:         return "VEX_BORING";
216 
217       case VG_TRC_BORING:              return "VG_BORING";
218       case VG_TRC_INNER_FASTMISS:      return "FASTMISS";
219       case VG_TRC_INNER_COUNTERZERO:   return "COUNTERZERO";
220       case VG_TRC_FAULT_SIGNAL:        return "FAULTSIGNAL";
221       case VG_TRC_INVARIANT_FAILED:    return "INVFAILED";
222       case VG_TRC_CHAIN_ME_TO_SLOW_EP: return "CHAIN_ME_SLOW";
223       case VG_TRC_CHAIN_ME_TO_FAST_EP: return "CHAIN_ME_FAST";
224       default:                         return "??UNKNOWN??";
225   }
226 }
227 
228 /* Allocate a completely empty ThreadState record. */
VG_(alloc_ThreadState)229 ThreadId VG_(alloc_ThreadState) ( void )
230 {
231    Int i;
232    for (i = 1; i < VG_N_THREADS; i++) {
233       if (VG_(threads)[i].status == VgTs_Empty) {
234 	 VG_(threads)[i].status = VgTs_Init;
235 	 VG_(threads)[i].exitreason = VgSrc_None;
236          return i;
237       }
238    }
239    VG_(printf)("vg_alloc_ThreadState: no free slots available\n");
240    VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n");
241    VG_(core_panic)("VG_N_THREADS is too low");
242    /*NOTREACHED*/
243 }
244 
245 /*
246    Mark a thread as Runnable.  This will block until the_BigLock is
247    available, so that we get exclusive access to all the shared
248    structures and the CPU.  Up until we get the_BigLock, we must not
249    touch any shared state.
250 
251    When this returns, we'll actually be running.
252  */
VG_(acquire_BigLock)253 void VG_(acquire_BigLock)(ThreadId tid, HChar* who)
254 {
255    ThreadState *tst;
256 
257 #if 0
258    if (VG_(clo_trace_sched)) {
259       HChar buf[100];
260       vg_assert(VG_(strlen)(who) <= 100-50);
261       VG_(sprintf)(buf, "waiting for lock (%s)", who);
262       print_sched_event(tid, buf);
263    }
264 #endif
265 
266    /* First, acquire the_BigLock.  We can't do anything else safely
267       prior to this point.  Even doing debug printing prior to this
268       point is, technically, wrong. */
269    VG_(acquire_BigLock_LL)(NULL);
270 
271    tst = VG_(get_ThreadState)(tid);
272 
273    vg_assert(tst->status != VgTs_Runnable);
274 
275    tst->status = VgTs_Runnable;
276 
277    if (VG_(running_tid) != VG_INVALID_THREADID)
278       VG_(printf)("tid %d found %d running\n", tid, VG_(running_tid));
279    vg_assert(VG_(running_tid) == VG_INVALID_THREADID);
280    VG_(running_tid) = tid;
281 
282    { Addr gsp = VG_(get_SP)(tid);
283      VG_(unknown_SP_update)(gsp, gsp, 0/*unknown origin*/);
284    }
285 
286    if (VG_(clo_trace_sched)) {
287       HChar buf[150];
288       vg_assert(VG_(strlen)(who) <= 150-50);
289       VG_(sprintf)(buf, " acquired lock (%s)", who);
290       print_sched_event(tid, buf);
291    }
292 }
293 
294 /*
295    Set a thread into a sleeping state, and give up exclusive access to
296    the CPU.  On return, the thread must be prepared to block until it
297    is ready to run again (generally this means blocking in a syscall,
298    but it may mean that we remain in a Runnable state and we're just
299    yielding the CPU to another thread).
300  */
VG_(release_BigLock)301 void VG_(release_BigLock)(ThreadId tid, ThreadStatus sleepstate, HChar* who)
302 {
303    ThreadState *tst = VG_(get_ThreadState)(tid);
304 
305    vg_assert(tst->status == VgTs_Runnable);
306 
307    vg_assert(sleepstate == VgTs_WaitSys ||
308 	     sleepstate == VgTs_Yielding);
309 
310    tst->status = sleepstate;
311 
312    vg_assert(VG_(running_tid) == tid);
313    VG_(running_tid) = VG_INVALID_THREADID;
314 
315    if (VG_(clo_trace_sched)) {
316       Char buf[200];
317       vg_assert(VG_(strlen)(who) <= 200-100);
318       VG_(sprintf)(buf, "releasing lock (%s) -> %s",
319                         who, VG_(name_of_ThreadStatus)(sleepstate));
320       print_sched_event(tid, buf);
321    }
322 
323    /* Release the_BigLock; this will reschedule any runnable
324       thread. */
325    VG_(release_BigLock_LL)(NULL);
326 }
327 
init_BigLock(void)328 static void init_BigLock(void)
329 {
330    vg_assert(!the_BigLock);
331    the_BigLock = ML_(create_sched_lock)();
332 }
333 
deinit_BigLock(void)334 static void deinit_BigLock(void)
335 {
336    ML_(destroy_sched_lock)(the_BigLock);
337    the_BigLock = NULL;
338 }
339 
340 /* See pub_core_scheduler.h for description */
VG_(acquire_BigLock_LL)341 void VG_(acquire_BigLock_LL) ( HChar* who )
342 {
343    ML_(acquire_sched_lock)(the_BigLock);
344 }
345 
346 /* See pub_core_scheduler.h for description */
VG_(release_BigLock_LL)347 void VG_(release_BigLock_LL) ( HChar* who )
348 {
349    ML_(release_sched_lock)(the_BigLock);
350 }
351 
VG_(owns_BigLock_LL)352 Bool VG_(owns_BigLock_LL) ( ThreadId tid )
353 {
354    return (ML_(get_sched_lock_owner)(the_BigLock)
355            == VG_(threads)[tid].os_state.lwpid);
356 }
357 
358 
359 /* Clear out the ThreadState and release the semaphore. Leaves the
360    ThreadState in VgTs_Zombie state, so that it doesn't get
361    reallocated until the caller is really ready. */
VG_(exit_thread)362 void VG_(exit_thread)(ThreadId tid)
363 {
364    vg_assert(VG_(is_valid_tid)(tid));
365    vg_assert(VG_(is_running_thread)(tid));
366    vg_assert(VG_(is_exiting)(tid));
367 
368    mostly_clear_thread_record(tid);
369    VG_(running_tid) = VG_INVALID_THREADID;
370 
371    /* There should still be a valid exitreason for this thread */
372    vg_assert(VG_(threads)[tid].exitreason != VgSrc_None);
373 
374    if (VG_(clo_trace_sched))
375       print_sched_event(tid, "release lock in VG_(exit_thread)");
376 
377    VG_(release_BigLock_LL)(NULL);
378 }
379 
380 /* If 'tid' is blocked in a syscall, send it SIGVGKILL so as to get it
381    out of the syscall and onto doing the next thing, whatever that is.
382    If it isn't blocked in a syscall, has no effect on the thread. */
VG_(get_thread_out_of_syscall)383 void VG_(get_thread_out_of_syscall)(ThreadId tid)
384 {
385    vg_assert(VG_(is_valid_tid)(tid));
386    vg_assert(!VG_(is_running_thread)(tid));
387 
388    if (VG_(threads)[tid].status == VgTs_WaitSys) {
389       if (VG_(clo_trace_signals)) {
390 	 VG_(message)(Vg_DebugMsg,
391                       "get_thread_out_of_syscall zaps tid %d lwp %d\n",
392 		      tid, VG_(threads)[tid].os_state.lwpid);
393       }
394 #     if defined(VGO_darwin)
395       {
396          // GrP fixme use mach primitives on darwin?
397          // GrP fixme thread_abort_safely?
398          // GrP fixme race for thread with WaitSys set but not in syscall yet?
399          extern kern_return_t thread_abort(mach_port_t);
400          thread_abort(VG_(threads)[tid].os_state.lwpid);
401       }
402 #     else
403       {
404          __attribute__((unused))
405          Int r = VG_(tkill)(VG_(threads)[tid].os_state.lwpid, VG_SIGVGKILL);
406          /* JRS 2009-Mar-20: should we assert for r==0 (tkill succeeded)?
407             I'm really not sure.  Here's a race scenario which argues
408             that we shoudn't; but equally I'm not sure the scenario is
409             even possible, because of constraints caused by the question
410             of who holds the BigLock when.
411 
412             Target thread tid does sys_read on a socket and blocks.  This
413             function gets called, and we observe correctly that tid's
414             status is WaitSys but then for whatever reason this function
415             goes very slowly for a while.  Then data arrives from
416             wherever, tid's sys_read returns, tid exits.  Then we do
417             tkill on tid, but tid no longer exists; tkill returns an
418             error code and the assert fails. */
419          /* vg_assert(r == 0); */
420       }
421 #     endif
422    }
423 }
424 
425 /*
426    Yield the CPU for a short time to let some other thread run.
427  */
VG_(vg_yield)428 void VG_(vg_yield)(void)
429 {
430    ThreadId tid = VG_(running_tid);
431 
432    vg_assert(tid != VG_INVALID_THREADID);
433    vg_assert(VG_(threads)[tid].os_state.lwpid == VG_(gettid)());
434 
435    VG_(release_BigLock)(tid, VgTs_Yielding, "VG_(vg_yield)");
436 
437    /*
438       Tell the kernel we're yielding.
439     */
440    VG_(do_syscall0)(__NR_sched_yield);
441 
442    VG_(acquire_BigLock)(tid, "VG_(vg_yield)");
443 }
444 
445 
446 /* Set the standard set of blocked signals, used whenever we're not
447    running a client syscall. */
block_signals(void)448 static void block_signals(void)
449 {
450    vki_sigset_t mask;
451 
452    VG_(sigfillset)(&mask);
453 
454    /* Don't block these because they're synchronous */
455    VG_(sigdelset)(&mask, VKI_SIGSEGV);
456    VG_(sigdelset)(&mask, VKI_SIGBUS);
457    VG_(sigdelset)(&mask, VKI_SIGFPE);
458    VG_(sigdelset)(&mask, VKI_SIGILL);
459    VG_(sigdelset)(&mask, VKI_SIGTRAP);
460 
461    /* Can't block these anyway */
462    VG_(sigdelset)(&mask, VKI_SIGSTOP);
463    VG_(sigdelset)(&mask, VKI_SIGKILL);
464 
465    VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, NULL);
466 }
467 
os_state_clear(ThreadState * tst)468 static void os_state_clear(ThreadState *tst)
469 {
470    tst->os_state.lwpid       = 0;
471    tst->os_state.threadgroup = 0;
472 #  if defined(VGO_linux)
473    /* no other fields to clear */
474 #  elif defined(VGO_darwin)
475    tst->os_state.post_mach_trap_fn = NULL;
476    tst->os_state.pthread           = 0;
477    tst->os_state.func_arg          = 0;
478    VG_(memset)(&tst->os_state.child_go, 0, sizeof(tst->os_state.child_go));
479    VG_(memset)(&tst->os_state.child_done, 0, sizeof(tst->os_state.child_done));
480    tst->os_state.wq_jmpbuf_valid   = False;
481    tst->os_state.remote_port       = 0;
482    tst->os_state.msgh_id           = 0;
483    VG_(memset)(&tst->os_state.mach_args, 0, sizeof(tst->os_state.mach_args));
484 #  else
485 #    error "Unknown OS"
486 #  endif
487 }
488 
/* First-time initialisation of the OS-specific part of 'tst': zero
   the Valgrind stack fields, then clear the rest via
   os_state_clear(). */
static void os_state_init(ThreadState *tst)
{
   tst->os_state.valgrind_stack_init_SP = 0;
   tst->os_state.valgrind_stack_base    = 0;

   os_state_clear(tst);
}
495 
496 static
mostly_clear_thread_record(ThreadId tid)497 void mostly_clear_thread_record ( ThreadId tid )
498 {
499    vki_sigset_t savedmask;
500 
501    vg_assert(tid >= 0 && tid < VG_N_THREADS);
502    VG_(cleanup_thread)(&VG_(threads)[tid].arch);
503    VG_(threads)[tid].tid = tid;
504 
505    /* Leave the thread in Zombie, so that it doesn't get reallocated
506       until the caller is finally done with the thread stack. */
507    VG_(threads)[tid].status               = VgTs_Zombie;
508 
509    VG_(sigemptyset)(&VG_(threads)[tid].sig_mask);
510    VG_(sigemptyset)(&VG_(threads)[tid].tmp_sig_mask);
511 
512    os_state_clear(&VG_(threads)[tid]);
513 
514    /* start with no altstack */
515    VG_(threads)[tid].altstack.ss_sp = (void *)0xdeadbeef;
516    VG_(threads)[tid].altstack.ss_size = 0;
517    VG_(threads)[tid].altstack.ss_flags = VKI_SS_DISABLE;
518 
519    VG_(clear_out_queued_signals)(tid, &savedmask);
520 
521    VG_(threads)[tid].sched_jmpbuf_valid = False;
522 }
523 
524 /*
525    Called in the child after fork.  If the parent has multiple
526    threads, then we've inherited a VG_(threads) array describing them,
527    but only the thread which called fork() is actually alive in the
528    child.  This functions needs to clean up all those other thread
529    structures.
530 
531    Whichever tid in the parent which called fork() becomes the
532    master_tid in the child.  That's because the only living slot in
533    VG_(threads) in the child after fork is VG_(threads)[tid], and it
534    would be too hard to try to re-number the thread and relocate the
535    thread state down to VG_(threads)[1].
536 
537    This function also needs to reinitialize the_BigLock, since
538    otherwise we may end up sharing its state with the parent, which
539    would be deeply confusing.
540 */
sched_fork_cleanup(ThreadId me)541 static void sched_fork_cleanup(ThreadId me)
542 {
543    ThreadId tid;
544    vg_assert(VG_(running_tid) == me);
545 
546 #  if defined(VGO_darwin)
547    // GrP fixme hack reset Mach ports
548    VG_(mach_init)();
549 #  endif
550 
551    VG_(threads)[me].os_state.lwpid = VG_(gettid)();
552    VG_(threads)[me].os_state.threadgroup = VG_(getpid)();
553 
554    /* clear out all the unused thread slots */
555    for (tid = 1; tid < VG_N_THREADS; tid++) {
556       if (tid != me) {
557          mostly_clear_thread_record(tid);
558 	 VG_(threads)[tid].status = VgTs_Empty;
559          VG_(clear_syscallInfo)(tid);
560       }
561    }
562 
563    /* re-init and take the sema */
564    deinit_BigLock();
565    init_BigLock();
566    VG_(acquire_BigLock_LL)(NULL);
567 }
568 
569 
570 /* First phase of initialisation of the scheduler.  Initialise the
571    bigLock, zeroise the VG_(threads) structure and decide on the
572    ThreadId of the root thread.
573 */
VG_(scheduler_init_phase1)574 ThreadId VG_(scheduler_init_phase1) ( void )
575 {
576    Int i;
577    ThreadId tid_main;
578 
579    VG_(debugLog)(1,"sched","sched_init_phase1\n");
580 
581    if (VG_(clo_fair_sched) != disable_fair_sched
582        && !ML_(set_sched_lock_impl)(sched_lock_ticket)
583        && VG_(clo_fair_sched) == enable_fair_sched)
584    {
585       VG_(printf)("Error: fair scheduling is not supported on this system.\n");
586       VG_(exit)(1);
587    }
588 
589    if (VG_(clo_verbosity) > 1) {
590       VG_(message)(Vg_DebugMsg,
591                    "Scheduler: using %s scheduler lock implementation.\n",
592                    ML_(get_sched_lock_name)());
593    }
594 
595    init_BigLock();
596 
597    for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
598       /* Paranoia .. completely zero it out. */
599       VG_(memset)( & VG_(threads)[i], 0, sizeof( VG_(threads)[i] ) );
600 
601       VG_(threads)[i].sig_queue = NULL;
602 
603       os_state_init(&VG_(threads)[i]);
604       mostly_clear_thread_record(i);
605 
606       VG_(threads)[i].status                    = VgTs_Empty;
607       VG_(threads)[i].client_stack_szB          = 0;
608       VG_(threads)[i].client_stack_highest_word = (Addr)NULL;
609       VG_(threads)[i].err_disablement_level     = 0;
610    }
611 
612    tid_main = VG_(alloc_ThreadState)();
613 
614    /* Bleh.  Unfortunately there are various places in the system that
615       assume that the main thread has a ThreadId of 1.
616       - Helgrind (possibly)
617       - stack overflow message in default_action() in m_signals.c
618       - definitely a lot more places
619    */
620    vg_assert(tid_main == 1);
621 
622    return tid_main;
623 }
624 
625 
626 /* Second phase of initialisation of the scheduler.  Given the root
627    ThreadId computed by first phase of initialisation, fill in stack
628    details and acquire bigLock.  Initialise the scheduler.  This is
629    called at startup.  The caller subsequently initialises the guest
630    state components of this main thread.
631 */
VG_(scheduler_init_phase2)632 void VG_(scheduler_init_phase2) ( ThreadId tid_main,
633                                   Addr     clstack_end,
634                                   SizeT    clstack_size )
635 {
636    VG_(debugLog)(1,"sched","sched_init_phase2: tid_main=%d, "
637                    "cls_end=0x%lx, cls_sz=%ld\n",
638                    tid_main, clstack_end, clstack_size);
639 
640    vg_assert(VG_IS_PAGE_ALIGNED(clstack_end+1));
641    vg_assert(VG_IS_PAGE_ALIGNED(clstack_size));
642 
643    VG_(threads)[tid_main].client_stack_highest_word
644       = clstack_end + 1 - sizeof(UWord);
645    VG_(threads)[tid_main].client_stack_szB
646       = clstack_size;
647 
648    VG_(atfork)(NULL, NULL, sched_fork_cleanup);
649 }
650 
651 
652 /* ---------------------------------------------------------------------
653    Helpers for running translations.
654    ------------------------------------------------------------------ */
655 
/* Run 'stmt' under the protection of thread tid's scheduler jmpbuf,
   using gcc's built-in setjmp/longjmp.  longjmp must not restore
   signal mask state, but does need to pass "val" through.  'jumped'
   must be a volatile UWord: it receives the setjmp result, so it is
   zero if 'stmt' was run and non-zero if we arrived here through a
   longjmp.  sched_jmpbuf_valid is set while 'stmt' runs and is
   cleared again on the way out in both cases. */
#define SCHEDSETJMP(tid, jumped, stmt)                                  \
   do {                                                                 \
      ThreadState * volatile _qq_tst = VG_(get_ThreadState)(tid);       \
                                                                        \
      (jumped) = VG_MINIMAL_SETJMP(_qq_tst->sched_jmpbuf);              \
      if ((jumped) == ((UWord)0)) {                                     \
         /* Direct return from setjmp: arm the jmpbuf, run stmt. */     \
         vg_assert(!_qq_tst->sched_jmpbuf_valid);                       \
         _qq_tst->sched_jmpbuf_valid = True;                            \
         stmt;                                                          \
      } else if (VG_(clo_trace_sched)) {                                \
         /* Arrived via longjmp: just trace the fact. */                \
         VG_(printf)("SCHEDSETJMP(line %d) tid %d, jumped=%ld\n",       \
                     __LINE__, (tid), (jumped));                        \
      }                                                                 \
      vg_assert(_qq_tst->sched_jmpbuf_valid);                           \
      _qq_tst->sched_jmpbuf_valid = False;                              \
   } while(0)
674 
675 
676 /* Do various guest state alignment checks prior to running a thread.
677    Specifically, check that what we have matches Vex's guest state
678    layout requirements.  See libvex.h for details, but in short the
679    requirements are: There must be no holes in between the primary
680    guest state, its two copies, and the spill area.  In short, all 4
681    areas must have a 16-aligned size and be 16-aligned, and placed
682    back-to-back. */
do_pre_run_checks(ThreadState * tst)683 static void do_pre_run_checks ( ThreadState* tst )
684 {
685    Addr a_vex     = (Addr) & tst->arch.vex;
686    Addr a_vexsh1  = (Addr) & tst->arch.vex_shadow1;
687    Addr a_vexsh2  = (Addr) & tst->arch.vex_shadow2;
688    Addr a_spill   = (Addr) & tst->arch.vex_spill;
689    UInt sz_vex    = (UInt) sizeof tst->arch.vex;
690    UInt sz_vexsh1 = (UInt) sizeof tst->arch.vex_shadow1;
691    UInt sz_vexsh2 = (UInt) sizeof tst->arch.vex_shadow2;
692    UInt sz_spill  = (UInt) sizeof tst->arch.vex_spill;
693 
694    if (0)
695    VG_(printf)("gst %p %d, sh1 %p %d, "
696                "sh2 %p %d, spill %p %d\n",
697                (void*)a_vex, sz_vex,
698                (void*)a_vexsh1, sz_vexsh1,
699                (void*)a_vexsh2, sz_vexsh2,
700                (void*)a_spill, sz_spill );
701 
702    vg_assert(VG_IS_16_ALIGNED(sz_vex));
703    vg_assert(VG_IS_16_ALIGNED(sz_vexsh1));
704    vg_assert(VG_IS_16_ALIGNED(sz_vexsh2));
705    vg_assert(VG_IS_16_ALIGNED(sz_spill));
706 
707    vg_assert(VG_IS_16_ALIGNED(a_vex));
708    vg_assert(VG_IS_16_ALIGNED(a_vexsh1));
709    vg_assert(VG_IS_16_ALIGNED(a_vexsh2));
710    vg_assert(VG_IS_16_ALIGNED(a_spill));
711 
712    /* Check that the guest state and its two shadows have the same
713       size, and that there are no holes in between.  The latter is
714       important because Memcheck assumes that it can reliably access
715       the shadows by indexing off a pointer to the start of the
716       primary guest state area. */
717    vg_assert(sz_vex == sz_vexsh1);
718    vg_assert(sz_vex == sz_vexsh2);
719    vg_assert(a_vex + 1 * sz_vex == a_vexsh1);
720    vg_assert(a_vex + 2 * sz_vex == a_vexsh2);
721    /* Also check there's no hole between the second shadow area and
722       the spill area. */
723    vg_assert(sz_spill == LibVEX_N_SPILL_BYTES);
724    vg_assert(a_vex + 3 * sz_vex == a_spill);
725 
726 #  if defined(VGA_x86)
727    /* x86 XMM regs must form an array, ie, have no holes in
728       between. */
729    vg_assert(
730       (offsetof(VexGuestX86State,guest_XMM7)
731        - offsetof(VexGuestX86State,guest_XMM0))
732       == (8/*#regs*/-1) * 16/*bytes per reg*/
733    );
734    vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestX86State,guest_XMM0)));
735    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestX86State,guest_FPREG)));
736    vg_assert(8 == offsetof(VexGuestX86State,guest_EAX));
737    vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EAX)));
738    vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EIP)));
739 #  endif
740 
741 #  if defined(VGA_amd64)
742    /* amd64 YMM regs must form an array, ie, have no holes in
743       between. */
744    vg_assert(
745       (offsetof(VexGuestAMD64State,guest_YMM16)
746        - offsetof(VexGuestAMD64State,guest_YMM0))
747       == (17/*#regs*/-1) * 32/*bytes per reg*/
748    );
749    vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestAMD64State,guest_YMM0)));
750    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_FPREG)));
751    vg_assert(16 == offsetof(VexGuestAMD64State,guest_RAX));
752    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RAX)));
753    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RIP)));
754 #  endif
755 
756 #  if defined(VGA_ppc32) || defined(VGA_ppc64)
757    /* ppc guest_state vector regs must be 16 byte aligned for
758       loads/stores.  This is important! */
759    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR0));
760    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR0));
761    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR0));
762    /* be extra paranoid .. */
763    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR1));
764    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR1));
765    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR1));
766 #  endif
767 
768 #  if defined(VGA_arm)
769    /* arm guest_state VFP regs must be 8 byte aligned for
770       loads/stores.  Let's use 16 just to be on the safe side. */
771    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_D0));
772    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_D0));
773    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_D0));
774    /* be extra paranoid .. */
775    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_D1));
776    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_D1));
777    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_D1));
778 #  endif
779 
780 #  if defined(VGA_s390x)
781    /* no special requirements */
782 #  endif
783 
784 #  if defined(VGA_mips32)
785   /* no special requirements */
786 #  endif
787 }
788 
789 // NO_VGDB_POLL value ensures vgdb is not polled, while
790 // VGDB_POLL_ASAP ensures that the next scheduler call
791 // will cause a poll.
792 #define NO_VGDB_POLL    0xffffffffffffffffULL
793 #define VGDB_POLL_ASAP  0x0ULL
794 
VG_(disable_vgdb_poll)795 void VG_(disable_vgdb_poll) (void )
796 {
797    vgdb_next_poll = NO_VGDB_POLL;
798 }
VG_(force_vgdb_poll)799 void VG_(force_vgdb_poll) ( void )
800 {
801    vgdb_next_poll = VGDB_POLL_ASAP;
802 }
803 
804 /* Run the thread tid for a while, and return a VG_TRC_* value
805    indicating why VG_(disp_run_translations) stopped, and possibly an
806    auxiliary word.  Also, only allow the thread to run for at most
807    *dispatchCtrP events.  If (as is the normal case) use_alt_host_addr
808    is False, we are running ordinary redir'd translations, and we
809    should therefore start by looking up the guest next IP in TT.  If
810    it is True then we ignore the guest next IP and just run from
811    alt_host_addr, which presumably points at host code for a no-redir
812    translation.
813 
814    Return results are placed in two_words.  two_words[0] is set to the
815    TRC.  In the case where that is VG_TRC_CHAIN_ME_TO_{SLOW,FAST}_EP,
816    the address to patch is placed in two_words[1].
817 */
818 static
run_thread_for_a_while(HWord * two_words,Int * dispatchCtrP,ThreadId tid,HWord alt_host_addr,Bool use_alt_host_addr)819 void run_thread_for_a_while ( /*OUT*/HWord* two_words,
820                               /*MOD*/Int*   dispatchCtrP,
821                               ThreadId      tid,
822                               HWord         alt_host_addr,
823                               Bool          use_alt_host_addr )
824 {
825    volatile HWord        jumped         = 0;
826    volatile ThreadState* tst            = NULL; /* stop gcc complaining */
827    volatile Int          done_this_time = 0;
828    volatile HWord        host_code_addr = 0;
829 
830    /* Paranoia */
831    vg_assert(VG_(is_valid_tid)(tid));
832    vg_assert(VG_(is_running_thread)(tid));
833    vg_assert(!VG_(is_exiting)(tid));
834    vg_assert(*dispatchCtrP > 0);
835 
836    tst = VG_(get_ThreadState)(tid);
837    do_pre_run_checks( (ThreadState*)tst );
838    /* end Paranoia */
839 
840    /* Futz with the XIndir stats counters. */
841    vg_assert(VG_(stats__n_xindirs_32) == 0);
842    vg_assert(VG_(stats__n_xindir_misses_32) == 0);
843 
844    /* Clear return area. */
845    two_words[0] = two_words[1] = 0;
846 
847    /* Figure out where we're starting from. */
848    if (use_alt_host_addr) {
849       /* unusual case -- no-redir translation */
850       host_code_addr = alt_host_addr;
851    } else {
852       /* normal case -- redir translation */
853       UInt cno = (UInt)VG_TT_FAST_HASH((Addr)tst->arch.vex.VG_INSTR_PTR);
854       if (LIKELY(VG_(tt_fast)[cno].guest == (Addr)tst->arch.vex.VG_INSTR_PTR))
855          host_code_addr = VG_(tt_fast)[cno].host;
856       else {
857          AddrH res   = 0;
858          /* not found in VG_(tt_fast). Searching here the transtab
859             improves the performance compared to returning directly
860             to the scheduler. */
861          Bool  found = VG_(search_transtab)(&res, NULL, NULL,
862                                             (Addr)tst->arch.vex.VG_INSTR_PTR,
863                                             True/*upd cache*/
864                                             );
865          if (LIKELY(found)) {
866             host_code_addr = res;
867          } else {
868             /* At this point, we know that we intended to start at a
869                normal redir translation, but it was not found.  In
870                which case we can return now claiming it's not
871                findable. */
872             two_words[0] = VG_TRC_INNER_FASTMISS; /* hmm, is that right? */
873             return;
874          }
875       }
876    }
877    /* We have either a no-redir or a redir translation. */
878    vg_assert(host_code_addr != 0); /* implausible */
879 
880    /* there should be no undealt-with signals */
881    //vg_assert(VG_(threads)[tid].siginfo.si_signo == 0);
882 
883    /* Set up event counter stuff for the run. */
884    tst->arch.vex.host_EvC_COUNTER = *dispatchCtrP;
885    tst->arch.vex.host_EvC_FAILADDR
886       = (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail) );
887 
888    if (0) {
889       vki_sigset_t m;
890       Int i, err = VG_(sigprocmask)(VKI_SIG_SETMASK, NULL, &m);
891       vg_assert(err == 0);
892       VG_(printf)("tid %d: entering code with unblocked signals: ", tid);
893       for (i = 1; i <= _VKI_NSIG; i++)
894          if (!VG_(sigismember)(&m, i))
895             VG_(printf)("%d ", i);
896       VG_(printf)("\n");
897    }
898 
899    /* Set up return-value area. */
900 
901    // Tell the tool this thread is about to run client code
902    VG_TRACK( start_client_code, tid, bbs_done );
903 
904    vg_assert(VG_(in_generated_code) == False);
905    VG_(in_generated_code) = True;
906 
907    SCHEDSETJMP(
908       tid,
909       jumped,
910       VG_(disp_run_translations)(
911          two_words,
912          (void*)&tst->arch.vex,
913          host_code_addr
914       )
915    );
916 
917    vg_assert(VG_(in_generated_code) == True);
918    VG_(in_generated_code) = False;
919 
920    if (jumped != (HWord)0) {
921       /* We get here if the client took a fault that caused our signal
922          handler to longjmp. */
923       vg_assert(two_words[0] == 0 && two_words[1] == 0); // correct?
924       two_words[0] = VG_TRC_FAULT_SIGNAL;
925       two_words[1] = 0;
926       block_signals();
927    }
928 
929    /* Merge the 32-bit XIndir/miss counters into the 64 bit versions,
930       and zero out the 32-bit ones in preparation for the next run of
931       generated code. */
932    stats__n_xindirs += (ULong)VG_(stats__n_xindirs_32);
933    VG_(stats__n_xindirs_32) = 0;
934    stats__n_xindir_misses += (ULong)VG_(stats__n_xindir_misses_32);
935    VG_(stats__n_xindir_misses_32) = 0;
936 
937    /* Inspect the event counter. */
938    vg_assert((Int)tst->arch.vex.host_EvC_COUNTER >= -1);
939    vg_assert(tst->arch.vex.host_EvC_FAILADDR
940              == (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail)) );
941 
942    done_this_time = *dispatchCtrP - ((Int)tst->arch.vex.host_EvC_COUNTER + 1);
943 
944    vg_assert(done_this_time >= 0);
945    bbs_done += (ULong)done_this_time;
946 
947    *dispatchCtrP -= done_this_time;
948    vg_assert(*dispatchCtrP >= 0);
949 
950    // Tell the tool this thread has stopped running client code
951    VG_TRACK( stop_client_code, tid, bbs_done );
952 
953    if (bbs_done >= vgdb_next_poll) {
954       if (VG_(clo_vgdb_poll))
955          vgdb_next_poll = bbs_done + (ULong)VG_(clo_vgdb_poll);
956       else
957          /* value was changed due to gdbserver invocation via ptrace */
958          vgdb_next_poll = NO_VGDB_POLL;
959       if (VG_(gdbserver_activity) (tid))
960          VG_(gdbserver) (tid);
961    }
962 
963    /* TRC value and possible auxiliary patch-address word are already
964       in two_words[0] and [1] respectively, as a result of the call to
965       VG_(run_innerloop). */
966    /* Stay sane .. */
967    if (two_words[0] == VG_TRC_CHAIN_ME_TO_SLOW_EP
968        || two_words[0] == VG_TRC_CHAIN_ME_TO_FAST_EP) {
969       vg_assert(two_words[1] != 0); /* we have a legit patch addr */
970    } else {
971       vg_assert(two_words[1] == 0); /* nobody messed with it */
972    }
973 }
974 
975 
976 /* ---------------------------------------------------------------------
977    The scheduler proper.
978    ------------------------------------------------------------------ */
979 
/* Handle a miss in the fast translation cache for tid's current IP:
   do a full transtab lookup (updating the fast cache) and, if that
   also fails, request a translation. */
static void handle_tt_miss ( ThreadId tid )
{
   Addr ip    = VG_(get_IP)(tid);

   /* Trivial event.  Miss in the fast-cache.  Do a full lookup. */
   Bool found = VG_(search_transtab)( NULL, NULL, NULL,
                                      ip, True/*upd_fast_cache*/ );

   if (UNLIKELY(!found)) {
      /* Not found; we need to request a translation. */
      Bool translated
         = VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
                           bbs_done, True/*allow redirection*/ );

      // If VG_(translate)() fails, it's because it had to throw a
      // signal because the client jumped to a bad address.  That
      // means that either a signal has been set up for delivery,
      // or the thread has been marked for termination.  Either
      // way, we just need to go back into the scheduler loop.
      if (!translated)
         return;

      /* The fresh translation must now be findable. */
      found = VG_(search_transtab)( NULL, NULL, NULL,
                                    ip, True );
      vg_assert2(found, "handle_tt_miss: missing tt_fast entry");
   }
}
1006 
1007 static
handle_chain_me(ThreadId tid,void * place_to_chain,Bool toFastEP)1008 void handle_chain_me ( ThreadId tid, void* place_to_chain, Bool toFastEP )
1009 {
1010    Bool found          = False;
1011    Addr ip             = VG_(get_IP)(tid);
1012    UInt to_sNo         = (UInt)-1;
1013    UInt to_tteNo       = (UInt)-1;
1014 
1015    found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
1016                                  ip, False/*dont_upd_fast_cache*/ );
1017    if (!found) {
1018       /* Not found; we need to request a translation. */
1019       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
1020                           bbs_done, True/*allow redirection*/ )) {
1021          found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
1022                                        ip, False );
1023          vg_assert2(found, "handle_chain_me: missing tt_fast entry");
1024       } else {
1025 	 // If VG_(translate)() fails, it's because it had to throw a
1026 	 // signal because the client jumped to a bad address.  That
1027 	 // means that either a signal has been set up for delivery,
1028 	 // or the thread has been marked for termination.  Either
1029 	 // way, we just need to go back into the scheduler loop.
1030         return;
1031       }
1032    }
1033    vg_assert(found);
1034    vg_assert(to_sNo != -1);
1035    vg_assert(to_tteNo != -1);
1036 
1037    /* So, finally we know where to patch through to.  Do the patching
1038       and update the various admin tables that allow it to be undone
1039       in the case that the destination block gets deleted. */
1040    VG_(tt_tc_do_chaining)( place_to_chain,
1041                            to_sNo, to_tteNo, toFastEP );
1042 }
1043 
/* Perform the syscall that thread tid has just requested.

   tid  the thread making the syscall; it must still be the running
        thread when the syscall returns.
   trc  the thread return code that signalled the syscall; passed
        through to VG_(client_syscall).

   If the SCHEDSETJMP below reports a non-zero 'jumped' value, signals
   are re-blocked and pending signals are polled for. */
static void handle_syscall(ThreadId tid, UInt trc)
{
   ThreadState * volatile tst = VG_(get_ThreadState)(tid);
   volatile UWord jumped = 0;

   /* Syscall may or may not block; either way, it will be
      complete by the time this call returns, and we'll be
      runnable again.  We could take a signal while the
      syscall runs. */

   /* Note: the comparison belongs outside the VG_() name-mangling
      macro. */
   if (VG_(clo_sanity_level) >= 3)
      VG_(am_do_sync_check)("(BEFORE SYSCALL)",__FILE__,__LINE__);

   SCHEDSETJMP(tid, jumped, VG_(client_syscall)(tid, trc));

   if (VG_(clo_sanity_level) >= 3)
      VG_(am_do_sync_check)("(AFTER SYSCALL)",__FILE__,__LINE__);

   /* Print some diagnostics before the assertion below fires. */
   if (!VG_(is_running_thread)(tid))
      VG_(printf)("tid %d not running; VG_(running_tid)=%d, tid %d status %d\n",
                  tid, VG_(running_tid), tid, tst->status);
   vg_assert(VG_(is_running_thread)(tid));

   if (jumped != (UWord)0) {
      block_signals();
      VG_(poll_signals)(tid);
   }
}
1072 
1073 /* tid just requested a jump to the noredir version of its current
1074    program counter.  So make up that translation if needed, run it,
1075    and return the resulting thread return code in two_words[]. */
1076 static
handle_noredir_jump(HWord * two_words,Int * dispatchCtrP,ThreadId tid)1077 void handle_noredir_jump ( /*OUT*/HWord* two_words,
1078                            /*MOD*/Int*   dispatchCtrP,
1079                            ThreadId tid )
1080 {
1081    /* Clear return area. */
1082    two_words[0] = two_words[1] = 0;
1083 
1084    AddrH hcode = 0;
1085    Addr  ip    = VG_(get_IP)(tid);
1086 
1087    Bool  found = VG_(search_unredir_transtab)( &hcode, ip );
1088    if (!found) {
1089       /* Not found; we need to request a translation. */
1090       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/, bbs_done,
1091                           False/*NO REDIRECTION*/ )) {
1092 
1093          found = VG_(search_unredir_transtab)( &hcode, ip );
1094          vg_assert2(found, "unredir translation missing after creation?!");
1095       } else {
1096 	 // If VG_(translate)() fails, it's because it had to throw a
1097 	 // signal because the client jumped to a bad address.  That
1098 	 // means that either a signal has been set up for delivery,
1099 	 // or the thread has been marked for termination.  Either
1100 	 // way, we just need to go back into the scheduler loop.
1101          two_words[0] = VG_TRC_BORING;
1102          return;
1103       }
1104 
1105    }
1106 
1107    vg_assert(found);
1108    vg_assert(hcode != 0);
1109 
1110    /* Otherwise run it and return the resulting VG_TRC_* value. */
1111    vg_assert(*dispatchCtrP > 0); /* so as to guarantee progress */
1112    run_thread_for_a_while( two_words, dispatchCtrP, tid,
1113                            hcode, True/*use hcode*/ );
1114 }
1115 
1116 
1117 /*
1118    Run a thread until it wants to exit.
1119 
1120    We assume that the caller has already called VG_(acquire_BigLock) for
1121    us, so we own the VCPU.  Also, all signals are blocked.
1122  */
VG_(scheduler)1123 VgSchedReturnCode VG_(scheduler) ( ThreadId tid )
1124 {
1125    /* Holds the remaining size of this thread's "timeslice". */
1126    Int dispatch_ctr = 0;
1127 
1128    ThreadState *tst = VG_(get_ThreadState)(tid);
1129    static Bool vgdb_startup_action_done = False;
1130 
1131    if (VG_(clo_trace_sched))
1132       print_sched_event(tid, "entering VG_(scheduler)");
1133 
1134    /* Do vgdb initialization (but once). Only the first (main) task
1135       starting up will do the below.
1136       Initialize gdbserver earlier than at the first
1137       thread VG_(scheduler) is causing problems:
1138       * at the end of VG_(scheduler_init_phase2) :
1139         The main thread is in VgTs_Init state, but in a not yet
1140         consistent state => the thread cannot be reported to gdb
1141         (e.g. causes an assert in LibVEX_GuestX86_get_eflags when giving
1142         back the guest registers to gdb).
1143       * at end of valgrind_main, just
1144         before VG_(main_thread_wrapper_NORETURN)(1) :
1145         The main thread is still in VgTs_Init state but in a
1146         more advanced state. However, the thread state is not yet
1147         completely initialized : a.o., the os_state is not yet fully
1148         set => the thread is then not properly reported to gdb,
1149         which is then confused (causing e.g. a duplicate thread be
1150         shown, without thread id).
1151       * it would be possible to initialize gdbserver "lower" in the
1152         call stack (e.g. in VG_(main_thread_wrapper_NORETURN)) but
1153         these are platform dependent and the place at which
1154         the thread state is completely initialized is not
1155         specific anymore to the main thread (so a similar "do it only
1156         once" would be needed).
1157 
1158         => a "once only" initialization here is the best compromise. */
1159    if (!vgdb_startup_action_done) {
1160       vg_assert(tid == 1); // it must be the main thread.
1161       vgdb_startup_action_done = True;
1162       if (VG_(clo_vgdb) != Vg_VgdbNo) {
1163          /* If we have to poll, ensures we do an initial poll at first
1164             scheduler call. Otherwise, ensure no poll (unless interrupted
1165             by ptrace). */
1166          if (VG_(clo_vgdb_poll))
1167             VG_(force_vgdb_poll) ();
1168          else
1169             VG_(disable_vgdb_poll) ();
1170 
1171          vg_assert (VG_(dyn_vgdb_error) == VG_(clo_vgdb_error));
1172          /* As we are initializing, VG_(dyn_vgdb_error) can't have been
1173             changed yet. */
1174 
1175          VG_(gdbserver_prerun_action) (1);
1176       } else {
1177          VG_(disable_vgdb_poll) ();
1178       }
1179    }
1180 
1181    /* set the proper running signal mask */
1182    block_signals();
1183 
1184    vg_assert(VG_(is_running_thread)(tid));
1185 
1186    dispatch_ctr = SCHEDULING_QUANTUM;
1187 
1188    while (!VG_(is_exiting)(tid)) {
1189 
1190       vg_assert(dispatch_ctr >= 0);
1191       if (dispatch_ctr == 0) {
1192 
1193 	 /* Our slice is done, so yield the CPU to another thread.  On
1194             Linux, this doesn't sleep between sleeping and running,
1195             since that would take too much time. */
1196 
1197 	 /* 4 July 06: it seems that a zero-length nsleep is needed to
1198             cause async thread cancellation (canceller.c) to terminate
1199             in finite time; else it is in some kind of race/starvation
1200             situation and completion is arbitrarily delayed (although
1201             this is not a deadlock).
1202 
1203             Unfortunately these sleeps cause MPI jobs not to terminate
1204             sometimes (some kind of livelock).  So sleeping once
1205             every N opportunities appears to work. */
1206 
1207 	 /* 3 Aug 06: doing sys__nsleep works but crashes some apps.
1208             sys_yield also helps the problem, whilst not crashing apps. */
1209 
1210 	 VG_(release_BigLock)(tid, VgTs_Yielding,
1211                                    "VG_(scheduler):timeslice");
1212 	 /* ------------ now we don't have The Lock ------------ */
1213 
1214 	 VG_(acquire_BigLock)(tid, "VG_(scheduler):timeslice");
1215 	 /* ------------ now we do have The Lock ------------ */
1216 
1217 	 /* OK, do some relatively expensive housekeeping stuff */
1218 	 scheduler_sanity(tid);
1219 	 VG_(sanity_check_general)(False);
1220 
1221 	 /* Look for any pending signals for this thread, and set them up
1222 	    for delivery */
1223 	 VG_(poll_signals)(tid);
1224 
1225 	 if (VG_(is_exiting)(tid))
1226 	    break;		/* poll_signals picked up a fatal signal */
1227 
1228 	 /* For stats purposes only. */
1229 	 n_scheduling_events_MAJOR++;
1230 
1231 	 /* Figure out how many bbs to ask vg_run_innerloop to do.  Note
1232 	    that it decrements the counter before testing it for zero, so
1233 	    that if tst->dispatch_ctr is set to N you get at most N-1
1234 	    iterations.  Also this means that tst->dispatch_ctr must
1235 	    exceed zero before entering the innerloop.  Also also, the
1236 	    decrement is done before the bb is actually run, so you
1237 	    always get at least one decrement even if nothing happens. */
1238          // FIXME is this right?
1239          dispatch_ctr = SCHEDULING_QUANTUM;
1240 
1241 	 /* paranoia ... */
1242 	 vg_assert(tst->tid == tid);
1243 	 vg_assert(tst->os_state.lwpid == VG_(gettid)());
1244       }
1245 
1246       /* For stats purposes only. */
1247       n_scheduling_events_MINOR++;
1248 
1249       if (0)
1250          VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs\n",
1251                                    tid, dispatch_ctr - 1 );
1252 
1253       HWord trc[2]; /* "two_words" */
1254       run_thread_for_a_while( &trc[0],
1255                               &dispatch_ctr,
1256                               tid, 0/*ignored*/, False );
1257 
1258       if (VG_(clo_trace_sched) && VG_(clo_verbosity) > 2) {
1259 	 HChar buf[50];
1260 	 VG_(sprintf)(buf, "TRC: %s", name_of_sched_event(trc[0]));
1261 	 print_sched_event(tid, buf);
1262       }
1263 
1264       if (trc[0] == VEX_TRC_JMP_NOREDIR) {
1265          /* If we got a request to run a no-redir version of
1266             something, do so now -- handle_noredir_jump just (creates
1267             and) runs that one translation.  The flip side is that the
1268             noredir translation can't itself return another noredir
1269             request -- that would be nonsensical.  It can, however,
1270             return VG_TRC_BORING, which just means keep going as
1271             normal. */
1272          /* Note that the fact that we need to continue with a
1273             no-redir jump is not recorded anywhere else in this
1274             thread's state.  So we *must* execute the block right now
1275             -- we can't fail to execute it and later resume with it,
1276             because by then we'll have forgotten the fact that it
1277             should be run as no-redir, but will get run as a normal
1278             potentially-redir'd, hence screwing up.  This really ought
1279             to be cleaned up, by noting in the guest state that the
1280             next block to be executed should be no-redir.  Then we can
1281             suspend and resume at any point, which isn't the case at
1282             the moment. */
1283          handle_noredir_jump( &trc[0],
1284                               &dispatch_ctr,
1285                               tid );
1286          vg_assert(trc[0] != VEX_TRC_JMP_NOREDIR);
1287 
1288          /* This can't be allowed to happen, since it means the block
1289             didn't execute, and we have no way to resume-as-noredir
1290             after we get more timeslice.  But I don't think it ever
1291             can, since handle_noredir_jump will assert if the counter
1292             is zero on entry. */
1293          vg_assert(trc[0] != VG_TRC_INNER_COUNTERZERO);
1294 
1295          /* A no-redir translation can't return with a chain-me
1296             request, since chaining in the no-redir cache is too
1297             complex. */
1298          vg_assert(trc[0] != VG_TRC_CHAIN_ME_TO_SLOW_EP
1299                    && trc[0] != VG_TRC_CHAIN_ME_TO_FAST_EP);
1300       }
1301 
1302       switch (trc[0]) {
1303       case VEX_TRC_JMP_BORING:
1304          /* assisted dispatch, no event.  Used by no-redir
1305             translations to force return to the scheduler. */
1306       case VG_TRC_BORING:
1307          /* no special event, just keep going. */
1308          break;
1309 
1310       case VG_TRC_INNER_FASTMISS:
1311 	 vg_assert(dispatch_ctr > 0);
1312 	 handle_tt_miss(tid);
1313 	 break;
1314 
1315       case VG_TRC_CHAIN_ME_TO_SLOW_EP: {
1316          if (0) VG_(printf)("sched: CHAIN_TO_SLOW_EP: %p\n", (void*)trc[1] );
1317          handle_chain_me(tid, (void*)trc[1], False);
1318          break;
1319       }
1320 
1321       case VG_TRC_CHAIN_ME_TO_FAST_EP: {
1322          if (0) VG_(printf)("sched: CHAIN_TO_FAST_EP: %p\n", (void*)trc[1] );
1323          handle_chain_me(tid, (void*)trc[1], True);
1324          break;
1325       }
1326 
1327       case VEX_TRC_JMP_CLIENTREQ:
1328 	 do_client_request(tid);
1329 	 break;
1330 
1331       case VEX_TRC_JMP_SYS_INT128:  /* x86-linux */
1332       case VEX_TRC_JMP_SYS_INT129:  /* x86-darwin */
1333       case VEX_TRC_JMP_SYS_INT130:  /* x86-darwin */
1334       case VEX_TRC_JMP_SYS_SYSCALL: /* amd64-linux, ppc32-linux, amd64-darwin */
1335 	 handle_syscall(tid, trc[0]);
1336 	 if (VG_(clo_sanity_level) > 2)
1337 	    VG_(sanity_check_general)(True); /* sanity-check every syscall */
1338 	 break;
1339 
1340       case VEX_TRC_JMP_YIELD:
1341 	 /* Explicit yield, because this thread is in a spin-lock
1342 	    or something.  Only let the thread run for a short while
1343             longer.  Because swapping to another thread is expensive,
1344             we're prepared to let this thread eat a little more CPU
1345             before swapping to another.  That means that short term
1346             spins waiting for hardware to poke memory won't cause a
1347             thread swap. */
1348 	 if (dispatch_ctr > 2000)
1349             dispatch_ctr = 2000;
1350 	 break;
1351 
1352       case VG_TRC_INNER_COUNTERZERO:
1353 	 /* Timeslice is out.  Let a new thread be scheduled. */
1354 	 vg_assert(dispatch_ctr == 0);
1355 	 break;
1356 
1357       case VG_TRC_FAULT_SIGNAL:
1358 	 /* Everything should be set up (either we're exiting, or
1359 	    about to start in a signal handler). */
1360 	 break;
1361 
1362       case VEX_TRC_JMP_MAPFAIL:
1363          /* Failure of arch-specific address translation (x86/amd64
1364             segment override use) */
1365          /* jrs 2005 03 11: is this correct? */
1366          VG_(synth_fault)(tid);
1367          break;
1368 
1369       case VEX_TRC_JMP_EMWARN: {
1370          static Int  counts[EmWarn_NUMBER];
1371          static Bool counts_initted = False;
1372          VexEmWarn ew;
1373          HChar*    what;
1374          Bool      show;
1375          Int       q;
1376          if (!counts_initted) {
1377             counts_initted = True;
1378             for (q = 0; q < EmWarn_NUMBER; q++)
1379                counts[q] = 0;
1380          }
1381          ew   = (VexEmWarn)VG_(threads)[tid].arch.vex.guest_EMWARN;
1382          what = (ew < 0 || ew >= EmWarn_NUMBER)
1383                    ? "unknown (?!)"
1384                    : LibVEX_EmWarn_string(ew);
1385          show = (ew < 0 || ew >= EmWarn_NUMBER)
1386                    ? True
1387                    : counts[ew]++ < 3;
1388          if (show && VG_(clo_show_emwarns) && !VG_(clo_xml)) {
1389             VG_(message)( Vg_UserMsg,
1390                           "Emulation warning: unsupported action:\n");
1391             VG_(message)( Vg_UserMsg, "  %s\n", what);
1392             VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1393          }
1394          break;
1395       }
1396 
1397       case VEX_TRC_JMP_EMFAIL: {
1398          VexEmWarn ew;
1399          HChar*    what;
1400          ew   = (VexEmWarn)VG_(threads)[tid].arch.vex.guest_EMWARN;
1401          what = (ew < 0 || ew >= EmWarn_NUMBER)
1402                    ? "unknown (?!)"
1403                    : LibVEX_EmWarn_string(ew);
1404          VG_(message)( Vg_UserMsg,
1405                        "Emulation fatal error -- Valgrind cannot continue:\n");
1406          VG_(message)( Vg_UserMsg, "  %s\n", what);
1407          VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1408          VG_(message)(Vg_UserMsg, "\n");
1409          VG_(message)(Vg_UserMsg, "Valgrind has to exit now.  Sorry.\n");
1410          VG_(message)(Vg_UserMsg, "\n");
1411          VG_(exit)(1);
1412          break;
1413       }
1414 
1415       case VEX_TRC_JMP_SIGTRAP:
1416          VG_(synth_sigtrap)(tid);
1417          break;
1418 
1419       case VEX_TRC_JMP_SIGSEGV:
1420          VG_(synth_fault)(tid);
1421          break;
1422 
1423       case VEX_TRC_JMP_SIGBUS:
1424          VG_(synth_sigbus)(tid);
1425          break;
1426 
1427       case VEX_TRC_JMP_NODECODE: {
1428          Addr addr = VG_(get_IP)(tid);
1429 
1430          VG_(umsg)(
1431             "valgrind: Unrecognised instruction at address %#lx.\n", addr);
1432          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1433 #define M(a) VG_(umsg)(a "\n");
1434    M("Your program just tried to execute an instruction that Valgrind" );
1435    M("did not recognise.  There are two possible reasons for this."    );
1436    M("1. Your program has a bug and erroneously jumped to a non-code"  );
1437    M("   location.  If you are running Memcheck and you just saw a"    );
1438    M("   warning about a bad jump, it's probably your program's fault.");
1439    M("2. The instruction is legitimate but Valgrind doesn't handle it,");
1440    M("   i.e. it's Valgrind's fault.  If you think this is the case or");
1441    M("   you are not sure, please let us know and we'll try to fix it.");
1442    M("Either way, Valgrind will now raise a SIGILL signal which will"  );
1443    M("probably kill your program."                                     );
1444 #undef M
1445 
1446 #if defined(VGA_s390x)
1447          /* Now that the complaint is out we need to adjust the guest_IA. The
1448             reason is that -- after raising the exception -- execution will
1449             continue with the insn that follows the invalid insn. As the first
1450             2 bits of the invalid insn determine its length in the usual way,
1451             we can compute the address of the next insn here and adjust the
1452             guest_IA accordingly. This adjustment is essential and tested by
1453             none/tests/s390x/op_exception.c (which would loop forever
1454             otherwise) */
1455          UChar byte = ((UChar *)addr)[0];
1456          UInt  insn_length = ((((byte >> 6) + 1) >> 1) + 1) << 1;
1457          Addr  next_insn_addr = addr + insn_length;
1458 
1459          VG_(set_IP)(tid, next_insn_addr);
1460 #endif
1461          VG_(synth_sigill)(tid, addr);
1462          break;
1463       }
1464       case VEX_TRC_JMP_TINVAL:
1465          VG_(discard_translations)(
1466             (Addr64)VG_(threads)[tid].arch.vex.guest_TISTART,
1467             VG_(threads)[tid].arch.vex.guest_TILEN,
1468             "scheduler(VEX_TRC_JMP_TINVAL)"
1469          );
1470          if (0)
1471             VG_(printf)("dump translations done.\n");
1472          break;
1473 
1474       case VG_TRC_INVARIANT_FAILED:
1475          /* This typically happens if, after running generated code,
1476             it is detected that host CPU settings (eg, FPU/Vector
1477             control words) are not as they should be.  Vex's code
1478             generation specifies the state such control words should
1479             be in on entry to Vex-generated code, and they should be
1480             unchanged on exit from it.  Failure of this assertion
1481             usually means a bug in Vex's code generation. */
1482          //{ UInt xx;
1483          //  __asm__ __volatile__ (
1484          //     "\t.word 0xEEF12A10\n"  // fmrx r2,fpscr
1485          //     "\tmov %0, r2" : "=r"(xx) : : "r2" );
1486          //  VG_(printf)("QQQQ new fpscr = %08x\n", xx);
1487          //}
1488          vg_assert2(0, "VG_(scheduler), phase 3: "
1489                        "run_innerloop detected host "
1490                        "state invariant failure", trc);
1491 
1492       case VEX_TRC_JMP_SYS_SYSENTER:
1493          /* Do whatever simulation is appropriate for an x86 sysenter
1494             instruction.  Note that it is critical to set this thread's
1495             guest_EIP to point at the code to execute after the
1496             sysenter, since Vex-generated code will not have set it --
1497             vex does not know what it should be.  Vex sets the next
1498             address to zero, so if you don't set guest_EIP, the thread
1499             will jump to zero afterwards and probably die as a result. */
1500 #        if defined(VGP_x86_linux)
1501          vg_assert2(0, "VG_(scheduler), phase 3: "
1502                        "sysenter_x86 on x86-linux is not supported");
1503 #        elif defined(VGP_x86_darwin)
1504          /* return address in client edx */
1505          VG_(threads)[tid].arch.vex.guest_EIP
1506             = VG_(threads)[tid].arch.vex.guest_EDX;
1507          handle_syscall(tid, trc[0]);
1508 #        else
1509          vg_assert2(0, "VG_(scheduler), phase 3: "
1510                        "sysenter_x86 on non-x86 platform?!?!");
1511 #        endif
1512          break;
1513 
1514       default:
1515 	 vg_assert2(0, "VG_(scheduler), phase 3: "
1516                        "unexpected thread return code (%u)", trc[0]);
1517 	 /* NOTREACHED */
1518 	 break;
1519 
1520       } /* switch (trc) */
1521 
1522       if (0)
1523          maybe_show_sb_counts();
1524    }
1525 
1526    if (VG_(clo_trace_sched))
1527       print_sched_event(tid, "exiting VG_(scheduler)");
1528 
1529    vg_assert(VG_(is_exiting)(tid));
1530 
1531    return tst->exitreason;
1532 }
1533 
1534 
1535 /*
1536    This causes all threads to forceably exit.  They aren't actually
1537    dead by the time this returns; you need to call
1538    VG_(reap_threads)() to wait for them.
1539  */
VG_(nuke_all_threads_except)1540 void VG_(nuke_all_threads_except) ( ThreadId me, VgSchedReturnCode src )
1541 {
1542    ThreadId tid;
1543 
1544    vg_assert(VG_(is_running_thread)(me));
1545 
1546    for (tid = 1; tid < VG_N_THREADS; tid++) {
1547       if (tid == me
1548           || VG_(threads)[tid].status == VgTs_Empty)
1549          continue;
1550       if (0)
1551          VG_(printf)(
1552             "VG_(nuke_all_threads_except): nuking tid %d\n", tid);
1553 
1554       VG_(threads)[tid].exitreason = src;
1555       if (src == VgSrc_FatalSig)
1556          VG_(threads)[tid].os_state.fatalsig = VKI_SIGKILL;
1557       VG_(get_thread_out_of_syscall)(tid);
1558    }
1559 }
1560 
1561 
1562 /* ---------------------------------------------------------------------
1563    Specifying shadow register values
1564    ------------------------------------------------------------------ */
1565 
1566 #if defined(VGA_x86)
1567 #  define VG_CLREQ_ARGS       guest_EAX
1568 #  define VG_CLREQ_RET        guest_EDX
1569 #elif defined(VGA_amd64)
1570 #  define VG_CLREQ_ARGS       guest_RAX
1571 #  define VG_CLREQ_RET        guest_RDX
1572 #elif defined(VGA_ppc32) || defined(VGA_ppc64)
1573 #  define VG_CLREQ_ARGS       guest_GPR4
1574 #  define VG_CLREQ_RET        guest_GPR3
1575 #elif defined(VGA_arm)
1576 #  define VG_CLREQ_ARGS       guest_R4
1577 #  define VG_CLREQ_RET        guest_R3
1578 #elif defined (VGA_s390x)
1579 #  define VG_CLREQ_ARGS       guest_r2
1580 #  define VG_CLREQ_RET        guest_r3
1581 #elif defined(VGA_mips32)
1582 #  define VG_CLREQ_ARGS       guest_r12
1583 #  define VG_CLREQ_RET        guest_r11
1584 #else
1585 #  error Unknown arch
1586 #endif
1587 
1588 #define CLREQ_ARGS(regs)   ((regs).vex.VG_CLREQ_ARGS)
1589 #define CLREQ_RET(regs)    ((regs).vex.VG_CLREQ_RET)
1590 #define O_CLREQ_RET        (offsetof(VexGuestArchState, VG_CLREQ_RET))
1591 
1592 // These macros write a value to a client's thread register, and tell the
1593 // tool that it's happened (if necessary).
1594 
1595 #define SET_CLREQ_RETVAL(zztid, zzval) \
1596    do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
1597         VG_TRACK( post_reg_write, \
1598                   Vg_CoreClientReq, zztid, O_CLREQ_RET, sizeof(UWord)); \
1599    } while (0)
1600 
1601 #define SET_CLCALL_RETVAL(zztid, zzval, f) \
1602    do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
1603         VG_TRACK( post_reg_write_clientcall_return, \
1604                   zztid, O_CLREQ_RET, sizeof(UWord), f); \
1605    } while (0)
1606 
1607 
1608 /* ---------------------------------------------------------------------
1609    Handle client requests.
1610    ------------------------------------------------------------------ */
1611 
1612 // OS-specific(?) client requests
os_client_request(ThreadId tid,UWord * args)1613 static Bool os_client_request(ThreadId tid, UWord *args)
1614 {
1615    Bool handled = True;
1616 
1617    vg_assert(VG_(is_running_thread)(tid));
1618 
1619    switch(args[0]) {
1620    case VG_USERREQ__LIBC_FREERES_DONE:
1621       /* This is equivalent to an exit() syscall, but we don't set the
1622 	 exitcode (since it might already be set) */
1623       if (0 || VG_(clo_trace_syscalls) || VG_(clo_trace_sched))
1624          VG_(message)(Vg_DebugMsg,
1625                       "__libc_freeres() done; really quitting!\n");
1626       VG_(threads)[tid].exitreason = VgSrc_ExitThread;
1627       break;
1628 
1629    default:
1630       handled = False;
1631       break;
1632    }
1633 
1634    return handled;
1635 }
1636 
1637 
1638 /* Do a client request for the thread tid.  After the request, tid may
1639    or may not still be runnable; if not, the scheduler will have to
1640    choose a new thread to run.
1641 */
1642 static
do_client_request(ThreadId tid)1643 void do_client_request ( ThreadId tid )
1644 {
1645    UWord* arg = (UWord*)(CLREQ_ARGS(VG_(threads)[tid].arch));
1646    UWord req_no = arg[0];
1647 
1648    if (0)
1649       VG_(printf)("req no = 0x%llx, arg = %p\n", (ULong)req_no, arg);
1650    switch (req_no) {
1651 
1652       case VG_USERREQ__CLIENT_CALL0: {
1653          UWord (*f)(ThreadId) = (void*)arg[1];
1654 	 if (f == NULL)
1655 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL0: func=%p\n", f);
1656 	 else
1657 	    SET_CLCALL_RETVAL(tid, f ( tid ), (Addr)f);
1658          break;
1659       }
1660       case VG_USERREQ__CLIENT_CALL1: {
1661          UWord (*f)(ThreadId, UWord) = (void*)arg[1];
1662 	 if (f == NULL)
1663 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL1: func=%p\n", f);
1664 	 else
1665 	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2] ), (Addr)f );
1666          break;
1667       }
1668       case VG_USERREQ__CLIENT_CALL2: {
1669          UWord (*f)(ThreadId, UWord, UWord) = (void*)arg[1];
1670 	 if (f == NULL)
1671 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL2: func=%p\n", f);
1672 	 else
1673 	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3] ), (Addr)f );
1674          break;
1675       }
1676       case VG_USERREQ__CLIENT_CALL3: {
1677          UWord (*f)(ThreadId, UWord, UWord, UWord) = (void*)arg[1];
1678 	 if (f == NULL)
1679 	    VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL3: func=%p\n", f);
1680 	 else
1681 	    SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3], arg[4] ), (Addr)f );
1682          break;
1683       }
1684 
1685       // Nb: this looks like a circular definition, because it kind of is.
1686       // See comment in valgrind.h to understand what's going on.
1687       case VG_USERREQ__RUNNING_ON_VALGRIND:
1688          SET_CLREQ_RETVAL(tid, RUNNING_ON_VALGRIND+1);
1689          break;
1690 
1691       case VG_USERREQ__PRINTF: {
1692          /* JRS 2010-Jan-28: this is DEPRECATED; use the
1693             _VALIST_BY_REF version instead */
1694          if (sizeof(va_list) != sizeof(UWord))
1695             goto va_list_casting_error_NORETURN;
1696          union {
1697             va_list vargs;
1698             unsigned long uw;
1699          } u;
1700          u.uw = (unsigned long)arg[2];
1701          Int count =
1702             VG_(vmessage)( Vg_ClientMsg, (char *)arg[1], u.vargs );
1703          VG_(message_flush)();
1704          SET_CLREQ_RETVAL( tid, count );
1705          break;
1706       }
1707 
1708       case VG_USERREQ__PRINTF_BACKTRACE: {
1709          /* JRS 2010-Jan-28: this is DEPRECATED; use the
1710             _VALIST_BY_REF version instead */
1711          if (sizeof(va_list) != sizeof(UWord))
1712             goto va_list_casting_error_NORETURN;
1713          union {
1714             va_list vargs;
1715             unsigned long uw;
1716          } u;
1717          u.uw = (unsigned long)arg[2];
1718          Int count =
1719             VG_(vmessage)( Vg_ClientMsg, (char *)arg[1], u.vargs );
1720          VG_(message_flush)();
1721          VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1722          SET_CLREQ_RETVAL( tid, count );
1723          break;
1724       }
1725 
1726       case VG_USERREQ__PRINTF_VALIST_BY_REF: {
1727          va_list* vargsp = (va_list*)arg[2];
1728          Int count =
1729             VG_(vmessage)( Vg_ClientMsg, (char *)arg[1], *vargsp );
1730          VG_(message_flush)();
1731          SET_CLREQ_RETVAL( tid, count );
1732          break;
1733       }
1734 
1735       case VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF: {
1736          va_list* vargsp = (va_list*)arg[2];
1737          Int count =
1738             VG_(vmessage)( Vg_ClientMsg, (char *)arg[1], *vargsp );
1739          VG_(message_flush)();
1740          VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1741          SET_CLREQ_RETVAL( tid, count );
1742          break;
1743       }
1744 
1745       case VG_USERREQ__INTERNAL_PRINTF_VALIST_BY_REF: {
1746          va_list* vargsp = (va_list*)arg[2];
1747          Int count =
1748             VG_(vmessage)( Vg_DebugMsg, (char *)arg[1], *vargsp );
1749          VG_(message_flush)();
1750          SET_CLREQ_RETVAL( tid, count );
1751          break;
1752       }
1753 
1754       case VG_USERREQ__ADD_IFUNC_TARGET: {
1755          VG_(redir_add_ifunc_target)( arg[1], arg[2] );
1756          SET_CLREQ_RETVAL( tid, 0);
1757          break; }
1758 
1759       case VG_USERREQ__STACK_REGISTER: {
1760          UWord sid = VG_(register_stack)((Addr)arg[1], (Addr)arg[2]);
1761          SET_CLREQ_RETVAL( tid, sid );
1762          break; }
1763 
1764       case VG_USERREQ__STACK_DEREGISTER: {
1765          VG_(deregister_stack)(arg[1]);
1766          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1767          break; }
1768 
1769       case VG_USERREQ__STACK_CHANGE: {
1770          VG_(change_stack)(arg[1], (Addr)arg[2], (Addr)arg[3]);
1771          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1772          break; }
1773 
1774       case VG_USERREQ__GET_MALLOCFUNCS: {
1775 	 struct vg_mallocfunc_info *info = (struct vg_mallocfunc_info *)arg[1];
1776 
1777 	 info->tl_malloc               = VG_(tdict).tool_malloc;
1778 	 info->tl_calloc               = VG_(tdict).tool_calloc;
1779 	 info->tl_realloc              = VG_(tdict).tool_realloc;
1780 	 info->tl_memalign             = VG_(tdict).tool_memalign;
1781 	 info->tl___builtin_new        = VG_(tdict).tool___builtin_new;
1782 	 info->tl___builtin_vec_new    = VG_(tdict).tool___builtin_vec_new;
1783 	 info->tl_free                 = VG_(tdict).tool_free;
1784 	 info->tl___builtin_delete     = VG_(tdict).tool___builtin_delete;
1785 	 info->tl___builtin_vec_delete = VG_(tdict).tool___builtin_vec_delete;
1786          info->tl_malloc_usable_size   = VG_(tdict).tool_malloc_usable_size;
1787 
1788 	 info->mallinfo                = VG_(mallinfo);
1789 	 info->clo_trace_malloc        = VG_(clo_trace_malloc);
1790 
1791          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1792 
1793 	 break;
1794       }
1795 
1796       /* Requests from the client program */
1797 
1798       case VG_USERREQ__DISCARD_TRANSLATIONS:
1799          if (VG_(clo_verbosity) > 2)
1800             VG_(printf)( "client request: DISCARD_TRANSLATIONS,"
1801                          " addr %p,  len %lu\n",
1802                          (void*)arg[1], arg[2] );
1803 
1804          VG_(discard_translations)(
1805             arg[1], arg[2], "scheduler(VG_USERREQ__DISCARD_TRANSLATIONS)"
1806          );
1807 
1808          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1809 	 break;
1810 
1811       case VG_USERREQ__COUNT_ERRORS:
1812          SET_CLREQ_RETVAL( tid, VG_(get_n_errs_found)() );
1813          break;
1814 
1815       case VG_USERREQ__LOAD_PDB_DEBUGINFO:
1816          VG_(di_notify_pdb_debuginfo)( arg[1], arg[2], arg[3], arg[4] );
1817          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1818          break;
1819 
1820       case VG_USERREQ__MAP_IP_TO_SRCLOC: {
1821          Addr   ip    = arg[1];
1822          UChar* buf64 = (UChar*)arg[2];
1823 
1824          VG_(memset)(buf64, 0, 64);
1825          UInt linenum = 0;
1826          Bool ok = VG_(get_filename_linenum)(
1827                       ip, &buf64[0], 50, NULL, 0, NULL, &linenum
1828                    );
1829          if (ok) {
1830             /* Find the terminating zero in the first 50 bytes. */
1831             UInt i;
1832             for (i = 0; i < 50; i++) {
1833                if (buf64[i] == 0)
1834                   break;
1835             }
1836             /* We must find a zero somewhere in 0 .. 49.  Else
1837                VG_(get_filename_linenum) is not properly zero
1838                terminating. */
1839             vg_assert(i < 50);
1840             VG_(sprintf)(&buf64[i], ":%u", linenum);
1841          } else {
1842             buf64[0] = 0;
1843          }
1844 
1845          SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
1846          break;
1847       }
1848 
1849       case VG_USERREQ__CHANGE_ERR_DISABLEMENT: {
1850          Word delta = arg[1];
1851          vg_assert(delta == 1 || delta == -1);
1852          ThreadState* tst = VG_(get_ThreadState)(tid);
1853          vg_assert(tst);
1854          if (delta == 1 && tst->err_disablement_level < 0xFFFFFFFF) {
1855             tst->err_disablement_level++;
1856          }
1857          else
1858          if (delta == -1 && tst->err_disablement_level > 0) {
1859             tst->err_disablement_level--;
1860          }
1861          SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
1862          break;
1863       }
1864 
1865       case VG_USERREQ__MALLOCLIKE_BLOCK:
1866       case VG_USERREQ__RESIZEINPLACE_BLOCK:
1867       case VG_USERREQ__FREELIKE_BLOCK:
1868          // Ignore them if the addr is NULL;  otherwise pass onto the tool.
1869          if (!arg[1]) {
1870             SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
1871             break;
1872          } else {
1873             goto my_default;
1874          }
1875 
1876       default:
1877        my_default:
1878 	 if (os_client_request(tid, arg)) {
1879 	    // do nothing, os_client_request() handled it
1880          } else if (VG_(needs).client_requests) {
1881 	    UWord ret;
1882 
1883             if (VG_(clo_verbosity) > 2)
1884                VG_(printf)("client request: code %lx,  addr %p,  len %lu\n",
1885                            arg[0], (void*)arg[1], arg[2] );
1886 
1887 	    if ( VG_TDICT_CALL(tool_handle_client_request, tid, arg, &ret) )
1888 	       SET_CLREQ_RETVAL(tid, ret);
1889          } else {
1890 	    static Bool whined = False;
1891 
1892 	    if (!whined && VG_(clo_verbosity) > 2) {
1893                // Allow for requests in core, but defined by tools, which
1894                // have 0 and 0 in their two high bytes.
1895                Char c1 = (arg[0] >> 24) & 0xff;
1896                Char c2 = (arg[0] >> 16) & 0xff;
1897                if (c1 == 0) c1 = '_';
1898                if (c2 == 0) c2 = '_';
1899 	       VG_(message)(Vg_UserMsg, "Warning:\n"
1900                    "  unhandled client request: 0x%lx (%c%c+0x%lx).  Perhaps\n"
1901 		   "  VG_(needs).client_requests should be set?\n",
1902 			    arg[0], c1, c2, arg[0] & 0xffff);
1903 	       whined = True;
1904 	    }
1905          }
1906          break;
1907    }
1908    return;
1909 
1910    /*NOTREACHED*/
1911   va_list_casting_error_NORETURN:
1912    VG_(umsg)(
1913       "Valgrind: fatal error - cannot continue: use of the deprecated\n"
1914       "client requests VG_USERREQ__PRINTF or VG_USERREQ__PRINTF_BACKTRACE\n"
1915       "on a platform where they cannot be supported.  Please use the\n"
1916       "equivalent _VALIST_BY_REF versions instead.\n"
1917       "\n"
1918       "This is a binary-incompatible change in Valgrind's client request\n"
1919       "mechanism.  It is unfortunate, but difficult to avoid.  End-users\n"
1920       "are expected to almost never see this message.  The only case in\n"
1921       "which you might see this message is if your code uses the macros\n"
1922       "VALGRIND_PRINTF or VALGRIND_PRINTF_BACKTRACE.  If so, you will need\n"
1923       "to recompile such code, using the header files from this version of\n"
1924       "Valgrind, and not any previous version.\n"
1925       "\n"
1926       "If you see this mesage in any other circumstances, it is probably\n"
1927       "a bug in Valgrind.  In this case, please file a bug report at\n"
1928       "\n"
1929       "   http://www.valgrind.org/support/bug_reports.html\n"
1930       "\n"
1931       "Will now abort.\n"
1932    );
1933    vg_assert(0);
1934 }
1935 
1936 
1937 /* ---------------------------------------------------------------------
1938    Sanity checking (permanently engaged)
1939    ------------------------------------------------------------------ */
1940 
1941 /* Internal consistency checks on the sched structures. */
1942 static
scheduler_sanity(ThreadId tid)1943 void scheduler_sanity ( ThreadId tid )
1944 {
1945    Bool bad = False;
1946    static UInt lasttime = 0;
1947    UInt now;
1948    Int lwpid = VG_(gettid)();
1949 
1950    if (!VG_(is_running_thread)(tid)) {
1951       VG_(message)(Vg_DebugMsg,
1952 		   "Thread %d is supposed to be running, "
1953                    "but doesn't own the_BigLock (owned by %d)\n",
1954 		   tid, VG_(running_tid));
1955       bad = True;
1956    }
1957 
1958    if (lwpid != VG_(threads)[tid].os_state.lwpid) {
1959       VG_(message)(Vg_DebugMsg,
1960                    "Thread %d supposed to be in LWP %d, but we're actually %d\n",
1961                    tid, VG_(threads)[tid].os_state.lwpid, VG_(gettid)());
1962       bad = True;
1963    }
1964 
1965    if (lwpid != ML_(get_sched_lock_owner)(the_BigLock)) {
1966       VG_(message)(Vg_DebugMsg,
1967                    "Thread (LWPID) %d doesn't own the_BigLock\n",
1968                    tid);
1969       bad = True;
1970    }
1971 
1972    /* Periodically show the state of all threads, for debugging
1973       purposes. */
1974    now = VG_(read_millisecond_timer)();
1975    if (0 && (!bad) && (lasttime + 4000/*ms*/ <= now)) {
1976       lasttime = now;
1977       VG_(printf)("\n------------ Sched State at %d ms ------------\n",
1978                   (Int)now);
1979       VG_(show_sched_status)();
1980    }
1981 
1982    /* core_panic also shows the sched status, which is why we don't
1983       show it above if bad==True. */
1984    if (bad)
1985       VG_(core_panic)("scheduler_sanity: failed");
1986 }
1987 
VG_(sanity_check_general)1988 void VG_(sanity_check_general) ( Bool force_expensive )
1989 {
1990    ThreadId tid;
1991 
1992    static UInt next_slow_check_at = 1;
1993    static UInt slow_check_interval = 25;
1994 
1995    if (VG_(clo_sanity_level) < 1) return;
1996 
1997    /* --- First do all the tests that we can do quickly. ---*/
1998 
1999    sanity_fast_count++;
2000 
2001    /* Check stuff pertaining to the memory check system. */
2002 
2003    /* Check that nobody has spuriously claimed that the first or
2004       last 16 pages of memory have become accessible [...] */
2005    if (VG_(needs).sanity_checks) {
2006       vg_assert(VG_TDICT_CALL(tool_cheap_sanity_check));
2007    }
2008 
2009    /* --- Now some more expensive checks. ---*/
2010 
2011    /* Once every now and again, check some more expensive stuff.
2012       Gradually increase the interval between such checks so as not to
2013       burden long-running programs too much. */
2014    if ( force_expensive
2015         || VG_(clo_sanity_level) > 1
2016         || (VG_(clo_sanity_level) == 1
2017             && sanity_fast_count == next_slow_check_at)) {
2018 
2019       if (0) VG_(printf)("SLOW at %d\n", sanity_fast_count-1);
2020 
2021       next_slow_check_at = sanity_fast_count - 1 + slow_check_interval;
2022       slow_check_interval++;
2023       sanity_slow_count++;
2024 
2025       if (VG_(needs).sanity_checks) {
2026           vg_assert(VG_TDICT_CALL(tool_expensive_sanity_check));
2027       }
2028 
2029       /* Look for stack overruns.  Visit all threads. */
2030       for (tid = 1; tid < VG_N_THREADS; tid++) {
2031 	 SizeT    remains;
2032          VgStack* stack;
2033 
2034 	 if (VG_(threads)[tid].status == VgTs_Empty ||
2035 	     VG_(threads)[tid].status == VgTs_Zombie)
2036 	    continue;
2037 
2038          stack
2039             = (VgStack*)
2040               VG_(get_ThreadState)(tid)->os_state.valgrind_stack_base;
2041          SizeT limit
2042             = 4096; // Let's say.  Checking more causes lots of L2 misses.
2043 	 remains
2044             = VG_(am_get_VgStack_unused_szB)(stack, limit);
2045 	 if (remains < limit)
2046 	    VG_(message)(Vg_DebugMsg,
2047                          "WARNING: Thread %d is within %ld bytes "
2048                          "of running out of stack!\n",
2049 		         tid, remains);
2050       }
2051    }
2052 
2053    if (VG_(clo_sanity_level) > 1) {
2054       /* Check sanity of the low-level memory manager.  Note that bugs
2055          in the client's code can cause this to fail, so we don't do
2056          this check unless specially asked for.  And because it's
2057          potentially very expensive. */
2058       VG_(sanity_check_malloc_all)();
2059    }
2060 }
2061 
2062 /*--------------------------------------------------------------------*/
2063 /*--- end                                                          ---*/
2064 /*--------------------------------------------------------------------*/
2065