
/*--------------------------------------------------------------------*/
/*--- Thread scheduling.                               scheduler.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2017 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

/*
   Overview

   Valgrind tries to emulate the kernel's threading as closely as
   possible.  The client does all threading via the normal syscalls
   (on Linux: clone, etc).  Valgrind emulates this by creating exactly
   the same process structure as would be created without Valgrind.
   There are no extra threads.

   The main difference is that Valgrind only allows one client thread
   to run at once.  This is controlled with the CPU Big Lock,
   "the_BigLock".  Any time a thread wants to run client code or
   manipulate any shared state (which is anything other than its own
   ThreadState entry), it must hold the_BigLock.

   When a thread is about to block in a blocking syscall, it releases
   the_BigLock, and re-takes it when it becomes runnable again (either
   because the syscall finished, or we took a signal).

   VG_(scheduler) therefore runs in each thread.  It returns only when
   the thread is exiting, either because it exited itself, or it was
   told to exit by another thread.

   This file is almost entirely OS-independent.  The details of how
   the OS handles threading and signalling are abstracted away and
   implemented elsewhere.  [Some of the functions have worked their
   way back for the moment, until we do an OS port in earnest...]
*/
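
/* Illustrative sketch (editor's addition, not part of Valgrind): the
   per-thread lifecycle implied by the overview above.  Each thread
   holds the_BigLock while running client code or touching shared
   state, and drops it around blocking syscalls.  The loop structure
   and the wants_blocking_syscall() predicate are hypothetical
   simplifications; the real logic lives in VG_(scheduler) and the
   syscall wrappers.  Compiled out via #if 0. */
#if 0
static void example_thread_lifecycle ( ThreadId tid )
{
   /* Block until we have exclusive access to the CPU. */
   VG_(acquire_BigLock)(tid, "example");

   while (!VG_(is_exiting)(tid)) {
      /* ... run a timeslice of client code, holding the lock ... */

      if (wants_blocking_syscall()) {   /* hypothetical */
         /* Drop the lock for the duration of the syscall, so other
            threads can run ... */
         VG_(release_BigLock)(tid, VgTs_WaitSys, "example");
         /* ... the syscall blocks here with the lock dropped ... */
         /* ... then retake the lock once runnable again. */
         VG_(acquire_BigLock)(tid, "example");
      }
   }
   /* On exit, VG_(exit_thread) clears the record and drops the lock. */
}
#endif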


#include "pub_core_basics.h"
#include "pub_core_debuglog.h"
#include "pub_core_vki.h"
#include "pub_core_vkiscnums.h"  // __NR_sched_yield
#include "pub_core_threadstate.h"
#include "pub_core_clientstate.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_clreq.h"      // for VG_USERREQ__*
#include "pub_core_dispatch.h"
#include "pub_core_errormgr.h"   // For VG_(get_n_errs_found)()
#include "pub_core_gdbserver.h"  // for VG_(gdbserver)/VG_(gdbserver_activity)
#include "pub_core_libcbase.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_libcproc.h"
#include "pub_core_libcsignal.h"
#if defined(VGO_darwin)
#include "pub_core_mach.h"
#endif
#include "pub_core_machine.h"
#include "pub_core_mallocfree.h"
#include "pub_core_options.h"
#include "pub_core_replacemalloc.h"
#include "pub_core_sbprofile.h"
#include "pub_core_signals.h"
#include "pub_core_stacks.h"
#include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
#include "pub_core_syscall.h"
#include "pub_core_syswrap.h"
#include "pub_core_tooliface.h"
#include "pub_core_translate.h"     // For VG_(translate)()
#include "pub_core_transtab.h"
#include "pub_core_debuginfo.h"     // VG_(di_notify_pdb_debuginfo)
#include "priv_sched-lock.h"
#include "pub_core_scheduler.h"     // self
#include "pub_core_redir.h"
#include "libvex_emnote.h"          // VexEmNote


/* ---------------------------------------------------------------------
   Types and globals for the scheduler.
   ------------------------------------------------------------------ */

/* ThreadId and ThreadState are defined elsewhere. */

/* Defines the thread-scheduling timeslice, in terms of the number of
   basic blocks we attempt to run each thread for.  Smaller values
   give finer interleaving but much increased scheduling overheads. */
#define SCHEDULING_QUANTUM   100000

/* If False, a fault is Valgrind-internal (ie, a bug) */
Bool VG_(in_generated_code) = False;

/* 64-bit counter for the number of basic blocks done. */
static ULong bbs_done = 0;

/* Counter used to decide when to poll for gdbserver activity.
   When the number of bbs done reaches vgdb_next_poll, the scheduler
   polls for gdbserver activity.  VG_(force_vgdb_poll) and
   VG_(disable_vgdb_poll) allow the valgrind core (e.g. m_gdbserver)
   to control when the next poll will be done. */
static ULong vgdb_next_poll;

/* Forwards */
static void do_client_request ( ThreadId tid );
static void scheduler_sanity ( ThreadId tid );
static void mostly_clear_thread_record ( ThreadId tid );

/* Stats. */
static ULong n_scheduling_events_MINOR = 0;
static ULong n_scheduling_events_MAJOR = 0;

/* Stats: number of XIndirs, and number that missed in the fast
   cache. */
static ULong stats__n_xindirs = 0;
static ULong stats__n_xindir_misses = 0;

/* And 32-bit temp bins for the above, so that 32-bit platforms don't
   have to do 64-bit incs on the hot path through
   VG_(cp_disp_xindir). */
/*global*/ UInt VG_(stats__n_xindirs_32) = 0;
/*global*/ UInt VG_(stats__n_xindir_misses_32) = 0;

/* Sanity checking counts. */
static UInt sanity_fast_count = 0;
static UInt sanity_slow_count = 0;

void VG_(print_scheduler_stats)(void)
{
   VG_(message)(Vg_DebugMsg,
      "scheduler: %'llu event checks.\n", bbs_done );
   VG_(message)(Vg_DebugMsg,
                "scheduler: %'llu indir transfers, %'llu misses (1 in %llu)\n",
                stats__n_xindirs, stats__n_xindir_misses,
                stats__n_xindirs / (stats__n_xindir_misses
                                    ? stats__n_xindir_misses : 1));
   VG_(message)(Vg_DebugMsg,
      "scheduler: %'llu/%'llu major/minor sched events.\n",
      n_scheduling_events_MAJOR, n_scheduling_events_MINOR);
   VG_(message)(Vg_DebugMsg,
                "   sanity: %u cheap, %u expensive checks.\n",
                sanity_fast_count, sanity_slow_count );
}

/*
 * Mutual exclusion object used to serialize threads.
 */
static struct sched_lock *the_BigLock;


/* ---------------------------------------------------------------------
   Helper functions for the scheduler.
   ------------------------------------------------------------------ */

static
void print_sched_event ( ThreadId tid, const HChar* what )
{
   VG_(message)(Vg_DebugMsg, "  SCHED[%u]: %s\n", tid, what );
}

/* For showing SB profiles, if the user asks to see them. */
static
void maybe_show_sb_profile ( void )
{
   /* DO NOT MAKE NON-STATIC */
   static ULong bbs_done_lastcheck = 0;
   /* */
   vg_assert(VG_(clo_profyle_interval) > 0);
   Long delta = (Long)(bbs_done - bbs_done_lastcheck);
   vg_assert(delta >= 0);
   if ((ULong)delta >= VG_(clo_profyle_interval)) {
      bbs_done_lastcheck = bbs_done;
      VG_(get_and_show_SB_profile)(bbs_done);
   }
}
196 
197 static
name_of_sched_event(UInt event)198 const HChar* name_of_sched_event ( UInt event )
199 {
200    switch (event) {
201       case VEX_TRC_JMP_INVALICACHE:    return "INVALICACHE";
202       case VEX_TRC_JMP_FLUSHDCACHE:    return "FLUSHDCACHE";
203       case VEX_TRC_JMP_NOREDIR:        return "NOREDIR";
204       case VEX_TRC_JMP_SIGILL:         return "SIGILL";
205       case VEX_TRC_JMP_SIGTRAP:        return "SIGTRAP";
206       case VEX_TRC_JMP_SIGSEGV:        return "SIGSEGV";
207       case VEX_TRC_JMP_SIGBUS:         return "SIGBUS";
208       case VEX_TRC_JMP_SIGFPE_INTOVF:
209       case VEX_TRC_JMP_SIGFPE_INTDIV:  return "SIGFPE";
210       case VEX_TRC_JMP_EMWARN:         return "EMWARN";
211       case VEX_TRC_JMP_EMFAIL:         return "EMFAIL";
212       case VEX_TRC_JMP_CLIENTREQ:      return "CLIENTREQ";
213       case VEX_TRC_JMP_YIELD:          return "YIELD";
214       case VEX_TRC_JMP_NODECODE:       return "NODECODE";
215       case VEX_TRC_JMP_MAPFAIL:        return "MAPFAIL";
216       case VEX_TRC_JMP_SYS_SYSCALL:    return "SYSCALL";
217       case VEX_TRC_JMP_SYS_INT32:      return "INT32";
218       case VEX_TRC_JMP_SYS_INT128:     return "INT128";
219       case VEX_TRC_JMP_SYS_INT129:     return "INT129";
220       case VEX_TRC_JMP_SYS_INT130:     return "INT130";
221       case VEX_TRC_JMP_SYS_INT145:     return "INT145";
222       case VEX_TRC_JMP_SYS_INT210:     return "INT210";
223       case VEX_TRC_JMP_SYS_SYSENTER:   return "SYSENTER";
224       case VEX_TRC_JMP_BORING:         return "VEX_BORING";
225 
226       case VG_TRC_BORING:              return "VG_BORING";
227       case VG_TRC_INNER_FASTMISS:      return "FASTMISS";
228       case VG_TRC_INNER_COUNTERZERO:   return "COUNTERZERO";
229       case VG_TRC_FAULT_SIGNAL:        return "FAULTSIGNAL";
230       case VG_TRC_INVARIANT_FAILED:    return "INVFAILED";
231       case VG_TRC_CHAIN_ME_TO_SLOW_EP: return "CHAIN_ME_SLOW";
232       case VG_TRC_CHAIN_ME_TO_FAST_EP: return "CHAIN_ME_FAST";
233       default:                         return "??UNKNOWN??";
234   }
235 }
236 
237 /* Allocate a completely empty ThreadState record. */
VG_(alloc_ThreadState)238 ThreadId VG_(alloc_ThreadState) ( void )
239 {
240    Int i;
241    for (i = 1; i < VG_N_THREADS; i++) {
242       if (VG_(threads)[i].status == VgTs_Empty) {
243 	 VG_(threads)[i].status = VgTs_Init;
244 	 VG_(threads)[i].exitreason = VgSrc_None;
245          if (VG_(threads)[i].thread_name)
246             VG_(free)(VG_(threads)[i].thread_name);
247          VG_(threads)[i].thread_name = NULL;
248          return i;
249       }
250    }
251    VG_(printf)("Use --max-threads=INT to specify a larger number of threads\n"
252                "and rerun valgrind\n");
253    VG_(core_panic)("Max number of threads is too low");
254    /*NOTREACHED*/
255 }

/*
   Mark a thread as Runnable.  This will block until the_BigLock is
   available, so that we get exclusive access to all the shared
   structures and the CPU.  Up until we get the_BigLock, we must not
   touch any shared state.

   When this returns, we'll actually be running.
 */
void VG_(acquire_BigLock)(ThreadId tid, const HChar* who)
{
   ThreadState *tst;

#if 0
   if (VG_(clo_trace_sched)) {
      HChar buf[VG_(strlen)(who) + 30];
      VG_(sprintf)(buf, "waiting for lock (%s)", who);
      print_sched_event(tid, buf);
   }
#endif

   /* First, acquire the_BigLock.  We can't do anything else safely
      prior to this point.  Even doing debug printing prior to this
      point is, technically, wrong. */
   VG_(acquire_BigLock_LL)(NULL);

   tst = VG_(get_ThreadState)(tid);

   vg_assert(tst->status != VgTs_Runnable);

   tst->status = VgTs_Runnable;

   if (VG_(running_tid) != VG_INVALID_THREADID)
      VG_(printf)("tid %u found %u running\n", tid, VG_(running_tid));
   vg_assert(VG_(running_tid) == VG_INVALID_THREADID);
   VG_(running_tid) = tid;

   { Addr gsp = VG_(get_SP)(tid);
      if (NULL != VG_(tdict).track_new_mem_stack_w_ECU)
         VG_(unknown_SP_update_w_ECU)(gsp, gsp, 0/*unknown origin*/);
      else
         VG_(unknown_SP_update)(gsp, gsp);
   }

   if (VG_(clo_trace_sched)) {
      HChar buf[VG_(strlen)(who) + 30];
      VG_(sprintf)(buf, " acquired lock (%s)", who);
      print_sched_event(tid, buf);
   }
}

/*
   Set a thread into a sleeping state, and give up exclusive access to
   the CPU.  On return, the thread must be prepared to block until it
   is ready to run again (generally this means blocking in a syscall,
   but it may mean that we remain in a Runnable state and we're just
   yielding the CPU to another thread).
 */
void VG_(release_BigLock)(ThreadId tid, ThreadStatus sleepstate,
                          const HChar* who)
{
   ThreadState *tst = VG_(get_ThreadState)(tid);

   vg_assert(tst->status == VgTs_Runnable);

   vg_assert(sleepstate == VgTs_WaitSys ||
             sleepstate == VgTs_Yielding);

   tst->status = sleepstate;

   vg_assert(VG_(running_tid) == tid);
   VG_(running_tid) = VG_INVALID_THREADID;

   if (VG_(clo_trace_sched)) {
      const HChar *status = VG_(name_of_ThreadStatus)(sleepstate);
      HChar buf[VG_(strlen)(who) + VG_(strlen)(status) + 30];
      VG_(sprintf)(buf, "releasing lock (%s) -> %s", who, status);
      print_sched_event(tid, buf);
   }

   /* Release the_BigLock; this will reschedule any runnable
      thread. */
   VG_(release_BigLock_LL)(NULL);
}

static void init_BigLock(void)
{
   vg_assert(!the_BigLock);
   the_BigLock = ML_(create_sched_lock)();
}

static void deinit_BigLock(void)
{
   ML_(destroy_sched_lock)(the_BigLock);
   the_BigLock = NULL;
}

/* See pub_core_scheduler.h for description */
void VG_(acquire_BigLock_LL) ( const HChar* who )
{
   ML_(acquire_sched_lock)(the_BigLock);
}

/* See pub_core_scheduler.h for description */
void VG_(release_BigLock_LL) ( const HChar* who )
{
   ML_(release_sched_lock)(the_BigLock);
}

Bool VG_(owns_BigLock_LL) ( ThreadId tid )
{
   return (ML_(get_sched_lock_owner)(the_BigLock)
           == VG_(threads)[tid].os_state.lwpid);
}


/* Clear out the ThreadState and release the semaphore. Leaves the
   ThreadState in VgTs_Zombie state, so that it doesn't get
   reallocated until the caller is really ready. */
void VG_(exit_thread)(ThreadId tid)
{
   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(VG_(is_running_thread)(tid));
   vg_assert(VG_(is_exiting)(tid));

   mostly_clear_thread_record(tid);
   VG_(running_tid) = VG_INVALID_THREADID;

   /* There should still be a valid exitreason for this thread */
   vg_assert(VG_(threads)[tid].exitreason != VgSrc_None);

   if (VG_(clo_trace_sched))
      print_sched_event(tid, "release lock in VG_(exit_thread)");

   VG_(release_BigLock_LL)(NULL);
}

/* If 'tid' is blocked in a syscall, send it SIGVGKILL so as to get it
   out of the syscall and onto doing the next thing, whatever that is.
   If it isn't blocked in a syscall, this has no effect on the thread. */
void VG_(get_thread_out_of_syscall)(ThreadId tid)
{
   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(!VG_(is_running_thread)(tid));

   if (VG_(threads)[tid].status == VgTs_WaitSys) {
      if (VG_(clo_trace_signals)) {
         VG_(message)(Vg_DebugMsg,
                      "get_thread_out_of_syscall zaps tid %u lwp %d\n",
                      tid, VG_(threads)[tid].os_state.lwpid);
      }
#     if defined(VGO_darwin)
      {
         // GrP fixme use mach primitives on darwin?
         // GrP fixme thread_abort_safely?
         // GrP fixme race for thread with WaitSys set but not in syscall yet?
         extern kern_return_t thread_abort(mach_port_t);
         thread_abort(VG_(threads)[tid].os_state.lwpid);
      }
#     else
      {
         __attribute__((unused))
         Int r = VG_(tkill)(VG_(threads)[tid].os_state.lwpid, VG_SIGVGKILL);
         /* JRS 2009-Mar-20: should we assert for r==0 (tkill succeeded)?
            I'm really not sure.  Here's a race scenario which argues
            that we shouldn't; but equally I'm not sure the scenario is
            even possible, because of constraints caused by the question
            of who holds the BigLock when.

            Target thread tid does sys_read on a socket and blocks.  This
            function gets called, and we observe correctly that tid's
            status is WaitSys but then for whatever reason this function
            goes very slowly for a while.  Then data arrives from
            wherever, tid's sys_read returns, tid exits.  Then we do
            tkill on tid, but tid no longer exists; tkill returns an
            error code and the assert fails. */
         /* vg_assert(r == 0); */
      }
#     endif
   }
}

/*
   Yield the CPU for a short time to let some other thread run.
 */
void VG_(vg_yield)(void)
{
   ThreadId tid = VG_(running_tid);

   vg_assert(tid != VG_INVALID_THREADID);
   vg_assert(VG_(threads)[tid].os_state.lwpid == VG_(gettid)());

   VG_(release_BigLock)(tid, VgTs_Yielding, "VG_(vg_yield)");

   /*
      Tell the kernel we're yielding.
    */
#  if defined(VGO_linux) || defined(VGO_darwin)
   VG_(do_syscall0)(__NR_sched_yield);
#  elif defined(VGO_solaris)
   VG_(do_syscall0)(__NR_yield);
#  else
#    error Unknown OS
#  endif

   VG_(acquire_BigLock)(tid, "VG_(vg_yield)");
}


/* Set the standard set of blocked signals, used whenever we're not
   running a client syscall. */
static void block_signals(void)
{
   vki_sigset_t mask;

   VG_(sigfillset)(&mask);

   /* Don't block these because they're synchronous */
   VG_(sigdelset)(&mask, VKI_SIGSEGV);
   VG_(sigdelset)(&mask, VKI_SIGBUS);
   VG_(sigdelset)(&mask, VKI_SIGFPE);
   VG_(sigdelset)(&mask, VKI_SIGILL);
   VG_(sigdelset)(&mask, VKI_SIGTRAP);

   /* Can't block these anyway */
   VG_(sigdelset)(&mask, VKI_SIGSTOP);
   VG_(sigdelset)(&mask, VKI_SIGKILL);

   VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, NULL);
}

static void os_state_clear(ThreadState *tst)
{
   tst->os_state.lwpid       = 0;
   tst->os_state.threadgroup = 0;
   tst->os_state.stk_id      = NULL_STK_ID;
#  if defined(VGO_linux)
   /* no other fields to clear */
#  elif defined(VGO_darwin)
   tst->os_state.post_mach_trap_fn = NULL;
   tst->os_state.pthread           = 0;
   tst->os_state.func_arg          = 0;
   VG_(memset)(&tst->os_state.child_go, 0, sizeof(tst->os_state.child_go));
   VG_(memset)(&tst->os_state.child_done, 0, sizeof(tst->os_state.child_done));
   tst->os_state.wq_jmpbuf_valid   = False;
   tst->os_state.remote_port       = 0;
   tst->os_state.msgh_id           = 0;
   VG_(memset)(&tst->os_state.mach_args, 0, sizeof(tst->os_state.mach_args));
#  elif defined(VGO_solaris)
#  if defined(VGP_x86_solaris)
   tst->os_state.thrptr = 0;
#  endif
   tst->os_state.ustack = NULL;
   tst->os_state.in_door_return = False;
   tst->os_state.door_return_procedure = 0;
   tst->os_state.oldcontext = NULL;
   tst->os_state.schedctl_data = 0;
   tst->os_state.daemon_thread = False;
#  else
#    error "Unknown OS"
#  endif
}

static void os_state_init(ThreadState *tst)
{
   tst->os_state.valgrind_stack_base    = 0;
   tst->os_state.valgrind_stack_init_SP = 0;
   os_state_clear(tst);
}

static
void mostly_clear_thread_record ( ThreadId tid )
{
   vki_sigset_t savedmask;

   vg_assert(tid >= 0 && tid < VG_N_THREADS);
   VG_(cleanup_thread)(&VG_(threads)[tid].arch);
   VG_(threads)[tid].tid = tid;

   /* Leave the thread in Zombie, so that it doesn't get reallocated
      until the caller is finally done with the thread stack. */
   VG_(threads)[tid].status               = VgTs_Zombie;

   VG_(sigemptyset)(&VG_(threads)[tid].sig_mask);
   VG_(sigemptyset)(&VG_(threads)[tid].tmp_sig_mask);

   os_state_clear(&VG_(threads)[tid]);

   /* start with no altstack */
   VG_(threads)[tid].altstack.ss_sp = (void *)0xdeadbeef;
   VG_(threads)[tid].altstack.ss_size = 0;
   VG_(threads)[tid].altstack.ss_flags = VKI_SS_DISABLE;

   VG_(clear_out_queued_signals)(tid, &savedmask);

   VG_(threads)[tid].sched_jmpbuf_valid = False;
}

/*
   Called in the child after fork.  If the parent has multiple
   threads, then we've inherited a VG_(threads) array describing them,
   but only the thread which called fork() is actually alive in the
   child.  This function needs to clean up all those other thread
   structures.

   Whichever tid in the parent called fork() becomes the
   master_tid in the child.  That's because the only living slot in
   VG_(threads) in the child after fork is VG_(threads)[tid], and it
   would be too hard to try to re-number the thread and relocate the
   thread state down to VG_(threads)[1].

   This function also needs to reinitialize the_BigLock, since
   otherwise we may end up sharing its state with the parent, which
   would be deeply confusing.
*/
static void sched_fork_cleanup(ThreadId me)
{
   ThreadId tid;
   vg_assert(VG_(running_tid) == me);

#  if defined(VGO_darwin)
   // GrP fixme hack reset Mach ports
   VG_(mach_init)();
#  endif

   VG_(threads)[me].os_state.lwpid = VG_(gettid)();
   VG_(threads)[me].os_state.threadgroup = VG_(getpid)();

   /* clear out all the unused thread slots */
   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (tid != me) {
         mostly_clear_thread_record(tid);
         VG_(threads)[tid].status = VgTs_Empty;
         VG_(clear_syscallInfo)(tid);
      }
   }

   /* re-init and take the sema */
   deinit_BigLock();
   init_BigLock();
   VG_(acquire_BigLock_LL)(NULL);
}


/* First phase of initialisation of the scheduler.  Initialise the
   bigLock, zeroise the VG_(threads) structure and decide on the
   ThreadId of the root thread.
*/
ThreadId VG_(scheduler_init_phase1) ( void )
{
   Int i;
   ThreadId tid_main;

   VG_(debugLog)(1,"sched","sched_init_phase1\n");

   if (VG_(clo_fair_sched) != disable_fair_sched
       && !ML_(set_sched_lock_impl)(sched_lock_ticket)
       && VG_(clo_fair_sched) == enable_fair_sched)
   {
      VG_(printf)("Error: fair scheduling is not supported on this system.\n");
      VG_(exit)(1);
   }

   if (VG_(clo_verbosity) > 1) {
      VG_(message)(Vg_DebugMsg,
                   "Scheduler: using %s scheduler lock implementation.\n",
                   ML_(get_sched_lock_name)());
   }

   init_BigLock();

   for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
      /* Paranoia .. completely zero it out. */
      VG_(memset)( & VG_(threads)[i], 0, sizeof( VG_(threads)[i] ) );

      VG_(threads)[i].sig_queue = NULL;

      os_state_init(&VG_(threads)[i]);
      mostly_clear_thread_record(i);

      VG_(threads)[i].status                    = VgTs_Empty;
      VG_(threads)[i].client_stack_szB          = 0;
      VG_(threads)[i].client_stack_highest_byte = (Addr)NULL;
      VG_(threads)[i].err_disablement_level     = 0;
      VG_(threads)[i].thread_name               = NULL;
   }

   tid_main = VG_(alloc_ThreadState)();

   /* Bleh.  Unfortunately there are various places in the system that
      assume that the main thread has a ThreadId of 1.
      - Helgrind (possibly)
      - stack overflow message in default_action() in m_signals.c
      - definitely a lot more places
   */
   vg_assert(tid_main == 1);

   return tid_main;
}


/* Second phase of initialisation of the scheduler.  Given the root
   ThreadId computed by first phase of initialisation, fill in stack
   details and acquire bigLock.  Initialise the scheduler.  This is
   called at startup.  The caller subsequently initialises the guest
   state components of this main thread.
*/
void VG_(scheduler_init_phase2) ( ThreadId tid_main,
                                  Addr     clstack_end,
                                  SizeT    clstack_size )
{
   VG_(debugLog)(1,"sched","sched_init_phase2: tid_main=%u, "
                   "cls_end=0x%lx, cls_sz=%lu\n",
                   tid_main, clstack_end, clstack_size);

   vg_assert(VG_IS_PAGE_ALIGNED(clstack_end+1));
   vg_assert(VG_IS_PAGE_ALIGNED(clstack_size));

   VG_(threads)[tid_main].client_stack_highest_byte
      = clstack_end;
   VG_(threads)[tid_main].client_stack_szB
      = clstack_size;

   VG_(atfork)(NULL, NULL, sched_fork_cleanup);
}


/* ---------------------------------------------------------------------
   Helpers for running translations.
   ------------------------------------------------------------------ */

/* Use gcc's built-in setjmp/longjmp.  longjmp must not restore signal
   mask state, but does need to pass "val" through.  jumped must be a
   volatile UWord. */
#define SCHEDSETJMP(tid, jumped, stmt)                                  \
   do {                                                                 \
      ThreadState * volatile _qq_tst = VG_(get_ThreadState)(tid);       \
                                                                        \
      (jumped) = VG_MINIMAL_SETJMP(_qq_tst->sched_jmpbuf);              \
      if ((jumped) == ((UWord)0)) {                                     \
         vg_assert(!_qq_tst->sched_jmpbuf_valid);                       \
         _qq_tst->sched_jmpbuf_valid = True;                            \
         stmt;                                                          \
      } else if (VG_(clo_trace_sched))                                  \
         VG_(printf)("SCHEDSETJMP(line %d) tid %u, jumped=%lu\n",       \
                     __LINE__, tid, jumped);                            \
      vg_assert(_qq_tst->sched_jmpbuf_valid);                           \
      _qq_tst->sched_jmpbuf_valid = False;                              \
   } while(0)
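
/* Illustrative sketch (editor's addition, not part of Valgrind): the
   control flow SCHEDSETJMP encodes.  On the initial return of
   VG_MINIMAL_SETJMP, 'jumped' is 0 and the guarded statement runs with
   sched_jmpbuf_valid set; if a signal handler later longjmps through
   sched_jmpbuf, control resumes here with 'jumped' nonzero and the
   statement is NOT resumed.  run_client_code() is a hypothetical
   stand-in for the guarded statement.  Compiled out via #if 0. */
#if 0
{
   volatile UWord jumped;
   SCHEDSETJMP(tid, jumped, run_client_code() /* hypothetical */);
   if (jumped != (UWord)0) {
      /* Arrived via longjmp from a signal handler, not via a normal
         return from run_client_code(). */
   }
}
#endif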


/* Do various guest state alignment checks prior to running a thread.
   Specifically, check that what we have matches Vex's guest state
   layout requirements.  See libvex.h for details, but in short the
   requirements are: There must be no holes in between the primary
   guest state, its two copies, and the spill area.  In short, all 4
   areas must be aligned on the LibVEX_GUEST_STATE_ALIGN boundary and
   be placed back-to-back without holes in between. */
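/* Editor's sketch of the required layout (a summary of the checks
   below, not authoritative), where S = sizeof(guest state) and every
   boundary is LibVEX_GUEST_STATE_ALIGN-aligned:

      a_vex           primary guest state     (S bytes)
      a_vex + 1*S  == a_vexsh1, shadow 1      (S bytes)
      a_vex + 2*S  == a_vexsh2, shadow 2      (S bytes)
      a_vex + 3*S  == a_spill, spill area     (LibVEX_N_SPILL_BYTES)

   Memcheck relies on this: it addresses the shadows at fixed offsets
   from the primary guest state. */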
static void do_pre_run_checks ( volatile ThreadState* tst )
{
   Addr a_vex     = (Addr) & tst->arch.vex;
   Addr a_vexsh1  = (Addr) & tst->arch.vex_shadow1;
   Addr a_vexsh2  = (Addr) & tst->arch.vex_shadow2;
   Addr a_spill   = (Addr) & tst->arch.vex_spill;
   UInt sz_vex    = (UInt) sizeof tst->arch.vex;
   UInt sz_vexsh1 = (UInt) sizeof tst->arch.vex_shadow1;
   UInt sz_vexsh2 = (UInt) sizeof tst->arch.vex_shadow2;
   UInt sz_spill  = (UInt) sizeof tst->arch.vex_spill;

   if (0)
   VG_(printf)("gst %p %u, sh1 %p %u, "
               "sh2 %p %u, spill %p %u\n",
               (void*)a_vex, sz_vex,
               (void*)a_vexsh1, sz_vexsh1,
               (void*)a_vexsh2, sz_vexsh2,
               (void*)a_spill, sz_spill );

   vg_assert(sz_vex    % LibVEX_GUEST_STATE_ALIGN == 0);
   vg_assert(sz_vexsh1 % LibVEX_GUEST_STATE_ALIGN == 0);
   vg_assert(sz_vexsh2 % LibVEX_GUEST_STATE_ALIGN == 0);
   vg_assert(sz_spill  % LibVEX_GUEST_STATE_ALIGN == 0);

   vg_assert(a_vex    % LibVEX_GUEST_STATE_ALIGN == 0);
   vg_assert(a_vexsh1 % LibVEX_GUEST_STATE_ALIGN == 0);
   vg_assert(a_vexsh2 % LibVEX_GUEST_STATE_ALIGN == 0);
   vg_assert(a_spill  % LibVEX_GUEST_STATE_ALIGN == 0);

   /* Check that the guest state and its two shadows have the same
      size, and that there are no holes in between.  The latter is
      important because Memcheck assumes that it can reliably access
      the shadows by indexing off a pointer to the start of the
      primary guest state area. */
   vg_assert(sz_vex == sz_vexsh1);
   vg_assert(sz_vex == sz_vexsh2);
   vg_assert(a_vex + 1 * sz_vex == a_vexsh1);
   vg_assert(a_vex + 2 * sz_vex == a_vexsh2);
   /* Also check there's no hole between the second shadow area and
      the spill area. */
   vg_assert(sz_spill == LibVEX_N_SPILL_BYTES);
   vg_assert(a_vex + 3 * sz_vex == a_spill);

#  if defined(VGA_x86)
   /* x86 XMM regs must form an array, ie, have no holes in
      between. */
   vg_assert(
      (offsetof(VexGuestX86State,guest_XMM7)
       - offsetof(VexGuestX86State,guest_XMM0))
      == (8/*#regs*/-1) * 16/*bytes per reg*/
   );
   vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestX86State,guest_XMM0)));
   vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestX86State,guest_FPREG)));
   vg_assert(8 == offsetof(VexGuestX86State,guest_EAX));
   vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EAX)));
   vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EIP)));
#  endif

#  if defined(VGA_amd64)
   /* amd64 YMM regs must form an array, ie, have no holes in
      between. */
   vg_assert(
      (offsetof(VexGuestAMD64State,guest_YMM16)
       - offsetof(VexGuestAMD64State,guest_YMM0))
      == (17/*#regs*/-1) * 32/*bytes per reg*/
   );
   vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestAMD64State,guest_YMM0)));
   vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_FPREG)));
   vg_assert(16 == offsetof(VexGuestAMD64State,guest_RAX));
   vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RAX)));
   vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RIP)));
#  endif

#  if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* ppc guest_state vector regs must be 16 byte aligned for
      loads/stores.  This is important! */
   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR0));
   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR0));
   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR0));
   /* be extra paranoid .. */
   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR1));
   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR1));
   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR1));
#  endif

#  if defined(VGA_arm)
   /* arm guest_state VFP regs must be 8 byte aligned for
      loads/stores.  Let's use 16 just to be on the safe side. */
   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_D0));
   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_D0));
   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_D0));
   /* be extra paranoid .. */
   vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_D1));
   vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_D1));
   vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_D1));
#  endif

#  if defined(VGA_arm64)
   vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_X0));
   vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_X0));
   vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_X0));
   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_Q0));
   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_Q0));
   vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_Q0));
#  endif

#  if defined(VGA_s390x)
   /* no special requirements */
#  endif

#  if defined(VGA_mips32) || defined(VGA_mips64)
   /* no special requirements */
#  endif
}

// NO_VGDB_POLL value ensures vgdb is not polled, while
// VGDB_POLL_ASAP ensures that the next scheduler call
// will cause a poll.
#define NO_VGDB_POLL    0xffffffffffffffffULL
#define VGDB_POLL_ASAP  0x0ULL

void VG_(disable_vgdb_poll) ( void )
{
   vgdb_next_poll = NO_VGDB_POLL;
}
void VG_(force_vgdb_poll) ( void )
{
   vgdb_next_poll = VGDB_POLL_ASAP;
}

/* Run the thread tid for a while, and return a VG_TRC_* value
   indicating why VG_(disp_run_translations) stopped, and possibly an
   auxiliary word.  Also, only allow the thread to run for at most
   *dispatchCtrP events.  If (as is the normal case) use_alt_host_addr
   is False, we are running ordinary redir'd translations, and we
   should therefore start by looking up the guest next IP in TT.  If
   it is True then we ignore the guest next IP and just run from
   alt_host_addr, which presumably points at host code for a no-redir
   translation.

   Return results are placed in two_words.  two_words[0] is set to the
   TRC.  In the case where that is VG_TRC_CHAIN_ME_TO_{SLOW,FAST}_EP,
   the address to patch is placed in two_words[1].
*/
static
void run_thread_for_a_while ( /*OUT*/HWord* two_words,
                              /*MOD*/Int*   dispatchCtrP,
                              ThreadId      tid,
                              HWord         alt_host_addr,
                              Bool          use_alt_host_addr )
{
   volatile HWord        jumped         = 0;
   volatile ThreadState* tst            = NULL; /* stop gcc complaining */
   volatile Int          done_this_time = 0;
   volatile HWord        host_code_addr = 0;

   /* Paranoia */
   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(VG_(is_running_thread)(tid));
   vg_assert(!VG_(is_exiting)(tid));
   vg_assert(*dispatchCtrP > 0);

   tst = VG_(get_ThreadState)(tid);
   do_pre_run_checks( tst );
   /* end Paranoia */

   /* Futz with the XIndir stats counters. */
   vg_assert(VG_(stats__n_xindirs_32) == 0);
   vg_assert(VG_(stats__n_xindir_misses_32) == 0);

   /* Clear return area. */
   two_words[0] = two_words[1] = 0;

   /* Figure out where we're starting from. */
   if (use_alt_host_addr) {
      /* unusual case -- no-redir translation */
      host_code_addr = alt_host_addr;
   } else {
      /* normal case -- redir translation */
      UInt cno = (UInt)VG_TT_FAST_HASH((Addr)tst->arch.vex.VG_INSTR_PTR);
      if (LIKELY(VG_(tt_fast)[cno].guest == (Addr)tst->arch.vex.VG_INSTR_PTR))
         host_code_addr = VG_(tt_fast)[cno].host;
      else {
         Addr res = 0;
         /* not found in VG_(tt_fast).  Searching the transtab here
            improves performance compared to returning directly to
            the scheduler. */
         Bool found = VG_(search_transtab)(&res, NULL, NULL,
                                           (Addr)tst->arch.vex.VG_INSTR_PTR,
                                           True/*upd cache*/
                                           );
         if (LIKELY(found)) {
            host_code_addr = res;
         } else {
            /* At this point, we know that we intended to start at a
               normal redir translation, but it was not found.  In
               which case we can return now claiming it's not
               findable. */
            two_words[0] = VG_TRC_INNER_FASTMISS; /* hmm, is that right? */
            return;
         }
      }
   }
   /* We have either a no-redir or a redir translation. */
   vg_assert(host_code_addr != 0); /* implausible */

   /* there should be no undealt-with signals */
   //vg_assert(VG_(threads)[tid].siginfo.si_signo == 0);

   /* Set up event counter stuff for the run. */
   tst->arch.vex.host_EvC_COUNTER = *dispatchCtrP;
   tst->arch.vex.host_EvC_FAILADDR
      = (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail) );

   /* Invalidate any in-flight LL/SC transactions, in the case that we're
      using the fallback LL/SC implementation.  See bugs 344524 and 369459. */
#  if defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
   tst->arch.vex.guest_LLaddr = (HWord)(-1);
#  elif defined(VGP_arm64_linux)
   tst->arch.vex.guest_LLSC_SIZE = 0;
#  endif

   if (0) {
      vki_sigset_t m;
      Int i, err = VG_(sigprocmask)(VKI_SIG_SETMASK, NULL, &m);
      vg_assert(err == 0);
      VG_(printf)("tid %u: entering code with unblocked signals: ", tid);
      for (i = 1; i <= _VKI_NSIG; i++)
         if (!VG_(sigismember)(&m, i))
            VG_(printf)("%d ", i);
      VG_(printf)("\n");
   }

   /* Set up return-value area. */

   // Tell the tool this thread is about to run client code
   VG_TRACK( start_client_code, tid, bbs_done );

   vg_assert(VG_(in_generated_code) == False);
   VG_(in_generated_code) = True;

   SCHEDSETJMP(
      tid,
      jumped,
      VG_(disp_run_translations)(
         two_words,
         (volatile void*)&tst->arch.vex,
         host_code_addr
      )
   );

   vg_assert(VG_(in_generated_code) == True);
   VG_(in_generated_code) = False;

   if (jumped != (HWord)0) {
      /* We get here if the client took a fault that caused our signal
         handler to longjmp. */
      vg_assert(two_words[0] == 0 && two_words[1] == 0); // correct?
      two_words[0] = VG_TRC_FAULT_SIGNAL;
      two_words[1] = 0;
      block_signals();
   }

   /* Merge the 32-bit XIndir/miss counters into the 64-bit versions,
      and zero out the 32-bit ones in preparation for the next run of
      generated code. */
   stats__n_xindirs += (ULong)VG_(stats__n_xindirs_32);
   VG_(stats__n_xindirs_32) = 0;
   stats__n_xindir_misses += (ULong)VG_(stats__n_xindir_misses_32);
   VG_(stats__n_xindir_misses_32) = 0;

   /* Inspect the event counter. */
   vg_assert((Int)tst->arch.vex.host_EvC_COUNTER >= -1);
   vg_assert(tst->arch.vex.host_EvC_FAILADDR
             == (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail)) );

   /* The number of events done this time is the difference between
      the event counter originally and what it is now.  Except -- if
      it has gone negative (to -1) then the transition 0 to -1 doesn't
      correspond to a real executed block, so back it out.  It's like
      this because the event checks decrement the counter first and
      check it for negativeness second, hence the 0 to -1 transition
      causes a bailout and the block it happens in isn't executed. */
   {
     Int dispatchCtrAfterwards = (Int)tst->arch.vex.host_EvC_COUNTER;
     done_this_time = *dispatchCtrP - dispatchCtrAfterwards;
     if (dispatchCtrAfterwards == -1) {
        done_this_time--;
     } else {
        /* If the generated code drives the counter below -1, something
           is seriously wrong. */
        vg_assert(dispatchCtrAfterwards >= 0);
     }
   }

   vg_assert(done_this_time >= 0);
   bbs_done += (ULong)done_this_time;

   *dispatchCtrP -= done_this_time;
   vg_assert(*dispatchCtrP >= 0);

   // Tell the tool this thread has stopped running client code
   VG_TRACK( stop_client_code, tid, bbs_done );

   if (bbs_done >= vgdb_next_poll) {
      if (VG_(clo_vgdb_poll))
         vgdb_next_poll = bbs_done + (ULong)VG_(clo_vgdb_poll);
      else
         /* value was changed due to gdbserver invocation via ptrace */
         vgdb_next_poll = NO_VGDB_POLL;
      if (VG_(gdbserver_activity) (tid))
         VG_(gdbserver) (tid);
   }

   /* TRC value and possible auxiliary patch-address word are already
      in two_words[0] and [1] respectively, as a result of the call to
      VG_(disp_run_translations). */
   /* Stay sane .. */
   if (two_words[0] == VG_TRC_CHAIN_ME_TO_SLOW_EP
       || two_words[0] == VG_TRC_CHAIN_ME_TO_FAST_EP) {
      vg_assert(two_words[1] != 0); /* we have a legit patch addr */
   } else {
      vg_assert(two_words[1] == 0); /* nobody messed with it */
   }
}


/* ---------------------------------------------------------------------
   The scheduler proper.
   ------------------------------------------------------------------ */

static void handle_tt_miss ( ThreadId tid )
{
   Bool found;
   Addr ip = VG_(get_IP)(tid);

   /* Trivial event.  Miss in the fast-cache.  Do a full
      lookup for it. */
   found = VG_(search_transtab)( NULL, NULL, NULL,
                                 ip, True/*upd_fast_cache*/ );
   if (UNLIKELY(!found)) {
      /* Not found; we need to request a translation. */
      if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
                          bbs_done, True/*allow redirection*/ )) {
         found = VG_(search_transtab)( NULL, NULL, NULL,
                                       ip, True );
         vg_assert2(found, "handle_tt_miss: missing tt_fast entry");

      } else {
         // If VG_(translate)() fails, it's because it had to throw a
         // signal because the client jumped to a bad address.  That
         // means that either a signal has been set up for delivery,
         // or the thread has been marked for termination.  Either
         // way, we just need to go back into the scheduler loop.
      }
   }
}

static
void handle_chain_me ( ThreadId tid, void* place_to_chain, Bool toFastEP )
{
   Bool  found    = False;
   Addr  ip       = VG_(get_IP)(tid);
   SECno to_sNo   = INV_SNO;
   TTEno to_tteNo = INV_TTE;

   found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
                                 ip, False/*dont_upd_fast_cache*/ );
   if (!found) {
      /* Not found; we need to request a translation. */
      if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
                          bbs_done, True/*allow redirection*/ )) {
         found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
                                       ip, False );
         vg_assert2(found, "handle_chain_me: missing tt_fast entry");
      } else {
         // If VG_(translate)() fails, it's because it had to throw a
         // signal because the client jumped to a bad address.  That
         // means that either a signal has been set up for delivery,
         // or the thread has been marked for termination.  Either
         // way, we just need to go back into the scheduler loop.
         return;
      }
   }
   vg_assert(found);
   vg_assert(to_sNo != INV_SNO);
   vg_assert(to_tteNo != INV_TTE);

   /* So, finally we know where to patch through to.  Do the patching
      and update the various admin tables that allow it to be undone
      in the case that the destination block gets deleted. */
   VG_(tt_tc_do_chaining)( place_to_chain,
                           to_sNo, to_tteNo, toFastEP );
}

static void handle_syscall(ThreadId tid, UInt trc)
{
   ThreadState * volatile tst = VG_(get_ThreadState)(tid);
   volatile UWord jumped;

   /* Syscall may or may not block; either way, it will be
      complete by the time this call returns, and we'll be
      runnable again.  We could take a signal while the
      syscall runs. */

   if (VG_(clo_sanity_level) >= 3) {
      HChar buf[50];    // large enough
      VG_(sprintf)(buf, "(BEFORE SYSCALL, tid %u)", tid);
      Bool ok = VG_(am_do_sync_check)(buf, __FILE__, __LINE__);
      vg_assert(ok);
   }

   SCHEDSETJMP(tid, jumped, VG_(client_syscall)(tid, trc));

   if (VG_(clo_sanity_level) >= 3) {
      HChar buf[50];    // large enough
      VG_(sprintf)(buf, "(AFTER SYSCALL, tid %u)", tid);
      Bool ok = VG_(am_do_sync_check)(buf, __FILE__, __LINE__);
      vg_assert(ok);
   }

   if (!VG_(is_running_thread)(tid))
      VG_(printf)("tid %u not running; VG_(running_tid)=%u, tid %u status %u\n",
                  tid, VG_(running_tid), tid, tst->status);
   vg_assert(VG_(is_running_thread)(tid));

   if (jumped != (UWord)0) {
      block_signals();
      VG_(poll_signals)(tid);
   }
}

/* tid just requested a jump to the noredir version of its current
   program counter.  So make up that translation if needed, run it,
   and return the resulting thread return code in two_words[]. */
static
void handle_noredir_jump ( /*OUT*/HWord* two_words,
                           /*MOD*/Int*   dispatchCtrP,
                           ThreadId tid )
{
   /* Clear return area. */
   two_words[0] = two_words[1] = 0;

   Addr  hcode = 0;
   Addr  ip    = VG_(get_IP)(tid);

   Bool  found = VG_(search_unredir_transtab)( &hcode, ip );
   if (!found) {
      /* Not found; we need to request a translation. */
      if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/, bbs_done,
                          False/*NO REDIRECTION*/ )) {

         found = VG_(search_unredir_transtab)( &hcode, ip );
         vg_assert2(found, "unredir translation missing after creation?!");
      } else {
         // If VG_(translate)() fails, it's because it had to throw a
         // signal because the client jumped to a bad address.  That
         // means that either a signal has been set up for delivery,
         // or the thread has been marked for termination.  Either
         // way, we just need to go back into the scheduler loop.
         two_words[0] = VG_TRC_BORING;
         return;
      }

   }

   vg_assert(found);
   vg_assert(hcode != 0);

   /* Otherwise run it and return the resulting VG_TRC_* value. */
   vg_assert(*dispatchCtrP > 0); /* so as to guarantee progress */
   run_thread_for_a_while( two_words, dispatchCtrP, tid,
                           hcode, True/*use hcode*/ );
}


/*
   Run a thread until it wants to exit.

   We assume that the caller has already called VG_(acquire_BigLock) for
   us, so we own the VCPU.  Also, all signals are blocked.
 */
VgSchedReturnCode VG_(scheduler) ( ThreadId tid )
{
   /* Holds the remaining size of this thread's "timeslice". */
   Int dispatch_ctr = 0;

   ThreadState *tst = VG_(get_ThreadState)(tid);
   static Bool vgdb_startup_action_done = False;

   if (VG_(clo_trace_sched))
      print_sched_event(tid, "entering VG_(scheduler)");

   /* Do vgdb initialization (but once).  Only the first (main) thread
      starting up will do the below.
      Initialising gdbserver earlier than at the first call to
      VG_(scheduler) causes problems:
      * at the end of VG_(scheduler_init_phase2) :
        The main thread is in VgTs_Init state, but in a not yet
        consistent state => the thread cannot be reported to gdb
        (e.g. causes an assert in LibVEX_GuestX86_get_eflags when giving
        back the guest registers to gdb).
      * at the end of valgrind_main, just
        before VG_(main_thread_wrapper_NORETURN)(1) :
        The main thread is still in VgTs_Init state but in a
        more advanced state.  However, the thread state is not yet
        completely initialized: among other things, the os_state is
        not yet fully set => the thread is then not properly reported
        to gdb, which is then confused (causing e.g. a duplicate thread
        to be shown, without thread id).
      * it would be possible to initialize gdbserver "lower" in the
        call stack (e.g. in VG_(main_thread_wrapper_NORETURN)) but
        these are platform dependent and the place at which
        the thread state is completely initialized is not
        specific anymore to the main thread (so a similar "do it only
        once" would be needed).

        => a "once only" initialization here is the best compromise. */
   if (!vgdb_startup_action_done) {
      vg_assert(tid == 1); // it must be the main thread.
      vgdb_startup_action_done = True;
      if (VG_(clo_vgdb) != Vg_VgdbNo) {
         /* If we have to poll, ensure we do an initial poll at the first
            scheduler call.  Otherwise, ensure no poll (unless interrupted
            by ptrace). */
         if (VG_(clo_vgdb_poll))
            VG_(force_vgdb_poll) ();
         else
            VG_(disable_vgdb_poll) ();

         vg_assert (VG_(dyn_vgdb_error) == VG_(clo_vgdb_error));
         /* As we are initializing, VG_(dyn_vgdb_error) can't have been
            changed yet. */

         VG_(gdbserver_prerun_action) (1);
      } else {
         VG_(disable_vgdb_poll) ();
      }
   }

   if (SimHintiS(SimHint_no_nptl_pthread_stackcache, VG_(clo_sim_hints))
       && tid != 1) {
      /* We disable the stack cache the first time we see a thread other
         than the main thread appearing.  At this moment, we are sure the
         pthread lib loading is done and the variable was initialised by
         the pthread lib. */
      if (VG_(client__stack_cache_actsize__addr)) {
         if (*VG_(client__stack_cache_actsize__addr) == 0) {
            VG_(debugLog)(1,"sched",
                          "pthread stack cache size disable done"
                          " via kludge\n");
            *VG_(client__stack_cache_actsize__addr) = 1000 * 1000 * 1000;
            /* Set a value big enough to be above the hardcoded maximum stack
               cache size in glibc, small enough to allow a pthread stack size
               to be added without risk of overflow. */
         }
      } else {
         VG_(debugLog)(0,"sched",
                       "WARNING: pthread stack cache cannot be disabled!\n");
         VG_(clo_sim_hints) &= ~SimHint2S(SimHint_no_nptl_pthread_stackcache);
         /* Remove SimHint_no_nptl_pthread_stackcache from VG_(clo_sim_hints)
            to avoid having a msg for all following threads. */
      }
   }

   /* set the proper running signal mask */
   block_signals();

   vg_assert(VG_(is_running_thread)(tid));

   dispatch_ctr = SCHEDULING_QUANTUM;

   while (!VG_(is_exiting)(tid)) {

      vg_assert(dispatch_ctr >= 0);
      if (dispatch_ctr == 0) {

         /* Our slice is done, so yield the CPU to another thread.  On
            Linux this doesn't actually sleep between releasing and
            re-acquiring the lock, since that would take too much time. */

         /* 4 July 06: it seems that a zero-length nsleep is needed to
            cause async thread cancellation (canceller.c) to terminate
            in finite time; else it is in some kind of race/starvation
            situation and completion is arbitrarily delayed (although
            this is not a deadlock).

            Unfortunately these sleeps cause MPI jobs not to terminate
            sometimes (some kind of livelock).  So sleeping once
            every N opportunities appears to work. */

         /* 3 Aug 06: doing sys__nsleep works but crashes some apps.
            sys_yield also helps the problem, whilst not crashing apps. */

         VG_(release_BigLock)(tid, VgTs_Yielding,
                                   "VG_(scheduler):timeslice");
         /* ------------ now we don't have The Lock ------------ */

         VG_(acquire_BigLock)(tid, "VG_(scheduler):timeslice");
         /* ------------ now we do have The Lock ------------ */

         /* OK, do some relatively expensive housekeeping stuff */
         scheduler_sanity(tid);
         VG_(sanity_check_general)(False);

         /* Look for any pending signals for this thread, and set them up
            for delivery */
         VG_(poll_signals)(tid);

         if (VG_(is_exiting)(tid))
            break;   /* poll_signals picked up a fatal signal */

         /* For stats purposes only. */
         n_scheduling_events_MAJOR++;

         /* Figure out how many bbs to ask VG_(disp_run_translations)
            to do. */
         dispatch_ctr = SCHEDULING_QUANTUM;

         /* paranoia ... */
         vg_assert(tst->tid == tid);
         vg_assert(tst->os_state.lwpid == VG_(gettid)());
      }

      /* For stats purposes only. */
      n_scheduling_events_MINOR++;

      if (0)
         VG_(message)(Vg_DebugMsg, "thread %u: running for %d bbs\n",
                                   tid, dispatch_ctr - 1 );
1342 
1343       HWord trc[2]; /* "two_words" */
1344       run_thread_for_a_while( &trc[0],
1345                               &dispatch_ctr,
1346                               tid, 0/*ignored*/, False );
1347 
1348       if (VG_(clo_trace_sched) && VG_(clo_verbosity) > 2) {
1349          const HChar *name = name_of_sched_event(trc[0]);
1350          HChar buf[VG_(strlen)(name) + 10];    // large enough
1351 	 VG_(sprintf)(buf, "TRC: %s", name);
1352 	 print_sched_event(tid, buf);
1353       }
1354 
1355       if (trc[0] == VEX_TRC_JMP_NOREDIR) {
1356          /* If we got a request to run a no-redir version of
1357             something, do so now -- handle_noredir_jump just (creates
1358             and) runs that one translation.  The flip side is that the
1359             noredir translation can't itself return another noredir
1360             request -- that would be nonsensical.  It can, however,
1361             return VG_TRC_BORING, which just means keep going as
1362             normal. */
1363          /* Note that the fact that we need to continue with a
1364             no-redir jump is not recorded anywhere else in this
1365             thread's state.  So we *must* execute the block right now
1366             -- we can't fail to execute it and later resume with it,
1367             because by then we'll have forgotten the fact that it
1368             should be run as no-redir, but will get run as a normal
1369             potentially-redir'd, hence screwing up.  This really ought
1370             to be cleaned up, by noting in the guest state that the
1371             next block to be executed should be no-redir.  Then we can
1372             suspend and resume at any point, which isn't the case at
1373             the moment. */
         /* We can't enter a no-redir translation with the dispatch
            ctr set to zero, for the reasons commented just above --
            we need to force it to execute right now.  So, if the
            dispatch ctr is zero, set it to one.  Note that this would
            have the bad side effect of holding the Big Lock arbitrarily
            long should there be an arbitrarily long sequence of
            back-to-back no-redir translations to run.  But we assert
            just below that this translation cannot request another
            no-redir jump, so we should be safe against that. */
         if (dispatch_ctr == 0) {
            dispatch_ctr = 1;
         }
         handle_noredir_jump( &trc[0],
                              &dispatch_ctr,
                              tid );
         vg_assert(trc[0] != VEX_TRC_JMP_NOREDIR);

         /* This can't be allowed to happen, since it means the block
            didn't execute, and we have no way to resume-as-noredir
            after we get more timeslice.  But I don't think it ever
            can, since handle_noredir_jump will assert if the counter
            is zero on entry. */
         vg_assert(trc[0] != VG_TRC_INNER_COUNTERZERO);
         /* This asserts the same thing. */
         vg_assert(dispatch_ctr >= 0);

         /* A no-redir translation can't return with a chain-me
            request, since chaining in the no-redir cache is too
            complex. */
         vg_assert(trc[0] != VG_TRC_CHAIN_ME_TO_SLOW_EP
                   && trc[0] != VG_TRC_CHAIN_ME_TO_FAST_EP);
      }

      switch (trc[0]) {
      case VEX_TRC_JMP_BORING:
         /* assisted dispatch, no event.  Used by no-redir
            translations to force return to the scheduler. */
      case VG_TRC_BORING:
         /* no special event, just keep going. */
         break;

      case VG_TRC_INNER_FASTMISS:
         vg_assert(dispatch_ctr >= 0);
         handle_tt_miss(tid);
         break;

      case VG_TRC_CHAIN_ME_TO_SLOW_EP: {
         if (0) VG_(printf)("sched: CHAIN_TO_SLOW_EP: %p\n", (void*)trc[1] );
         handle_chain_me(tid, (void*)trc[1], False);
         break;
      }

      case VG_TRC_CHAIN_ME_TO_FAST_EP: {
         if (0) VG_(printf)("sched: CHAIN_TO_FAST_EP: %p\n", (void*)trc[1] );
         handle_chain_me(tid, (void*)trc[1], True);
         break;
      }

      case VEX_TRC_JMP_CLIENTREQ:
         do_client_request(tid);
         break;

      case VEX_TRC_JMP_SYS_INT128:  /* x86-linux */
      case VEX_TRC_JMP_SYS_INT129:  /* x86-darwin */
      case VEX_TRC_JMP_SYS_INT130:  /* x86-darwin */
      case VEX_TRC_JMP_SYS_INT145:  /* x86-solaris */
      case VEX_TRC_JMP_SYS_INT210:  /* x86-solaris */
      /* amd64-linux, ppc32-linux, amd64-darwin, amd64-solaris */
      case VEX_TRC_JMP_SYS_SYSCALL:
         handle_syscall(tid, trc[0]);
         if (VG_(clo_sanity_level) > 2)
            VG_(sanity_check_general)(True); /* sanity-check every syscall */
         break;

      case VEX_TRC_JMP_YIELD:
         /* Explicit yield, because this thread is in a spin-lock
            or something.  Only let the thread run for a short while
            longer.  Because swapping to another thread is expensive,
            we're prepared to let this thread eat a little more CPU
            before swapping to another.  That means that short term
            spins waiting for hardware to poke memory won't cause a
            thread swap. */
         if (dispatch_ctr > 300)
            dispatch_ctr = 300;
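         /* That is (illustrative): allow at most ~300 more basic
            blocks on this timeslice before the counter hits zero and
            the scheduler considers a thread swap. */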
         break;

      case VG_TRC_INNER_COUNTERZERO:
         /* Timeslice is out.  Let a new thread be scheduled. */
         vg_assert(dispatch_ctr == 0);
         break;

      case VG_TRC_FAULT_SIGNAL:
         /* Everything should be set up (either we're exiting, or
            about to start in a signal handler). */
         break;

      case VEX_TRC_JMP_MAPFAIL:
         /* Failure of arch-specific address translation (x86/amd64
            segment override use) */
         /* jrs 2005 03 11: is this correct? */
         VG_(synth_fault)(tid);
         break;

      case VEX_TRC_JMP_EMWARN: {
         static Int  counts[EmNote_NUMBER];
         static Bool counts_initted = False;
         VexEmNote ew;
         const HChar* what;
         Bool      show;
         Int       q;
         if (!counts_initted) {
            counts_initted = True;
            for (q = 0; q < EmNote_NUMBER; q++)
               counts[q] = 0;
         }
         ew   = (VexEmNote)VG_(threads)[tid].arch.vex.guest_EMNOTE;
         what = (ew < 0 || ew >= EmNote_NUMBER)
                   ? "unknown (?!)"
                   : LibVEX_EmNote_string(ew);
         show = (ew < 0 || ew >= EmNote_NUMBER)
                   ? True
                   : counts[ew]++ < 3;
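         /* That is: out-of-range (unrecognised) notes are always
            shown; recognised ones are shown only the first three
            times they occur. */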
         if (show && VG_(clo_show_emwarns) && !VG_(clo_xml)) {
            VG_(message)( Vg_UserMsg,
                          "Emulation warning: unsupported action:\n");
            VG_(message)( Vg_UserMsg, "  %s\n", what);
            VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
         }
         break;
      }

      case VEX_TRC_JMP_EMFAIL: {
         VexEmNote ew;
         const HChar* what;
         ew   = (VexEmNote)VG_(threads)[tid].arch.vex.guest_EMNOTE;
         what = (ew < 0 || ew >= EmNote_NUMBER)
                   ? "unknown (?!)"
                   : LibVEX_EmNote_string(ew);
         VG_(message)( Vg_UserMsg,
                       "Emulation fatal error -- Valgrind cannot continue:\n");
         VG_(message)( Vg_UserMsg, "  %s\n", what);
         VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
         VG_(message)(Vg_UserMsg, "\n");
         VG_(message)(Vg_UserMsg, "Valgrind has to exit now.  Sorry.\n");
         VG_(message)(Vg_UserMsg, "\n");
         VG_(exit)(1);
         break;
      }

      case VEX_TRC_JMP_SIGILL:
         VG_(synth_sigill)(tid, VG_(get_IP)(tid));
         break;

      case VEX_TRC_JMP_SIGTRAP:
         VG_(synth_sigtrap)(tid);
         break;

      case VEX_TRC_JMP_SIGSEGV:
         VG_(synth_fault)(tid);
         break;

      case VEX_TRC_JMP_SIGBUS:
         VG_(synth_sigbus)(tid);
         break;

      case VEX_TRC_JMP_SIGFPE_INTDIV:
         VG_(synth_sigfpe)(tid, VKI_FPE_INTDIV);
         break;

      case VEX_TRC_JMP_SIGFPE_INTOVF:
         VG_(synth_sigfpe)(tid, VKI_FPE_INTOVF);
         break;

      case VEX_TRC_JMP_NODECODE: {
         Addr addr = VG_(get_IP)(tid);

         if (VG_(clo_sigill_diag)) {
            VG_(umsg)(
               "valgrind: Unrecognised instruction at address %#lx.\n", addr);
            VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
#        define M(a) VG_(umsg)(a "\n");
         M("Your program just tried to execute an instruction that Valgrind" );
         M("did not recognise.  There are two possible reasons for this."    );
         M("1. Your program has a bug and erroneously jumped to a non-code"  );
         M("   location.  If you are running Memcheck and you just saw a"    );
         M("   warning about a bad jump, it's probably your program's fault.");
         M("2. The instruction is legitimate but Valgrind doesn't handle it,");
         M("   i.e. it's Valgrind's fault.  If you think this is the case or");
         M("   you are not sure, please let us know and we'll try to fix it.");
         M("Either way, Valgrind will now raise a SIGILL signal which will"  );
         M("probably kill your program."                                     );
#        undef M
         }
#        if defined(VGA_s390x)
         /* Now that the complaint is out we need to adjust the guest_IA. The
            reason is that -- after raising the exception -- execution will
            continue with the insn that follows the invalid insn. As the first
            2 bits of the invalid insn determine its length in the usual way,
            we can compute the address of the next insn here and adjust the
            guest_IA accordingly. This adjustment is essential and tested by
            none/tests/s390x/op_exception.c (which would loop forever
            otherwise) */
         UChar byte = ((UChar *)addr)[0];
         UInt  insn_length = ((((byte >> 6) + 1) >> 1) + 1) << 1;
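         /* Worked example of that formula: the top two bits of the
            first byte form the s390x instruction-length code, so
               00 -> ((0+1)>>1 + 1) << 1 = 2 bytes
               01 -> ((1+1)>>1 + 1) << 1 = 4 bytes
               10 -> ((2+1)>>1 + 1) << 1 = 4 bytes
               11 -> ((3+1)>>1 + 1) << 1 = 6 bytes */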
         Addr  next_insn_addr = addr + insn_length;
         VG_(set_IP)(tid, next_insn_addr);
#        endif
         VG_(synth_sigill)(tid, addr);
         break;
      }

      case VEX_TRC_JMP_INVALICACHE:
         VG_(discard_translations)(
            (Addr)VG_(threads)[tid].arch.vex.guest_CMSTART,
            VG_(threads)[tid].arch.vex.guest_CMLEN,
            "scheduler(VEX_TRC_JMP_INVALICACHE)"
         );
         if (0)
            VG_(printf)("dump translations done.\n");
         break;

      case VEX_TRC_JMP_FLUSHDCACHE: {
         void* start = (void*)VG_(threads)[tid].arch.vex.guest_CMSTART;
         SizeT len   = VG_(threads)[tid].arch.vex.guest_CMLEN;
         VG_(debugLog)(2, "sched", "flush_dcache(%p, %lu)\n", start, len);
         VG_(flush_dcache)(start, len);
         break;
      }

      case VG_TRC_INVARIANT_FAILED:
         /* This typically happens if, after running generated code,
            it is detected that host CPU settings (eg, FPU/Vector
            control words) are not as they should be.  Vex's code
            generation specifies the state such control words should
            be in on entry to Vex-generated code, and they should be
            unchanged on exit from it.  Failure of this assertion
            usually means a bug in Vex's code generation. */
         //{ UInt xx;
         //  __asm__ __volatile__ (
         //     "\t.word 0xEEF12A10\n"  // fmrx r2,fpscr
         //     "\tmov %0, r2" : "=r"(xx) : : "r2" );
         //  VG_(printf)("QQQQ new fpscr = %08x\n", xx);
         //}
         vg_assert2(0, "VG_(scheduler), phase 3: "
                       "run_innerloop detected host "
                       "state invariant failure", trc);

      case VEX_TRC_JMP_SYS_SYSENTER:
         /* Do whatever simulation is appropriate for an x86 sysenter
            instruction.  Note that it is critical to set this thread's
            guest_EIP to point at the code to execute after the
            sysenter, since Vex-generated code will not have set it --
            vex does not know what it should be.  Vex sets the next
            address to zero, so if you don't set guest_EIP, the thread
            will jump to zero afterwards and probably die as a result. */
#        if defined(VGP_x86_linux)
         vg_assert2(0, "VG_(scheduler), phase 3: "
                       "sysenter_x86 on x86-linux is not supported");
#        elif defined(VGP_x86_darwin) || defined(VGP_x86_solaris)
         /* return address in client edx */
         VG_(threads)[tid].arch.vex.guest_EIP
            = VG_(threads)[tid].arch.vex.guest_EDX;
         handle_syscall(tid, trc[0]);
#        else
         vg_assert2(0, "VG_(scheduler), phase 3: "
                       "sysenter_x86 on non-x86 platform?!?!");
#        endif
         break;

      default:
         vg_assert2(0, "VG_(scheduler), phase 3: "
                       "unexpected thread return code (%u)", trc[0]);
         /* NOTREACHED */
         break;

      } /* switch (trc) */

      if (UNLIKELY(VG_(clo_profyle_sbs)) && VG_(clo_profyle_interval) > 0)
         maybe_show_sb_profile();
   }

   if (VG_(clo_trace_sched))
      print_sched_event(tid, "exiting VG_(scheduler)");

   vg_assert(VG_(is_exiting)(tid));

   return tst->exitreason;
}


void VG_(nuke_all_threads_except) ( ThreadId me, VgSchedReturnCode src )
{
   ThreadId tid;

   vg_assert(VG_(is_running_thread)(me));

   for (tid = 1; tid < VG_N_THREADS; tid++) {
      if (tid == me
          || VG_(threads)[tid].status == VgTs_Empty)
         continue;
      if (0)
         VG_(printf)(
            "VG_(nuke_all_threads_except): nuking tid %u\n", tid);

      VG_(threads)[tid].exitreason = src;
      if (src == VgSrc_FatalSig)
         VG_(threads)[tid].os_state.fatalsig = VKI_SIGKILL;
      VG_(get_thread_out_of_syscall)(tid);
   }
}


/* ---------------------------------------------------------------------
   Specifying shadow register values
   ------------------------------------------------------------------ */

#if defined(VGA_x86)
#  define VG_CLREQ_ARGS       guest_EAX
#  define VG_CLREQ_RET        guest_EDX
#elif defined(VGA_amd64)
#  define VG_CLREQ_ARGS       guest_RAX
#  define VG_CLREQ_RET        guest_RDX
#elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
#  define VG_CLREQ_ARGS       guest_GPR4
#  define VG_CLREQ_RET        guest_GPR3
#elif defined(VGA_arm)
#  define VG_CLREQ_ARGS       guest_R4
#  define VG_CLREQ_RET        guest_R3
#elif defined(VGA_arm64)
#  define VG_CLREQ_ARGS       guest_X4
#  define VG_CLREQ_RET        guest_X3
#elif defined (VGA_s390x)
#  define VG_CLREQ_ARGS       guest_r2
#  define VG_CLREQ_RET        guest_r3
#elif defined(VGA_mips32) || defined(VGA_mips64)
#  define VG_CLREQ_ARGS       guest_r12
#  define VG_CLREQ_RET        guest_r11
#else
#  error Unknown arch
#endif

#define CLREQ_ARGS(regs)   ((regs).vex.VG_CLREQ_ARGS)
#define CLREQ_RET(regs)    ((regs).vex.VG_CLREQ_RET)
#define O_CLREQ_RET        (offsetof(VexGuestArchState, VG_CLREQ_RET))

// These macros write a value to a client's thread register, and tell the
// tool that it's happened (if necessary).

#define SET_CLREQ_RETVAL(zztid, zzval) \
   do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
        VG_TRACK( post_reg_write, \
                  Vg_CoreClientReq, zztid, O_CLREQ_RET, sizeof(UWord)); \
   } while (0)

#define SET_CLCALL_RETVAL(zztid, zzval, f) \
   do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
        VG_TRACK( post_reg_write_clientcall_return, \
                  zztid, O_CLREQ_RET, sizeof(UWord), f); \
   } while (0)
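// Illustrative expansion (a sketch, not compiled): on amd64, where
// VG_CLREQ_RET is guest_RDX, SET_CLREQ_RETVAL(tid, 42) is roughly
//    VG_(threads)[tid].arch.vex.guest_RDX = 42;
// followed by a post_reg_write notification, so that a tool tracking
// register definedness (e.g. Memcheck) sees the write.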


/* ---------------------------------------------------------------------
   Handle client requests.
   ------------------------------------------------------------------ */

// OS-specific(?) client requests
static Bool os_client_request(ThreadId tid, UWord *args)
{
   Bool handled = True;

   vg_assert(VG_(is_running_thread)(tid));

   switch(args[0]) {
   case VG_USERREQ__FREERES_DONE:
      /* This is equivalent to an exit() syscall, but we don't set the
         exitcode (since it might already be set) */
      if (0 || VG_(clo_trace_syscalls) || VG_(clo_trace_sched))
         VG_(message)(Vg_DebugMsg,
                      "__gnu_cxx::__freeres() and __libc_freeres() wrapper "
                      "done; really quitting!\n");
      VG_(threads)[tid].exitreason = VgSrc_ExitThread;
      break;

   default:
      handled = False;
      break;
   }

   return handled;
}


/* Write out a client message, possibly including a back trace. Return
   the number of characters written. In case of XML output, the format
   string as well as any arguments it requires will be XML'ified.
   I.e. special characters such as the angle brackets will be translated
   into proper escape sequences. */
static
Int print_client_message( ThreadId tid, const HChar *format,
                          va_list *vargsp, Bool include_backtrace)
{
   Int count;

   if (VG_(clo_xml)) {
      /* Translate the format string as follows:
         <  -->  &lt;
         >  -->  &gt;
         &  -->  &amp;
         %s -->  %pS
         Yes, yes, it's simplified but in synch with
         myvprintf_str_XML_simplistic and VG_(debugLog_vprintf).
      */
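      /* For example (illustrative): the format string
            "value of <x> = %s\n"
         is rewritten to
            "value of &lt;x&gt; = %pS\n"
         before being handed to VG_(vprintf_xml) below. */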

      /* Allocate a buffer that is for sure large enough. */
      HChar xml_format[VG_(strlen)(format) * 5 + 1];

      const HChar *p;
      HChar *q = xml_format;

      for (p = format; *p; ++p) {
         switch (*p) {
         case '<': VG_(strcpy)(q, "&lt;");  q += 4; break;
         case '>': VG_(strcpy)(q, "&gt;");  q += 4; break;
         case '&': VG_(strcpy)(q, "&amp;"); q += 5; break;
         case '%':
            /* Careful: make sure %%s stays %%s */
            *q++ = *p++;
            if (*p == 's') {
              *q++ = 'p';
              *q++ = 'S';
            } else {
              *q++ = *p;
            }
            break;

         default:
            *q++ = *p;
            break;
         }
      }
      *q = '\0';

      VG_(printf_xml)( "<clientmsg>\n" );
      VG_(printf_xml)( "  <tid>%u</tid>\n", tid );
      const ThreadState *tst = VG_(get_ThreadState)(tid);
      if (tst->thread_name)
         VG_(printf_xml)("  <threadname>%s</threadname>\n", tst->thread_name);
      VG_(printf_xml)( "  <text>" );
      count = VG_(vprintf_xml)( xml_format, *vargsp );
      VG_(printf_xml)( "  </text>\n" );
   } else {
      count = VG_(vmessage)( Vg_ClientMsg, format, *vargsp );
      VG_(message_flush)();
   }

   if (include_backtrace)
      VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );

   if (VG_(clo_xml))
      VG_(printf_xml)( "</clientmsg>\n" );

   return count;
}


/* Do a client request for the thread tid.  After the request, tid may
   or may not still be runnable; if not, the scheduler will have to
   choose a new thread to run.
*/
static
void do_client_request ( ThreadId tid )
{
   UWord* arg = (UWord*)(CLREQ_ARGS(VG_(threads)[tid].arch));
   UWord req_no = arg[0];

   if (0)
      VG_(printf)("req no = 0x%lx, arg = %p\n", req_no, arg);
   switch (req_no) {

      case VG_USERREQ__CLIENT_CALL0: {
         UWord (*f)(ThreadId) = (__typeof__(f))arg[1];
         if (f == NULL)
            VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL0: func=%p\n", f);
         else
            SET_CLCALL_RETVAL(tid, f ( tid ), (Addr)f);
         break;
      }
      case VG_USERREQ__CLIENT_CALL1: {
         UWord (*f)(ThreadId, UWord) = (__typeof__(f))arg[1];
         if (f == NULL)
            VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL1: func=%p\n", f);
         else
            SET_CLCALL_RETVAL(tid, f ( tid, arg[2] ), (Addr)f );
         break;
      }
      case VG_USERREQ__CLIENT_CALL2: {
         UWord (*f)(ThreadId, UWord, UWord) = (__typeof__(f))arg[1];
         if (f == NULL)
            VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL2: func=%p\n", f);
         else
            SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3] ), (Addr)f );
         break;
      }
      case VG_USERREQ__CLIENT_CALL3: {
         UWord (*f)(ThreadId, UWord, UWord, UWord) = (__typeof__(f))arg[1];
         if (f == NULL)
            VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL3: func=%p\n", f);
         else
            SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3], arg[4] ), (Addr)f );
         break;
      }
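      /* Client-side view (an illustrative sketch): the four requests
         above are normally issued via the VALGRIND_NON_SIMD_CALL{0,1,2,3}
         macros in valgrind.h, e.g.
            UWord my_fn ( ThreadId tid, UWord x );  // hypothetical helper
            UWord r = VALGRIND_NON_SIMD_CALL1(my_fn, 7);
         which arrives here with arg[1] = my_fn and arg[2] = 7; the tid
         argument is supplied by the scheduler, as seen above, and the
         helper's return value is routed back via SET_CLCALL_RETVAL. */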

      // Nb: this looks like a circular definition, because it kind of is.
      // See comment in valgrind.h to understand what's going on.
      case VG_USERREQ__RUNNING_ON_VALGRIND:
         SET_CLREQ_RETVAL(tid, RUNNING_ON_VALGRIND+1);
         break;

      case VG_USERREQ__PRINTF: {
         const HChar* format = (HChar *)arg[1];
         /* JRS 2010-Jan-28: this is DEPRECATED; use the
            _VALIST_BY_REF version instead */
         if (sizeof(va_list) != sizeof(UWord))
            goto va_list_casting_error_NORETURN;
         union {
            va_list vargs;
            unsigned long uw;
         } u;
         u.uw = (unsigned long)arg[2];
         Int count =
            print_client_message( tid, format, &u.vargs,
                                  /* include_backtrace */ False );
         SET_CLREQ_RETVAL( tid, count );
         break;
      }

      case VG_USERREQ__PRINTF_BACKTRACE: {
         const HChar* format = (HChar *)arg[1];
         /* JRS 2010-Jan-28: this is DEPRECATED; use the
            _VALIST_BY_REF version instead */
         if (sizeof(va_list) != sizeof(UWord))
            goto va_list_casting_error_NORETURN;
         union {
            va_list vargs;
            unsigned long uw;
         } u;
         u.uw = (unsigned long)arg[2];
         Int count =
            print_client_message( tid, format, &u.vargs,
                                  /* include_backtrace */ True );
         SET_CLREQ_RETVAL( tid, count );
         break;
      }

      case VG_USERREQ__PRINTF_VALIST_BY_REF: {
         const HChar* format = (HChar *)arg[1];
         va_list* vargsp = (va_list*)arg[2];
         Int count =
            print_client_message( tid, format, vargsp,
                                  /* include_backtrace */ False );

         SET_CLREQ_RETVAL( tid, count );
         break;
      }

      case VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF: {
         const HChar* format = (HChar *)arg[1];
         va_list* vargsp = (va_list*)arg[2];
         Int count =
            print_client_message( tid, format, vargsp,
                                  /* include_backtrace */ True );
         SET_CLREQ_RETVAL( tid, count );
         break;
      }
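      /* Illustrative sketch: client code reaches the two _VALIST_BY_REF
         cases above via the VALGRIND_PRINTF / VALGRIND_PRINTF_BACKTRACE
         macros in valgrind.h, e.g.
            VALGRIND_PRINTF("iteration %d of %d\n", i, n);
         which on current platforms passes a pointer to its va_list in
         arg[2].  The deprecated non-BY_REF forms smuggled the va_list
         itself through arg[2], which only works where
         sizeof(va_list) == sizeof(UWord) -- hence the checks above. */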

      case VG_USERREQ__INTERNAL_PRINTF_VALIST_BY_REF: {
         va_list* vargsp = (va_list*)arg[2];
         Int count =
            VG_(vmessage)( Vg_DebugMsg, (HChar *)arg[1], *vargsp );
         VG_(message_flush)();
         SET_CLREQ_RETVAL( tid, count );
         break;
      }

      case VG_USERREQ__ADD_IFUNC_TARGET: {
         VG_(redir_add_ifunc_target)( arg[1], arg[2] );
         SET_CLREQ_RETVAL( tid, 0);
         break; }

      case VG_USERREQ__STACK_REGISTER: {
         UWord sid = VG_(register_stack)((Addr)arg[1], (Addr)arg[2]);
         SET_CLREQ_RETVAL( tid, sid );
         break; }

      case VG_USERREQ__STACK_DEREGISTER: {
         VG_(deregister_stack)(arg[1]);
         SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
         break; }

      case VG_USERREQ__STACK_CHANGE: {
         VG_(change_stack)(arg[1], (Addr)arg[2], (Addr)arg[3]);
         SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
         break; }
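      /* Illustrative sketch: the three stack requests above correspond
         to the valgrind.h macros; e.g. for a user-allocated coroutine
         stack one might write
            UWord id = VALGRIND_STACK_REGISTER(base, base + size);
            ...
            VALGRIND_STACK_DEREGISTER(id);
         where id is the sid produced by VG_(register_stack) above. */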

      case VG_USERREQ__GET_MALLOCFUNCS: {
         struct vg_mallocfunc_info *info = (struct vg_mallocfunc_info *)arg[1];

         info->tl_malloc               = VG_(tdict).tool_malloc;
         info->tl_calloc               = VG_(tdict).tool_calloc;
         info->tl_realloc              = VG_(tdict).tool_realloc;
         info->tl_memalign             = VG_(tdict).tool_memalign;
         info->tl___builtin_new        = VG_(tdict).tool___builtin_new;
         info->tl___builtin_vec_new    = VG_(tdict).tool___builtin_vec_new;
         info->tl_free                 = VG_(tdict).tool_free;
         info->tl___builtin_delete     = VG_(tdict).tool___builtin_delete;
         info->tl___builtin_vec_delete = VG_(tdict).tool___builtin_vec_delete;
         info->tl_malloc_usable_size   = VG_(tdict).tool_malloc_usable_size;

         info->mallinfo                = VG_(mallinfo);
         info->clo_trace_malloc        = VG_(clo_trace_malloc);

         SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */

         break;
      }

      /* Requests from the client program */

      case VG_USERREQ__DISCARD_TRANSLATIONS:
         if (VG_(clo_verbosity) > 2)
            VG_(printf)( "client request: DISCARD_TRANSLATIONS,"
                         " addr %p,  len %lu\n",
                         (void*)arg[1], arg[2] );

         VG_(discard_translations)(
            arg[1], arg[2], "scheduler(VG_USERREQ__DISCARD_TRANSLATIONS)"
         );

         SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
         break;

      case VG_USERREQ__INNER_THREADS:
         if (VG_(clo_verbosity) > 2)
            VG_(printf)( "client request: INNER_THREADS,"
                         " addr %p\n",
                         (void*)arg[1] );
         VG_(inner_threads) = (ThreadState*)arg[1];
         SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
         break;

      case VG_USERREQ__COUNT_ERRORS:
         SET_CLREQ_RETVAL( tid, VG_(get_n_errs_found)() );
         break;

      case VG_USERREQ__LOAD_PDB_DEBUGINFO:
         VG_(di_notify_pdb_debuginfo)( arg[1], arg[2], arg[3], arg[4] );
         SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
         break;

      case VG_USERREQ__MAP_IP_TO_SRCLOC: {
         Addr   ip    = arg[1];
         HChar* buf64 = (HChar*)arg[2];  // points to a HChar [64] array
         const HChar *buf;  // points to a string of unknown size

         VG_(memset)(buf64, 0, 64);
         UInt linenum = 0;
         Bool ok = VG_(get_filename_linenum)(
                      ip, &buf, NULL, &linenum
                   );
         if (ok) {
            /* For backward compatibility truncate the filename to
               49 characters. */
            VG_(strncpy)(buf64, buf, 50);
            buf64[49] = '\0';
            UInt i;
            for (i = 0; i < 50; i++) {
               if (buf64[i] == 0)
                  break;
            }
            VG_(sprintf)(buf64+i, ":%u", linenum);  // safe
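            /* e.g. buf64 now holds something like "foo.c:48"
               (illustrative). */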
         } else {
            buf64[0] = 0;
         }

         SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
         break;
      }

      case VG_USERREQ__CHANGE_ERR_DISABLEMENT: {
         Word delta = arg[1];
         vg_assert(delta == 1 || delta == -1);
         ThreadState* tst = VG_(get_ThreadState)(tid);
         vg_assert(tst);
         if (delta == 1 && tst->err_disablement_level < 0xFFFFFFFF) {
            tst->err_disablement_level++;
         }
         else
         if (delta == -1 && tst->err_disablement_level > 0) {
            tst->err_disablement_level--;
         }
         SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
         break;
      }
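      /* Illustrative sketch: the request above is what the valgrind.h
         pair VALGRIND_DISABLE_ERROR_REPORTING (delta == 1) and
         VALGRIND_ENABLE_ERROR_REPORTING (delta == -1) boils down to;
         the guards above make the level saturate rather than wrap. */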

      case VG_USERREQ__GDB_MONITOR_COMMAND: {
         UWord ret;
         ret = (UWord) VG_(client_monitor_command) ((HChar*)arg[1]);
         SET_CLREQ_RETVAL(tid, ret);
         break;
      }

      case VG_USERREQ__MALLOCLIKE_BLOCK:
      case VG_USERREQ__RESIZEINPLACE_BLOCK:
      case VG_USERREQ__FREELIKE_BLOCK:
         // Ignore them if the addr is NULL;  otherwise pass onto the tool.
         if (!arg[1]) {
            SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
            break;
         } else {
            goto my_default;
         }

      case VG_USERREQ__VEX_INIT_FOR_IRI:
         LibVEX_InitIRI ( (IRICB *)arg[1] );
         break;

      default:
       my_default:
         if (os_client_request(tid, arg)) {
            // do nothing, os_client_request() handled it
         } else if (VG_(needs).client_requests) {
            UWord ret;

            if (VG_(clo_verbosity) > 2)
               VG_(printf)("client request: code %lx,  addr %p,  len %lu\n",
                           arg[0], (void*)arg[1], arg[2] );

            if ( VG_TDICT_CALL(tool_handle_client_request, tid, arg, &ret) )
               SET_CLREQ_RETVAL(tid, ret);
         } else {
            static Bool whined = False;

            if (!whined && VG_(clo_verbosity) > 2) {
               // Allow for requests in core, but defined by tools, which
               // have 0 and 0 in their two high bytes.
               HChar c1 = (arg[0] >> 24) & 0xff;
               HChar c2 = (arg[0] >> 16) & 0xff;
               if (c1 == 0) c1 = '_';
               if (c2 == 0) c2 = '_';
               VG_(message)(Vg_UserMsg, "Warning:\n"
                   "  unhandled client request: 0x%lx (%c%c+0x%lx).  Perhaps\n"
                   "  VG_(needs).client_requests should be set?\n",
                            arg[0], c1, c2, arg[0] & 0xffff);
               whined = True;
            }
         }
         break;
   }
   return;

   /*NOTREACHED*/
  va_list_casting_error_NORETURN:
   VG_(umsg)(
      "Valgrind: fatal error - cannot continue: use of the deprecated\n"
      "client requests VG_USERREQ__PRINTF or VG_USERREQ__PRINTF_BACKTRACE\n"
      "on a platform where they cannot be supported.  Please use the\n"
      "equivalent _VALIST_BY_REF versions instead.\n"
      "\n"
      "This is a binary-incompatible change in Valgrind's client request\n"
      "mechanism.  It is unfortunate, but difficult to avoid.  End-users\n"
      "are expected to almost never see this message.  The only case in\n"
      "which you might see this message is if your code uses the macros\n"
      "VALGRIND_PRINTF or VALGRIND_PRINTF_BACKTRACE.  If so, you will need\n"
      "to recompile such code, using the header files from this version of\n"
      "Valgrind, and not any previous version.\n"
      "If you see this message in any other circumstances, it is probably\n"
      "a bug in Valgrind.  In this case, please file a bug report at\n"
      "\n"
      "   http://www.valgrind.org/support/bug_reports.html\n"
      "\n"
      "Will now abort.\n"
   );
   vg_assert(0);
}


/* ---------------------------------------------------------------------
   Sanity checking (permanently engaged)
   ------------------------------------------------------------------ */

/* Internal consistency checks on the sched structures. */
static
void scheduler_sanity ( ThreadId tid )
{
   Bool bad = False;
   Int lwpid = VG_(gettid)();

   if (!VG_(is_running_thread)(tid)) {
      VG_(message)(Vg_DebugMsg,
                   "Thread %u is supposed to be running, "
                   "but doesn't own the_BigLock (owned by %u)\n",
                   tid, VG_(running_tid));
      bad = True;
   }

   if (lwpid != VG_(threads)[tid].os_state.lwpid) {
      VG_(message)(Vg_DebugMsg,
                   "Thread %u supposed to be in LWP %d, but we're actually %d\n",
                   tid, VG_(threads)[tid].os_state.lwpid, VG_(gettid)());
      bad = True;
   }

   if (lwpid != ML_(get_sched_lock_owner)(the_BigLock)) {
      VG_(message)(Vg_DebugMsg,
                   "Thread (LWPID) %u doesn't own the_BigLock\n",
                   tid);
      bad = True;
   }

   if (0) {
      /* Periodically show the state of all threads, for debugging
         purposes. */
      static UInt lasttime = 0;
      UInt now;
      now = VG_(read_millisecond_timer)();
      if ((!bad) && (lasttime + 4000/*ms*/ <= now)) {
         lasttime = now;
         VG_(printf)("\n------------ Sched State at %d ms ------------\n",
                     (Int)now);
         VG_(show_sched_status)(True,  // host_stacktrace
                                True,  // stack_usage
                                True); // exited_threads
      }
   }

   /* core_panic also shows the sched status, which is why we don't
      show it above if bad==True. */
   if (bad)
      VG_(core_panic)("scheduler_sanity: failed");
}
void VG_(sanity_check_general) ( Bool force_expensive )
{
   ThreadId tid;

   static UInt next_slow_check_at = 1;
   static UInt slow_check_interval = 25;

   if (VG_(clo_sanity_level) < 1) return;

   /* --- First do all the tests that we can do quickly. ---*/

   sanity_fast_count++;

   /* Check stuff pertaining to the memory check system. */

   /* Check that nobody has spuriously claimed that the first or
      last 16 pages of memory have become accessible [...] */
   if (VG_(needs).sanity_checks) {
      vg_assert(VG_TDICT_CALL(tool_cheap_sanity_check));
   }

   /* --- Now some more expensive checks. ---*/

   /* Once every now and again, check some more expensive stuff.
      Gradually increase the interval between such checks so as not to
      burden long-running programs too much. */
   if ( force_expensive
        || VG_(clo_sanity_level) > 1
        || (VG_(clo_sanity_level) == 1
            && sanity_fast_count == next_slow_check_at)) {

      if (0) VG_(printf)("SLOW at %u\n", sanity_fast_count-1);

      next_slow_check_at = sanity_fast_count - 1 + slow_check_interval;
      slow_check_interval++;
      sanity_slow_count++;
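      /* Worked example of the pacing: with next_slow_check_at = 1 and
         slow_check_interval = 25 initially, slow checks fire at fast
         counts 1, 25, 50, 76, 103, ... -- each gap one larger than
         the last. */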

      if (VG_(needs).sanity_checks) {
          vg_assert(VG_TDICT_CALL(tool_expensive_sanity_check));
      }

      /* Look for stack overruns.  Visit all threads. */
      for (tid = 1; tid < VG_N_THREADS; tid++) {
         SizeT    remains;
         VgStack* stack;

         if (VG_(threads)[tid].status == VgTs_Empty ||
             VG_(threads)[tid].status == VgTs_Zombie)
            continue;

         stack
            = (VgStack*)
              VG_(get_ThreadState)(tid)->os_state.valgrind_stack_base;
         SizeT limit
            = 4096; // Let's say.  Checking more causes lots of L2 misses.
         remains
            = VG_(am_get_VgStack_unused_szB)(stack, limit);
         if (remains < limit)
            VG_(message)(Vg_DebugMsg,
                         "WARNING: Thread %u is within %lu bytes "
                         "of running out of valgrind stack!\n"
                         "Valgrind stack size can be increased "
                         "using --valgrind-stacksize=....\n",
                         tid, remains);
      }
   }

   if (VG_(clo_sanity_level) > 1) {
      /* Check sanity of the low-level memory manager.  Note that bugs
         in the client's code can cause this to fail, so we don't do
         this check unless specially asked for.  And because it's
         potentially very expensive. */
      VG_(sanity_check_malloc_all)();
   }
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/