1
2 /*--------------------------------------------------------------------*/
3 /*--- Ptrcheck: a pointer-use checker. ---*/
4 /*--- This file checks stack and global array accesses. ---*/
5 /*--- sg_main.c ---*/
6 /*--------------------------------------------------------------------*/
7
8 /*
9 This file is part of Ptrcheck, a Valgrind tool for checking pointer
10 use in programs.
11
12 Copyright (C) 2008-2013 OpenWorks Ltd
13 info@open-works.co.uk
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31
32 Neither the names of the U.S. Department of Energy nor the
33 University of California nor the names of its contributors may be
34 used to endorse or promote products derived from this software
35 without prior written permission.
36 */
37
38 #include "pub_tool_basics.h"
39 #include "pub_tool_libcbase.h"
40 #include "pub_tool_libcassert.h"
41 #include "pub_tool_libcprint.h"
42 #include "pub_tool_tooliface.h"
43 #include "pub_tool_wordfm.h"
44 #include "pub_tool_xarray.h"
45 #include "pub_tool_threadstate.h"
46 #include "pub_tool_mallocfree.h"
47 #include "pub_tool_machine.h"
48 #include "pub_tool_debuginfo.h"
49 #include "pub_tool_options.h"
50
51 #include "pc_common.h"
52
53 #include "sg_main.h" // self
54
55
56 static
57 void preen_global_Invars ( Addr a, SizeT len ); /*fwds*/
58
59
60 //////////////////////////////////////////////////////////////
61 // //
62 // Basic Stuff //
63 // //
64 //////////////////////////////////////////////////////////////
65
is_sane_TId(ThreadId tid)66 static inline Bool is_sane_TId ( ThreadId tid )
67 {
68 return tid >= 0 && tid < VG_N_THREADS
69 && tid != VG_INVALID_THREADID;
70 }
71
/* Allocation wrapper: refuses zero-sized requests and never returns
   NULL (asserts on both conditions).  'cc' is the cost-centre tag
   passed through to VG_(malloc). */
static void* sg_malloc ( const HChar* cc, SizeT n ) {
   void* res;
   tl_assert(n > 0);
   res = VG_(malloc)( cc, n );
   tl_assert(res);
   return res;
}
79
/* Deallocation wrapper.  A NULL argument indicates a caller bug, so
   it is asserted against rather than silently ignored. */
static void sg_free ( void* p ) {
   tl_assert(p);
   VG_(free)(p);
}
84
85
86 /* Compare the intervals [a1,a1+n1) and [a2,a2+n2). Return -1 if the
87 first interval is lower, 1 if the first interval is higher, and 0
88 if there is any overlap. Redundant paranoia with casting is there
89 following what looked distinctly like a bug in gcc-4.1.2, in which
90 some of the comparisons were done signedly instead of
91 unsignedly. */
92 inline
cmp_nonempty_intervals(Addr a1,SizeT n1,Addr a2,SizeT n2)93 static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
94 Addr a2, SizeT n2 ) {
95 UWord a1w = (UWord)a1;
96 UWord n1w = (UWord)n1;
97 UWord a2w = (UWord)a2;
98 UWord n2w = (UWord)n2;
99 tl_assert(n1w > 0 && n2w > 0);
100 if (a1w + n1w <= a2w) return -1L;
101 if (a2w + n2w <= a1w) return 1L;
102 return 0;
103 }
104
105 /* Return true iff [aSmall,aSmall+nSmall) is entirely contained
106 within [aBig,aBig+nBig). */
107 inline
is_subinterval_of(Addr aBig,SizeT nBig,Addr aSmall,SizeT nSmall)108 static Bool is_subinterval_of ( Addr aBig, SizeT nBig,
109 Addr aSmall, SizeT nSmall ) {
110 tl_assert(nBig > 0 && nSmall > 0);
111 return aBig <= aSmall && aSmall + nSmall <= aBig + nBig;
112 }
113
114 inline
Addr__min(Addr a1,Addr a2)115 static Addr Addr__min ( Addr a1, Addr a2 ) {
116 return a1 < a2 ? a1 : a2;
117 }
118
119 inline
Addr__max(Addr a1,Addr a2)120 static Addr Addr__max ( Addr a1, Addr a2 ) {
121 return a1 < a2 ? a2 : a1;
122 }
123
124
125 //////////////////////////////////////////////////////////////
126 // //
127 // StackBlocks Persistent Cache //
128 // //
129 //////////////////////////////////////////////////////////////
130
131 /* We maintain a set of XArray* of StackBlocks. These are never
132 freed. When a new StackBlock vector is acquired from
133 VG_(di_get_local_blocks_at_ip), we compare it to the existing set.
134 If not present, it is added. If present, the just-acquired one is
135 freed and the copy used.
136
137 This simplifies storage management elsewhere. It allows us to
138 assume that a pointer to an XArray* of StackBlock is valid forever.
139 It also means there are no duplicates anywhere, which could be
140 important from a space point of view for programs that generate a
141 lot of translations, or where translations are frequently discarded
142 and re-made.
143
144 Note that we normalise the arrays by sorting the elements according
145 to an arbitrary total order, so as to avoid the situation that two
146 vectors describe the same set of variables but are not structurally
147 identical. */
148
StackBlock__sane(const StackBlock * fb)149 static inline Bool StackBlock__sane ( const StackBlock* fb )
150 {
151 if (fb->name[ sizeof(fb->name)-1 ] != 0)
152 return False;
153 if (fb->spRel != False && fb->spRel != True)
154 return False;
155 if (fb->isVec != False && fb->isVec != True)
156 return False;
157 return True;
158 }
159
160 /* Generate an arbitrary total ordering on StackBlocks. */
StackBlock__cmp(const StackBlock * fb1,const StackBlock * fb2)161 static Word StackBlock__cmp ( const StackBlock* fb1, const StackBlock* fb2 )
162 {
163 Word r;
164 tl_assert(StackBlock__sane(fb1));
165 tl_assert(StackBlock__sane(fb2));
166 /* Hopefully the .base test hits most of the time. For the blocks
167 associated with any particular instruction, if the .base values
168 are the same then probably it doesn't make sense for the other
169 fields to be different. But this is supposed to be a completely
170 general structural total order, so we have to compare everything
171 anyway. */
172 if (fb1->base < fb2->base) return -1;
173 if (fb1->base > fb2->base) return 1;
174 /* compare sizes */
175 if (fb1->szB < fb2->szB) return -1;
176 if (fb1->szB > fb2->szB) return 1;
177 /* compare sp/fp flag */
178 if (fb1->spRel < fb2->spRel) return -1;
179 if (fb1->spRel > fb2->spRel) return 1;
180 /* compare is/is-not array-typed flag */
181 if (fb1->isVec < fb2->isVec) return -1;
182 if (fb1->isVec > fb2->isVec) return 1;
183 /* compare the name */
184 r = (Word)VG_(strcmp)(fb1->name, fb2->name);
185 return r;
186 }
187
188 /* Returns True if all fields except .szB are the same. szBs may or
189 may not be the same; they are simply not consulted. */
StackBlock__all_fields_except_szB_are_equal(StackBlock * fb1,StackBlock * fb2)190 static Bool StackBlock__all_fields_except_szB_are_equal (
191 StackBlock* fb1,
192 StackBlock* fb2
193 )
194 {
195 tl_assert(StackBlock__sane(fb1));
196 tl_assert(StackBlock__sane(fb2));
197 return fb1->base == fb2->base
198 && fb1->spRel == fb2->spRel
199 && fb1->isVec == fb2->isVec
200 && 0 == VG_(strcmp)(fb1->name, fb2->name);
201 }
202
203
204 /* Generate an arbitrary total ordering on vectors of StackBlocks. */
StackBlocks__cmp(XArray * fb1s,XArray * fb2s)205 static Word StackBlocks__cmp ( XArray* fb1s, XArray* fb2s )
206 {
207 Word i, r, n1, n2;
208 n1 = VG_(sizeXA)( fb1s );
209 n2 = VG_(sizeXA)( fb2s );
210 if (n1 < n2) return -1;
211 if (n1 > n2) return 1;
212 for (i = 0; i < n1; i++) {
213 StackBlock *fb1, *fb2;
214 fb1 = VG_(indexXA)( fb1s, i );
215 fb2 = VG_(indexXA)( fb2s, i );
216 r = StackBlock__cmp( fb1, fb2 );
217 if (r != 0) return r;
218 }
219 tl_assert(i == n1 && i == n2);
220 return 0;
221 }
222
/* Debug dump of a StackBlock vector, one line per block. */
static void pp_StackBlocks ( XArray* sbs )
{
   Word ix;
   Word nBlocks = VG_(sizeXA)( sbs );
   VG_(message)(Vg_DebugMsg, "<<< STACKBLOCKS\n" );
   for (ix = 0; ix < nBlocks; ix++) {
      StackBlock* blk = (StackBlock*)VG_(indexXA)( sbs, ix );
      VG_(message)(Vg_DebugMsg,
         "  StackBlock{ off %ld szB %lu spRel:%c isVec:%c \"%s\" }\n",
         blk->base, blk->szB, blk->spRel ? 'Y' : 'N',
         blk->isVec ? 'Y' : 'N', &blk->name[0]
      );
   }
   VG_(message)(Vg_DebugMsg, ">>> STACKBLOCKS\n" );
}
237
238
239 /* ---------- The StackBlock vector cache ---------- */
240
/* The global set of canonical StackBlock vectors: maps
   (XArray* of StackBlock) -> nothing.  Entries are never freed;
   see the "StackBlocks Persistent Cache" commentary above. */
static WordFM* /* XArray* of StackBlock -> nothing */
       frameBlocks_set = NULL;
243
init_StackBlocks_set(void)244 static void init_StackBlocks_set ( void )
245 {
246 tl_assert(!frameBlocks_set);
247 frameBlocks_set
248 = VG_(newFM)( sg_malloc, "di.sg_main.iSBs.1", sg_free,
249 (Word(*)(UWord,UWord))StackBlocks__cmp );
250 tl_assert(frameBlocks_set);
251 }
252
253 /* Find the given StackBlock-vector in our collection thereof. If
254 found, deallocate the supplied one, and return the address of the
255 copy. If not found, add the supplied one to our collection and
256 return its address. */
static XArray* /* of StackBlock */
StackBlocks__find_and_dealloc__or_add
       ( XArray* /* of StackBlock */ orig )
{
   UWord key, val;

   /* First, normalise, as per comments above.  Sorting with the
      total order StackBlock__cmp means structurally-equal vectors
      become identical, so the set lookup below works. */
   VG_(setCmpFnXA)( orig, (XACmpFn_t)StackBlock__cmp );
   VG_(sortXA)( orig );

   /* Now get rid of any exact duplicates. */
  nuke_dups:
   { Word r, w, nEQ, n = VG_(sizeXA)( orig );
     if (n >= 2) {
        /* In-place compaction over the sorted array: 'r' reads,
           'w' writes, 'nEQ' counts elements dropped because they
           compare equal to their successor. */
        w = 0;
        nEQ = 0;
        for (r = 0; r < n; r++) {
           if (r+1 < n) {
              StackBlock* pR0 = VG_(indexXA)( orig, r+0 );
              StackBlock* pR1 = VG_(indexXA)( orig, r+1 );
              Word c = StackBlock__cmp(pR0,pR1);
              tl_assert(c == -1 || c == 0); /* sorted, so never > */
              if (c == 0) { nEQ++; continue; }
           }
           if (w != r) {
              StackBlock* pW = VG_(indexXA)( orig, w );
              StackBlock* pR = VG_(indexXA)( orig, r );
              *pW = *pR;
           }
           w++;
        }
        tl_assert(r == n);
        tl_assert(w + nEQ == n);
        if (w < n) {
           VG_(dropTailXA)( orig, n-w );
        }
        if (0) VG_(printf)("delta %ld\n", n-w);
     }
   }

   /* Deal with the following strangeness, where two otherwise
      identical blocks are claimed to have different sizes.  In
      which case we use the larger size. */
   /* StackBlock{ off 16 szB 66 spRel:Y isVec:Y "sz" }
      StackBlock{ off 16 szB 130 spRel:Y isVec:Y "sz" }
      StackBlock{ off 208 szB 16 spRel:Y isVec:Y "ar" }
   */
   { Word i, n = VG_(sizeXA)( orig );
     if (n >= 2) {
        for (i = 0; i < n-1; i++) {
           StackBlock* sb0 = VG_(indexXA)( orig, i+0 );
           StackBlock* sb1 = VG_(indexXA)( orig, i+1 );
           if (StackBlock__all_fields_except_szB_are_equal(sb0, sb1)) {
              /* They can't be identical because the previous tidying
                 pass would have removed the duplicates.  And they
                 can't be > because the earlier sorting pass would
                 have ordered otherwise-identical descriptors
                 according to < on .szB fields.  Hence: */
              tl_assert(sb0->szB < sb1->szB);
              sb0->szB = sb1->szB;
              /* This makes the blocks identical, at the size of the
                 larger one.  Rather than go to all the hassle of
                 sliding the rest down, simply go back to the
                 remove-duplicates stage.  The assertion guarantees
                 that we eventually make progress, since the rm-dups
                 stage will get rid of one of the blocks.  This is
                 expected to happen only exceedingly rarely. */
              tl_assert(StackBlock__cmp(sb0,sb1) == 0);
              goto nuke_dups;
           }
        }
     }
   }

   /* If there are any blocks which overlap and have the same
      fpRel-ness, junk the whole descriptor; it's obviously bogus.
      Icc11 certainly generates bogus info from time to time.

      This check is pretty weak; really we ought to have a stronger
      sanity check. */
   { Word i, n = VG_(sizeXA)( orig );
     static Int moans = 3;
     for (i = 0; i < n-1; i++) {
       StackBlock* sb1 = (StackBlock*)VG_(indexXA)( orig, i );
       StackBlock* sb2 = (StackBlock*)VG_(indexXA)( orig, i+1 );
       if (sb1->spRel == sb2->spRel
           && (sb1->base >= sb2->base
               || sb1->base + sb1->szB > sb2->base)) {
          if (moans > 0 && !VG_(clo_xml)) {
             moans--;
             VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
                                      "overlapping stack blocks\n");
             if (VG_(clo_verbosity) >= 2)
                pp_StackBlocks(orig);
             if (moans == 0)
                VG_(message)(Vg_UserMsg, "Further instances of this "
                                         "message will not be shown\n" );
          }
          /* Junk the entire vector, not just the offending pair:
             the whole descriptor is untrustworthy. */
          VG_(dropTailXA)( orig, VG_(sizeXA)( orig ));
          break;
       }
     }
   }

   /* Now, do we have it already? */
   if (VG_(lookupFM)( frameBlocks_set, &key, &val, (UWord)orig )) {
      /* yes: free the caller's copy and hand back the canonical
         one, which lives forever. */
      XArray* res;
      tl_assert(val == 0);
      tl_assert(key != (UWord)orig);
      VG_(deleteXA)(orig);
      res = (XArray*)key;
      return res;
   } else {
      /* no: intern the caller's copy; it becomes canonical and is
         never freed. */
      VG_(addToFM)( frameBlocks_set, (UWord)orig, 0 );
      return orig;
   }
}
376
377 /* Top level function for getting the StackBlock vector for a given
378 instruction. It is guaranteed that the returned pointer will be
379 valid for the entire rest of the run, and also that the addresses
380 of the individual elements of the array will not change. */
381
get_StackBlocks_for_IP(Addr ip)382 static XArray* /* of StackBlock */ get_StackBlocks_for_IP ( Addr ip )
383 {
384 XArray* blocks = VG_(di_get_stack_blocks_at_ip)( ip, True/*arrays only*/ );
385 tl_assert(blocks);
386 return StackBlocks__find_and_dealloc__or_add( blocks );
387 }
388
389
390 //////////////////////////////////////////////////////////////
391 // //
392 // GlobalBlocks Persistent Cache //
393 // //
394 //////////////////////////////////////////////////////////////
395
396 /* Generate an arbitrary total ordering on GlobalBlocks. */
GlobalBlock__cmp(GlobalBlock * gb1,GlobalBlock * gb2)397 static Word GlobalBlock__cmp ( GlobalBlock* gb1, GlobalBlock* gb2 )
398 {
399 Word r;
400 /* compare addrs */
401 if (gb1->addr < gb2->addr) return -1;
402 if (gb1->addr > gb2->addr) return 1;
403 /* compare sizes */
404 if (gb1->szB < gb2->szB) return -1;
405 if (gb1->szB > gb2->szB) return 1;
406 /* compare is/is-not array-typed flag */
407 if (gb1->isVec < gb2->isVec) return -1;
408 if (gb1->isVec > gb2->isVec) return 1;
409 /* compare the name */
410 r = (Word)VG_(strcmp)(gb1->name, gb2->name);
411 if (r != 0) return r;
412 /* compare the soname */
413 r = (Word)VG_(strcmp)(gb1->soname, gb2->soname);
414 return r;
415 }
416
/* The global set of interned GlobalBlocks: maps GlobalBlock* ->
   nothing.  Entries are never freed. */
static WordFM* /* GlobalBlock* -> nothing */
       globalBlock_set = NULL;
419
init_GlobalBlock_set(void)420 static void init_GlobalBlock_set ( void )
421 {
422 tl_assert(!globalBlock_set);
423 globalBlock_set
424 = VG_(newFM)( sg_malloc, "di.sg_main.iGBs.1", sg_free,
425 (Word(*)(UWord,UWord))GlobalBlock__cmp );
426 tl_assert(globalBlock_set);
427 }
428
429
430 /* Top level function for making GlobalBlocks persistent. Call here
431 with a non-persistent version, and the returned one is guaranteed
432 to be valid for the entire rest of the run. The supplied one is
433 copied, not stored, so can be freed after the call. */
434
/* Intern 'orig' into globalBlock_set.  If a structurally-equal block
   is already present, return the canonical copy; otherwise clone
   'orig' onto the heap, store the clone, and return it.  Either way
   the caller keeps ownership of 'orig' and may free it. */
static GlobalBlock* get_persistent_GlobalBlock ( GlobalBlock* orig )
{
   UWord key, val;
   if (VG_(lookupFM)( globalBlock_set, &key, &val, (UWord)orig )) {
      /* Already interned: hand back the canonical copy. */
      GlobalBlock* canon = (GlobalBlock*)key;
      tl_assert(val == 0);
      tl_assert(canon != orig);
      return canon;
   } else {
      /* First sighting: clone it, store the clone, and return the
         clone's address. */
      GlobalBlock* clone = sg_malloc( "di.sg_main.gpGB.1",
                                      sizeof(GlobalBlock) );
      tl_assert(clone);
      *clone = *orig;
      VG_(addToFM)( globalBlock_set, (UWord)clone, 0 );
      return clone;
   }
}
457
458
459 //////////////////////////////////////////////////////////////
460 // //
461 // Interval tree of StackTreeBlock //
462 // //
463 //////////////////////////////////////////////////////////////
464
/* A node in a stack interval tree.  Zero length intervals (.szB == 0)
   are not allowed.

   A stack interval tree is a (WordFM StackTreeNode* void).  There is
   one stack interval tree for each thread.
*/
typedef
   struct {
      Addr addr;         /* start of the block's extent */
      SizeT szB;         /* copied from .descr->szB */
      StackBlock* descr; /* it's an instance of this block */
      UWord depth;       /* depth of stack at time block was pushed */
   }
   StackTreeNode;
479
/* Debug dump of a per-thread stack interval tree.  'who' tags the
   output so the call site can be identified. */
static void pp_StackTree ( WordFM* sitree, const HChar* who )
{
   UWord keyW, valW;
   VG_(printf)("<<< BEGIN pp_StackTree %s\n", who );
   VG_(initIterFM)( sitree );
   while (VG_(nextIterFM)( sitree, &keyW, &valW )) {
      StackTreeNode* nd = (StackTreeNode*)keyW;
      VG_(printf)("  [%#lx,+%lu) descr=%p %s %lu\n", nd->addr, nd->szB,
                  nd->descr, nd->descr->name, nd->descr->szB);
   }
   /* Close the iteration, as the sibling GlobalTree__pp does; the
      original omitted this. */
   VG_(doneIterFM)( sitree );
   VG_(printf)(">>> END pp_StackTree %s\n", who );
}
492
493 /* Interval comparison function for StackTreeNode */
cmp_intervals_StackTreeNode(StackTreeNode * sn1,StackTreeNode * sn2)494 static Word cmp_intervals_StackTreeNode ( StackTreeNode* sn1,
495 StackTreeNode* sn2 )
496 {
497 return cmp_nonempty_intervals(sn1->addr, sn1->szB,
498 sn2->addr, sn2->szB);
499 }
500
501 /* Find the node holding 'a', if any. */
find_StackTreeNode(WordFM * sitree,Addr a)502 static StackTreeNode* find_StackTreeNode ( WordFM* sitree, Addr a )
503 {
504 UWord keyW, valW;
505 StackTreeNode key;
506 tl_assert(sitree);
507 key.addr = a;
508 key.szB = 1;
509 if (VG_(lookupFM)( sitree, &keyW, &valW, (UWord)&key )) {
510 StackTreeNode* res = (StackTreeNode*)keyW;
511 tl_assert(valW == 0);
512 tl_assert(res != &key);
513 return res;
514 } else {
515 return NULL;
516 }
517 }
518
519 /* Note that the supplied XArray of FrameBlock must have been
520 made persistent already. */
521 __attribute__((noinline))
add_blocks_to_StackTree(WordFM * sitree,XArray * descrs,XArray * bases,UWord depth)522 static void add_blocks_to_StackTree (
523 /*MOD*/WordFM* sitree,
524 XArray* /* FrameBlock */ descrs,
525 XArray* /* Addr */ bases,
526 UWord depth
527 )
528 {
529 Bool debug = (Bool)0;
530 Word i, nDescrs, nBases;
531
532 nDescrs = VG_(sizeXA)( descrs ),
533 nBases = VG_(sizeXA)( bases );
534 tl_assert(nDescrs == nBases);
535
536 if (nDescrs == 0) return;
537
538 tl_assert(sitree);
539 if (debug) {
540 VG_(printf)("\ndepth = %lu\n", depth);
541 pp_StackTree( sitree, "add_blocks_to_StackTree-pre" );
542 pp_StackBlocks(descrs);
543 }
544
545 for (i = 0; i < nDescrs; i++) {
546 Bool already_present;
547 StackTreeNode* nyu;
548 Addr addr = *(Addr*)VG_(indexXA)( bases, i );
549 StackBlock* descr = (StackBlock*)VG_(indexXA)( descrs, i );
550 tl_assert(descr->szB > 0);
551 nyu = sg_malloc( "di.sg_main.abtST.1", sizeof(StackTreeNode) );
552 nyu->addr = addr;
553 nyu->szB = descr->szB;
554 nyu->descr = descr;
555 nyu->depth = depth;
556 if (debug) VG_(printf)("ADD %#lx %lu\n", addr, descr->szB);
557 already_present = VG_(addToFM)( sitree, (UWord)nyu, 0 );
558 /* The interval can't already be there; else we have
559 overlapping stack blocks. */
560 tl_assert(!already_present);
561 if (debug) {
562 pp_StackTree( sitree, "add_blocks_to_StackTree-step" );
563 }
564 }
565 if (debug) {
566 pp_StackTree( sitree, "add_blocks_to_StackTree-post" );
567 VG_(printf)("\n");
568 }
569 }
570
/* Remove from 'sitree' the node anchored at each address in 'bases',
   freeing the nodes.  Every address must be covered by a node, since
   the nodes were added when the owning frame was created. */
static void del_blocks_from_StackTree ( /*MOD*/WordFM* sitree,
                                        XArray* /* Addr */ bases )
{
   UWord oldK, oldV;
   Word ix, n = VG_(sizeXA)( bases );
   for (ix = 0; ix < n; ix++) {
      Bool found;
      Addr base = *(Addr*)VG_(indexXA)( bases, ix );
      StackTreeNode* nd = find_StackTreeNode( sitree, base );
      /* Must be present: added earlier when the frame was built. */
      tl_assert(nd);
      found = VG_(delFromFM)( sitree, &oldK, &oldV, (UWord)nd );
      /* we just found the block! */
      tl_assert(found);
      tl_assert(oldV == 0);
      tl_assert((StackTreeNode*)oldK == nd);
      sg_free(nd);
   }
}
591
592
/* Key finaliser for delete_StackTree: every key is a heap-allocated
   StackTreeNode, so free it. */
static void delete_StackTree__kFin ( UWord keyW ) {
   StackTreeNode* node = (StackTreeNode*)keyW;
   tl_assert(node);
   sg_free(node);
}
/* Value finaliser for delete_StackTree: values are always 0, so
   there is nothing to free -- just check the invariant. */
static void delete_StackTree__vFin ( UWord valW ) {
   tl_assert(valW == 0);
}
/* Destroy an entire stack interval tree, freeing every node in it
   via the finalisers above. */
static void delete_StackTree ( WordFM* sitree )
{
   VG_(deleteFM)( sitree,
                 delete_StackTree__kFin, delete_StackTree__vFin );
}
606
new_StackTree(void)607 static WordFM* new_StackTree ( void ) {
608 return VG_(newFM)( sg_malloc, "di.sg_main.nST.1", sg_free,
609 (Word(*)(UWord,UWord))cmp_intervals_StackTreeNode );
610 }
611
612
613 //////////////////////////////////////////////////////////////
614 // //
615 // Interval tree of GlobalTreeBlock //
616 // //
617 //////////////////////////////////////////////////////////////
618
/* A node in a global interval tree.  Zero length intervals
   (.szB == 0) are not allowed.

   A global interval tree is a (WordFM GlobalTreeNode* void).  There
   is one global interval tree for the entire process.
*/
typedef
   struct {
      Addr addr;          /* copied from .descr->addr */
      SizeT szB;          /* copied from .descr->szB */
      GlobalBlock* descr; /* it's this block */
   }
   GlobalTreeNode;
632
/* Debug-print one global tree node.  nd->szB is a SizeT (unsigned),
   so it must be printed with %lu; the previous %ld was a signed/
   unsigned format mismatch. */
static void GlobalTreeNode__pp ( GlobalTreeNode* nd ) {
   tl_assert(nd->descr);
   VG_(printf)("GTNode [%#lx,+%lu) %s",
               nd->addr, nd->szB, nd->descr->name);
}
638
/* Debug dump of the entire global interval tree, one node per line,
   tagged with 'who' to identify the call site. */
static void GlobalTree__pp ( WordFM* /* of (GlobalTreeNode,void) */ gitree,
                             const HChar* who )
{
   UWord keyW, valW;
   VG_(printf)("<<< GlobalBlockTree (%s)\n", who);
   VG_(initIterFM)( gitree );
   while (VG_(nextIterFM)( gitree, &keyW, &valW )) {
      GlobalTreeNode* node = (GlobalTreeNode*)keyW;
      tl_assert(valW == 0);
      VG_(printf)("  ");
      GlobalTreeNode__pp(node);
      VG_(printf)("\n");
   }
   VG_(doneIterFM)( gitree );
   VG_(printf)(">>>\n");
}
656
657 /* Interval comparison function for GlobalTreeNode */
cmp_intervals_GlobalTreeNode(GlobalTreeNode * gn1,GlobalTreeNode * gn2)658 static Word cmp_intervals_GlobalTreeNode ( GlobalTreeNode* gn1,
659 GlobalTreeNode* gn2 )
660 {
661 return cmp_nonempty_intervals( gn1->addr, gn1->szB,
662 gn2->addr, gn2->szB );
663 }
664
665 /* Find the node holding 'a', if any. */
find_GlobalTreeNode(WordFM * gitree,Addr a)666 static GlobalTreeNode* find_GlobalTreeNode ( WordFM* gitree, Addr a )
667 {
668 UWord keyW, valW;
669 GlobalTreeNode key;
670 key.addr = a;
671 key.szB = 1;
672 if (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
673 GlobalTreeNode* res = (GlobalTreeNode*)keyW;
674 tl_assert(valW == 0);
675 tl_assert(res != &key);
676 return res;
677 } else {
678 return NULL;
679 }
680 }
681
682 /* Note that the supplied GlobalBlock must have been made persistent
683 already. */
static void add_block_to_GlobalTree (
               /*MOD*/WordFM* gitree,
               GlobalBlock* descr
            )
{
   Bool already_present;
   GlobalTreeNode *nyu, *nd;
   UWord keyW, valW;
   static Int moans = 3;

   tl_assert(descr->szB > 0);
   /* Build a new node covering exactly the descriptor's extent. */
   nyu = sg_malloc( "di.sg_main.abtG.1", sizeof(GlobalTreeNode) );
   nyu->addr = descr->addr;
   nyu->szB = descr->szB;
   nyu->descr = descr;

   /* Basically it's an error to add a global block to the tree that
      is already in the tree.  However, detect and ignore attempts to
      insert exact duplicates; they do appear for some reason
      (possible a bug in m_debuginfo?) */
   already_present = VG_(lookupFM)( gitree, &keyW, &valW, (UWord)nyu );
   if (already_present) {
      tl_assert(valW == 0);
      nd = (GlobalTreeNode*)keyW;
      tl_assert(nd);
      tl_assert(nd != nyu);
      tl_assert(nd->descr);
      tl_assert(nyu->descr);
      /* An "exact duplicate" means: same extent and same soname.
         Names are deliberately NOT compared -- see below. */
      if (nd->addr == nyu->addr && nd->szB == nyu->szB
          /* && 0 == VG_(strcmp)(nd->descr->name, nyu->descr->name) */
          /* Although it seems reasonable to demand that duplicate
             blocks have identical names, that is too strict.  For
             example, reading debuginfo from glibc produces two
             otherwise identical blocks with names "tzname" and
             "__tzname".  A constraint of the form "must be identical,
             or one must be a substring of the other" would fix that.
             However, such trickery is scuppered by the fact that we
             truncate all variable names to 15 characters to make
             storage management simpler, hence giving pairs like
             "__EI___pthread_" (truncated) vs "__pthread_keys".  So
             it's simplest just to skip the name comparison
             completely. */
          && 0 == VG_(strcmp)(nd->descr->soname, nyu->descr->soname)) {
         /* exact duplicate; ignore it */
         sg_free(nyu);
         return;
      }
      /* else fall through; the assertion below will catch it */
   }

   already_present = VG_(addToFM)( gitree, (UWord)nyu, 0 );
   /* The interval can't already be there; else we have
      overlapping global blocks. */
   /* Unfortunately (25 Jan 09) at least icc11 has been seen to
      generate overlapping block descriptions in the Dwarf3; clearly
      bogus. */
   if (already_present && moans > 0 && !VG_(clo_xml)) {
      moans--;
      VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
                               "overlapping global blocks\n");
      if (VG_(clo_verbosity) >= 2) {
         GlobalTree__pp( gitree,
                         "add_block_to_GlobalTree: non-exact duplicate" );
         VG_(printf)("Overlapping block: ");
         GlobalTreeNode__pp(nyu);
         VG_(printf)("\n");
      }
      if (moans == 0)
         VG_(message)(Vg_UserMsg, "Further instances of this "
                                  "message will not be shown\n" );
   }
   /* The assert is disabled because bogus DWARF3 (above) would
      trip it; the warning path is used instead. */
   /* tl_assert(!already_present); */
}
757
/* Delete every node in 'gitree' whose extent intersects [a,a+szB).
   Returns True iff at least one node was removed.  NOTE(review): the
   removed GlobalTreeNodes are not sg_free'd here -- confirm whether
   callers rely on them remaining valid. */
static Bool del_GlobalTree_range ( /*MOD*/WordFM* gitree,
                                   Addr a, SizeT szB )
{
   /* One easy way to do this: look up [a,a+szB) in the tree.  That
      will either succeed, producing a block which intersects that
      range, in which case we delete it and repeat; or it will fail,
      in which case there are no blocks intersecting the range, and we
      can bring the process to a halt. */
   UWord keyW, valW, oldK, oldV;
   GlobalTreeNode key, *nd;
   Bool b, anyFound;

   tl_assert(szB > 0);

   anyFound = False;

   key.addr = a;
   key.szB = szB;

   while (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
      anyFound = True;
      nd = (GlobalTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(nd != &key);
      /* The lookup key overlaps the found node, by construction. */
      tl_assert(cmp_nonempty_intervals(a, szB, nd->addr, nd->szB) == 0);

      b = VG_(delFromFM)( gitree, &oldK, &oldV, (UWord)&key );
      tl_assert(b);
      tl_assert(oldV == 0);
      tl_assert(oldK == keyW); /* check we deleted the node we just found */
   }

   return anyFound;
}
792
793
794 //////////////////////////////////////////////////////////////
795 // //
796 // Invar //
797 // //
798 //////////////////////////////////////////////////////////////
799
800 /* An invariant, as resulting from watching the destination of a
801 memory referencing instruction. Initially is Inv_Unset until the
802 instruction makes a first access. */
803
typedef
   enum {
      Inv_Unset=1, /* not established yet */
      Inv_Unknown, /* unknown location */
      Inv_Stack0,  /* array-typed stack block in innermost frame */
      Inv_StackN,  /* array-typed stack block in non-innermost frame */
      Inv_Global,  /* array-typed global block */
   }
   InvarTag;

/* Tagged union carrying the details for each InvarTag case.  Unset
   and Unknown carry no payload. */
typedef
   struct {
      InvarTag tag;
      union {
         struct {
         } Unset;
         struct {
         } Unknown;
         struct {
            Addr addr;         /* start of the innermost-frame block */
            SizeT szB;         /* its size in bytes */
            StackBlock* descr; /* the persistent block descriptor */
         } Stack0; /* innermost stack frame */
         struct {
            /* Pointer to a node in the interval tree for
               this thread. */
            StackTreeNode* nd;
         } StackN; /* non-innermost stack frame */
         struct {
            /* Pointer to a GlobalBlock in the interval tree of
               global blocks. */
            GlobalTreeNode* nd;
         } Global;
      }
      Inv;
   }
   Invar;
841
842 /* Partial debugging printing for an Invar. */
/* Partial debugging printing for an Invar: a compact one-line
   rendering, tag plus extent where applicable. */
static void pp_Invar ( Invar* inv )
{
   switch (inv->tag) {
      case Inv_Unset:
         VG_(printf)("Unset");
         break;
      case Inv_Unknown:
         VG_(printf)("Unknown");
         break;
      case Inv_Stack0:
         VG_(printf)("Stack0 [%#lx,+%lu)",
                     inv->Inv.Stack0.addr, inv->Inv.Stack0.szB);
         break;
      case Inv_StackN:
         VG_(printf)("StackN [%#lx,+%lu)",
                     inv->Inv.StackN.nd->addr, inv->Inv.StackN.nd->szB);
         break;
      case Inv_Global:
         VG_(printf)("Global [%#lx,+%lu)",
                     inv->Inv.Global.nd->addr, inv->Inv.Global.nd->szB);
         break;
      default:
         tl_assert(0);
   }
}
868
869 /* Compare two Invars for equality. */
eq_Invar(Invar * i1,Invar * i2)870 static Bool eq_Invar ( Invar* i1, Invar* i2 )
871 {
872 tl_assert(i1->tag != Inv_Unset);
873 tl_assert(i2->tag != Inv_Unset);
874 if (i1->tag != i2->tag)
875 return False;
876 switch (i1->tag) {
877 case Inv_Unknown:
878 return True;
879 case Inv_Stack0:
880 return i1->Inv.Stack0.addr == i2->Inv.Stack0.addr
881 && i1->Inv.Stack0.szB == i2->Inv.Stack0.szB;
882 case Inv_StackN:
883 return i1->Inv.StackN.nd == i2->Inv.StackN.nd;
884 case Inv_Global:
885 return i1->Inv.Global.nd == i2->Inv.Global.nd;
886 default:
887 tl_assert(0);
888 }
889 /*NOTREACHED*/
890 tl_assert(0);
891 }
892
893 /* Generate a piece of text showing 'ea' is relative to 'invar', if
894 known. If unknown, generate an empty string. 'buf' must be at
895 least 32 bytes in size. Also return the absolute value of the
896 delta, if known, or zero if not known.
897 */
/* Generate a piece of text showing 'ea' is relative to 'invar', if
   known.  If unknown, generate an empty string.  'buf' must be at
   least 32 bytes in size.  Also return the absolute value of the
   delta, if known, or zero if not known. */
static void gen_delta_str ( /*OUT*/HChar* buf,
                            /*OUT*/UWord* absDelta,
                            Invar* inv, Addr ea )
{
   Addr base = 0;
   SizeT size = 0;

   buf[0] = 0;
   *absDelta = 0;

   switch (inv->tag) {
      case Inv_Unknown:
      case Inv_Unset:
         return; /* no known block; leave buf empty, delta zero */
      case Inv_Stack0:
         base = inv->Inv.Stack0.addr;
         size = inv->Inv.Stack0.szB;
         break;
      case Inv_StackN:
         base = inv->Inv.StackN.nd->addr;
         size = inv->Inv.StackN.nd->szB;
         break;
      case Inv_Global:
         base = inv->Inv.Global.nd->addr;
         size = inv->Inv.Global.nd->szB;
         break;
      default:
         tl_assert(0);
   }
   tl_assert(size > 0);
   if (ea < base) {
      *absDelta = base - ea;
      VG_(sprintf)(buf, "%'lu before", *absDelta);
   }
   else if (ea >= base + size) {
      *absDelta = ea - (base + size);
      VG_(sprintf)(buf, "%'lu after", *absDelta);
   }
   else {
      /* Inside the block: *absDelta stays at zero. */
      VG_(sprintf)(buf, "%'lu inside", ea - base);
   }
}
941
942
943 /* Print selected parts of an Invar, suitable for use in error
944 messages. */
/* Print selected parts of an Invar into 'buf' (>= 128 bytes),
   suitable for use in error messages.  'depth' is the current
   shadow-stack depth, used to express how many frames back an
   Inv_StackN block lives. */
static void show_Invar( HChar* buf, Word nBuf, Invar* inv, Word depth )
{
   /* Only array-typed blocks are tracked, so the kind is fixed. */
   const HChar* str = "array";
   tl_assert(nBuf >= 128);
   buf[0] = 0;
   switch (inv->tag) {
      case Inv_Unknown:
         VG_(sprintf)(buf, "%s", "unknown");
         break;
      case Inv_Stack0:
         VG_(sprintf)(buf, "stack %s \"%s\" of size %'lu in this frame",
                      str, inv->Inv.Stack0.descr->name,
                      inv->Inv.Stack0.szB );
         break;
      case Inv_StackN:
         VG_(sprintf)(buf, "stack %s \"%s\" of size %'lu in frame %lu back from here",
                      str, inv->Inv.StackN.nd->descr->name,
                      inv->Inv.StackN.nd->descr->szB,
                      depth - inv->Inv.StackN.nd->depth );
         break;
      case Inv_Global:
         VG_(sprintf)(buf, "global %s \"%s\" of size %'lu in object with soname \"%s\"",
                      str, inv->Inv.Global.nd->descr->name,
                      inv->Inv.Global.nd->descr->szB,
                      inv->Inv.Global.nd->descr->soname );
         break;
      case Inv_Unset:
         VG_(sprintf)(buf, "%s", "Unset!");
         break;
      default:
         tl_assert(0);
   }
}
981
982
983 //////////////////////////////////////////////////////////////
984 // //
985 // our globals //
986 // //
987 //////////////////////////////////////////////////////////////
988
989 //////////////////////////////////////////////////////////////
990 ///
991
/* Number of entries in each per-thread query cache. */
#define N_QCACHE 16

/* Powers of two only, else the result will be chaos */
#define QCACHE_ADVANCE_EVERY 16

/* Per-thread query cache.  Note that the invar can only be Inv_StackN
   (but not Inv_Stack0), Inv_Global or Inv_Unknown. */
typedef
   struct {
      Addr addr;   /* start of the cached interval */
      SizeT szB;   /* size of the cached interval, > 0 */
      Invar inv;   /* classification valid for [addr, addr+szB) */
   }
   QCElem;

/* Simple move-to-front-ish cache: elems[0 .. nInUse-1] are valid,
   searched linearly by classify_address. */
typedef
   struct {
      Word nInUse;             /* number of valid entries, 0 .. N_QCACHE */
      QCElem elems[N_QCACHE];
   }
   QCache;
1013
/* Discard every entry in the cache.  Stale elements simply become
   unreachable; no storage is released. */
static void QCache__invalidate ( QCache* cache ) {
   tl_assert(cache->nInUse >= 0);
   cache->nInUse = 0;
}
1018
/* Debug-print the cache contents; 'who' tags the dump so the call
   site can be identified in the output. */
static void QCache__pp ( QCache* qc, const HChar* who )
{
   Word ix;
   VG_(printf)("<<< QCache with %ld elements (%s)\n", qc->nInUse, who);
   for (ix = 0; ix < qc->nInUse; ix++) {
      QCElem* e = &qc->elems[ix];
      VG_(printf)(" [%#lx,+%#lx) ", e->addr, e->szB);
      pp_Invar(&e->inv);
      VG_(printf)("\n");
   }
   VG_(printf)(">>>\n");
}
1030
/* Query-cache usage counters. */
static ULong stats__qcache_queries = 0; /* total lookups */
static ULong stats__qcache_misses = 0;  /* lookups that found no usable entry */
static ULong stats__qcache_probes = 0;  /* individual elements examined */
1034
1035 ///
1036 //////////////////////////////////////////////////////////////
1037
/* Each thread has:
   * a shadow stack of StackFrames, which is a doubly-linked list
   * a stack block interval tree
*/
static struct _StackFrame* shadowStacks[VG_N_THREADS];

/* Per-thread interval tree of currently-live stack blocks. */
static WordFM* /* StackTreeNode */ siTrees[VG_N_THREADS];

/* Per-thread classification cache; see classify_address. */
static QCache qcaches[VG_N_THREADS];


/* Additionally, there is one global variable interval tree
   for the entire process.
*/
static WordFM* /* GlobalTreeNode */ giTree;
1053
1054
invalidate_all_QCaches(void)1055 static void invalidate_all_QCaches ( void )
1056 {
1057 Word i;
1058 for (i = 0; i < VG_N_THREADS; i++) {
1059 QCache__invalidate( &qcaches[i] );
1060 }
1061 }
1062
ourGlobals_init(void)1063 static void ourGlobals_init ( void )
1064 {
1065 Word i;
1066 for (i = 0; i < VG_N_THREADS; i++) {
1067 shadowStacks[i] = NULL;
1068 siTrees[i] = NULL;
1069 }
1070 invalidate_all_QCaches();
1071 giTree = VG_(newFM)( sg_malloc, "di.sg_main.oGi.1", sg_free,
1072 (Word(*)(UWord,UWord))cmp_intervals_GlobalTreeNode );
1073 }
1074
1075
1076 //////////////////////////////////////////////////////////////
1077 // //
1078 // Handle global variable load/unload events //
1079 // //
1080 //////////////////////////////////////////////////////////////
1081
/* Pull all global array descriptions out of the debuginfo identified
   by 'di_handle', make persistent copies of them, and enter those
   copies into the global interval tree. */
static void acquire_globals ( ULong di_handle )
{
   Word ix, nGlobals;
   XArray* /* of GlobalBlock */ gbs;
   if (0) VG_(printf)("ACQUIRE GLOBALS %llu\n", di_handle );
   gbs = VG_(di_get_global_blocks_from_dihandle)
            (di_handle, True/*arrays only*/);
   if (0) VG_(printf)(" GOT %ld globals\n", VG_(sizeXA)( gbs ));

   nGlobals = VG_(sizeXA)( gbs );
   for (ix = 0; ix < nGlobals; ix++) {
      GlobalBlock* copy;
      GlobalBlock* gb = VG_(indexXA)( gbs, ix );
      if (0) VG_(printf)(" new Global size %2lu at %#lx: %s %s\n",
                         gb->szB, gb->addr, gb->soname, gb->name );
      tl_assert(gb->szB > 0);
      /* The XArray is about to be freed, so the tree must hold a
         persistent copy rather than a pointer into it. */
      copy = get_persistent_GlobalBlock( gb );
      add_block_to_GlobalTree( giTree, copy );
   }

   VG_(deleteXA)( gbs );
}
1106
1107
1108 /* We only intercept these two because we need to see any di_handles
1109 that might arise from the mappings/allocations. */
/* New-mapping notification.  We only care about the debuginfo handle,
   which (when nonzero) may carry new global blocks. */
void sg_new_mem_mmap( Addr a, SizeT len,
                      Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   if (di_handle == 0)
      return;
   acquire_globals(di_handle);
}
/* Startup-mapping notification; identical handling to sg_new_mem_mmap. */
void sg_new_mem_startup( Addr a, SizeT len,
                         Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   if (di_handle == 0)
      return;
   acquire_globals(di_handle);
}
sg_die_mem_munmap(Addr a,SizeT len)1122 void sg_die_mem_munmap ( Addr a, SizeT len )
1123 {
1124 Bool debug = (Bool)0;
1125 Bool overlap = False;
1126
1127 if (debug) VG_(printf)("MUNMAP %#lx %lu\n", a, len );
1128
1129 if (len == 0)
1130 return;
1131
1132 overlap = del_GlobalTree_range(giTree, a, len);
1133
1134 { /* redundant sanity check */
1135 UWord keyW, valW;
1136 VG_(initIterFM)( giTree );
1137 while (VG_(nextIterFM)( giTree, &keyW, &valW )) {
1138 GlobalTreeNode* nd = (GlobalTreeNode*)keyW;
1139 tl_assert(valW == 0);
1140 tl_assert(nd->szB > 0);
1141 tl_assert(nd->addr + nd->szB <= a
1142 || a + len <= nd->addr);
1143 }
1144 VG_(doneIterFM)( giTree );
1145 }
1146
1147 if (!overlap)
1148 return;
1149
1150 /* Ok, the range contained some blocks. Therefore we'll need to
1151 visit all the Invars in all the thread shadow stacks, and
1152 convert all Inv_Global entries that intersect [a,a+len) to
1153 Inv_Unknown. */
1154 tl_assert(len > 0);
1155 preen_global_Invars( a, len );
1156 invalidate_all_QCaches();
1157 }
1158
1159
1160 //////////////////////////////////////////////////////////////
1161 // //
1162 // StackFrame //
1163 // //
1164 //////////////////////////////////////////////////////////////
1165
/* Counters for access classification and the per-frame instruction
   hash tables; reported in the tool's final statistics. */
static ULong stats__total_accesses = 0;   /* all checked accesses */
static ULong stats__classify_Stack0 = 0;  /* hits in this frame's blocks */
static ULong stats__classify_StackN = 0;  /* hits in a caller frame's blocks */
static ULong stats__classify_Global = 0;  /* hits in a global block */
static ULong stats__classify_Unknown = 0; /* not classifiable */
static ULong stats__Invars_preened = 0;   /* Invars visited by preening */
static ULong stats__Invars_changed = 0;   /* Invars downgraded to Unknown */
static ULong stats__t_i_b_empty = 0;      /* accesses with no frame blocks */
static ULong stats__htab_fast = 0;        /* first-probe hits/inserts */
static ULong stats__htab_searches = 0;    /* slow-path searches */
static ULong stats__htab_probes = 0;      /* slow-path probes */
static ULong stats__htab_resizes = 0;     /* hash table growths */
1178
1179
/* A dynamic instance of an instruction: per-frame, per-instruction
   state, stored in the owning StackFrame's hash table. */
typedef
   struct {
      /* IMMUTABLE */
      Addr insn_addr; /* NB! zero means 'not in use' */
      XArray* blocks; /* XArray* of StackBlock, or NULL if none */
      /* MUTABLE */
      Invar invar; /* last classification seen for this instruction */
   }
   IInstance;
1190
1191
/* Initial (in-struct) capacity of each frame's IInstance hash table. */
#define N_HTAB_FIXED 64

/* One shadow stack frame per (reusable) activation of a function. */
typedef
   struct _StackFrame {
      /* The sp when the frame was created, so we know when to get rid
         of it. */
      Addr creation_sp;
      /* The stack frames for a thread are arranged as a doubly linked
         list.  Obviously the outermost frame in the stack has .outer
         as NULL and the innermost in theory has .inner as NULL.
         However, when a function returns, we don't delete the
         just-vacated StackFrame.  Instead, it is retained in the list
         and will be re-used when the next call happens.  This is so
         as to avoid constantly having to dynamically allocate and
         deallocate frames. */
      struct _StackFrame* inner;
      struct _StackFrame* outer;
      Word depth; /* 0 for outermost; increases inwards */
      /* Information for each memory referencing instruction, for this
         instantiation of the function.  The iinstances array is
         operated as a simple linear-probe hash table, which is
         dynamically expanded as necessary.  One critical thing is
         that an IInstance with a .insn_addr of zero is interpreted to
         mean that hash table slot is unused.  This means we can't
         store an IInstance for address zero. */
      /* Note that htab initially points to htab_fixed.  If htab_fixed
         turns out not to be big enough then htab is made to point to
         dynamically allocated memory.  But it's often the case that
         htab_fixed is big enough, so this optimisation saves a huge
         number of sg_malloc/sg_free call pairs. */
      IInstance* htab;
      UWord htab_size; /* size of hash table, MAY ONLY BE A POWER OF 2 */
      UWord htab_used; /* number of hash table slots currently in use */
      /* If this frame is currently making a call, then the following
         are relevant. */
      Addr sp_at_call;
      Addr fp_at_call;
      XArray* /* of Addr */ blocks_added_by_call;
      /* See comment just above */
      IInstance htab_fixed[N_HTAB_FIXED];
   }
   StackFrame;
1234
1235
1236
1237
1238
1239 /* Move this somewhere else? */
1240 /* Visit all Invars in the entire system. If 'isHeap' is True, change
1241 all Inv_Heap Invars that intersect [a,a+len) to Inv_Unknown. If
1242 'isHeap' is False, do the same but to the Inv_Global{S,V} Invars
1243 instead. */
1244
1245 __attribute__((noinline))
preen_global_Invar(Invar * inv,Addr a,SizeT len)1246 static void preen_global_Invar ( Invar* inv, Addr a, SizeT len )
1247 {
1248 stats__Invars_preened++;
1249 tl_assert(len > 0);
1250 tl_assert(inv);
1251 switch (inv->tag) {
1252 case Inv_Global:
1253 tl_assert(inv->Inv.Global.nd);
1254 tl_assert(inv->Inv.Global.nd->szB > 0);
1255 if (0) VG_(printf)("preen_Invar Global %#lx %lu\n",
1256 inv->Inv.Global.nd->addr,
1257 inv->Inv.Global.nd->szB);
1258 if (0 == cmp_nonempty_intervals(a, len, inv->Inv.Global.nd->addr,
1259 inv->Inv.Global.nd->szB)) {
1260 inv->tag = Inv_Unknown;
1261 stats__Invars_changed++;
1262 }
1263 break;
1264 case Inv_Stack0:
1265 case Inv_StackN:
1266 case Inv_Unknown:
1267 break;
1268 default:
1269 tl_assert(0);
1270 }
1271 }
1272
1273 __attribute__((noinline))
preen_global_Invars(Addr a,SizeT len)1274 static void preen_global_Invars ( Addr a, SizeT len )
1275 {
1276 Int i;
1277 UWord u;
1278 StackFrame* frame;
1279 tl_assert(len > 0);
1280 for (i = 0; i < VG_N_THREADS; i++) {
1281 frame = shadowStacks[i];
1282 if (!frame)
1283 continue; /* no frames for this thread */
1284 /* start from the innermost frame */
1285 while (frame->inner)
1286 frame = frame->inner;
1287 tl_assert(frame->outer);
1288 /* work through the frames from innermost to outermost. The
1289 order isn't important; we just need to ensure we visit each
1290 frame once (including those which are not actually active,
1291 more 'inner' than the 'innermost active frame', viz, just
1292 hanging around waiting to be used, when the current innermost
1293 active frame makes more calls. See comments on definition of
1294 struct _StackFrame. */
1295 for (; frame; frame = frame->outer) {
1296 UWord xx = 0; /* sanity check only; count of used htab entries */
1297 if (!frame->htab)
1298 continue; /* frame not in use. See shadowStack_unwind(). */
1299 for (u = 0; u < frame->htab_size; u++) {
1300 IInstance* ii = &frame->htab[u];
1301 if (ii->insn_addr == 0)
1302 continue; /* not in use */
1303 if (0) { pp_Invar(&ii->invar); VG_(printf)(" x\n"); }
1304 preen_global_Invar( &ii->invar, a, len );
1305 xx++;
1306 }
1307 tl_assert(xx == frame->htab_used);
1308 }
1309 }
1310 }
1311
1312
1313 /* XXX this should be >> 2 on ppc32/64 since the bottom two bits
1314 of the ip are guaranteed to be zero */
inline static UWord compute_II_hash ( Addr ip, UWord htab_size ) {
   /* htab_size is a power of two, so masking is ip mod htab_size. */
   return ip & (htab_size - 1);
}
1318
1319 __attribute__((noinline))
initialise_II_hash_table(StackFrame * sf)1320 static void initialise_II_hash_table ( StackFrame* sf )
1321 {
1322 UWord i;
1323 sf->htab_size = N_HTAB_FIXED; /* initial hash table size */
1324 sf->htab = &sf->htab_fixed[0];
1325 tl_assert(sf->htab);
1326 sf->htab_used = 0;
1327 for (i = 0; i < sf->htab_size; i++)
1328 sf->htab[i].insn_addr = 0; /* NOT IN USE */
1329 }
1330
1331
1332 __attribute__((noinline))
resize_II_hash_table(StackFrame * sf)1333 static void resize_II_hash_table ( StackFrame* sf )
1334 {
1335 UWord i, j, ix, old_size, new_size;
1336 IInstance *old_htab, *new_htab, *old;
1337
1338 tl_assert(sf && sf->htab);
1339 old_size = sf->htab_size;
1340 new_size = 2 * old_size;
1341 old_htab = sf->htab;
1342 new_htab = sg_malloc( "di.sg_main.rIht.1",
1343 new_size * sizeof(IInstance) );
1344 for (i = 0; i < new_size; i++) {
1345 new_htab[i].insn_addr = 0; /* NOT IN USE */
1346 }
1347 for (i = 0; i < old_size; i++) {
1348 old = &old_htab[i];
1349 if (old->insn_addr == 0 /* NOT IN USE */)
1350 continue;
1351 ix = compute_II_hash(old->insn_addr, new_size);
1352 /* find out where to put this, in the new table */
1353 j = new_size;
1354 while (1) {
1355 if (new_htab[ix].insn_addr == 0)
1356 break;
1357 /* This can't ever happen, because it would mean the new
1358 table is full; that isn't allowed -- even the old table is
1359 only allowed to become half full. */
1360 tl_assert(j > 0);
1361 j--;
1362 ix++; if (ix == new_size) ix = 0;
1363 }
1364 /* copy the old entry to this location */
1365 tl_assert(ix < new_size);
1366 tl_assert(new_htab[ix].insn_addr == 0);
1367 new_htab[ix] = *old;
1368 tl_assert(new_htab[ix].insn_addr != 0);
1369 }
1370 /* all entries copied; free old table. */
1371 if (old_htab != &sf->htab_fixed[0])
1372 sg_free(old_htab);
1373 sf->htab = new_htab;
1374 sf->htab_size = new_size;
1375 /* check sf->htab_used is correct. Optional and a bit expensive
1376 but anyway: */
1377 j = 0;
1378 for (i = 0; i < new_size; i++) {
1379 if (new_htab[i].insn_addr != 0) {
1380 j++;
1381 }
1382 }
1383 tl_assert(j == sf->htab_used);
1384 if (0) VG_(printf)("resized tab for SF %p to %lu\n", sf, new_size);
1385 }
1386
1387
1388 __attribute__((noinline))
find_or_create_IInstance_SLOW(StackFrame * sf,Addr ip,XArray * ip_frameblocks)1389 static IInstance* find_or_create_IInstance_SLOW (
1390 StackFrame* sf,
1391 Addr ip,
1392 XArray* /* StackBlock */ ip_frameblocks
1393 )
1394 {
1395 UWord i, ix;
1396
1397 stats__htab_searches++;
1398
1399 tl_assert(sf);
1400 tl_assert(sf->htab);
1401
1402 /* Make sure the table loading doesn't get too high. */
1403 if (UNLIKELY(2 * sf->htab_used >= 1 * sf->htab_size)) {
1404 stats__htab_resizes++;
1405 resize_II_hash_table(sf);
1406 }
1407 tl_assert(2 * sf->htab_used <= sf->htab_size);
1408
1409 ix = compute_II_hash(ip, sf->htab_size);
1410 i = sf->htab_size;
1411 while (1) {
1412 stats__htab_probes++;
1413 /* Note that because of the way the fast-case handler works,
1414 these two tests are actually redundant in the first iteration
1415 of this loop. (Except they aren't redundant if the code just
1416 above resized the table first. :-) */
1417 if (sf->htab[ix].insn_addr == ip)
1418 return &sf->htab[ix];
1419 if (sf->htab[ix].insn_addr == 0)
1420 break;
1421 /* If i ever gets to zero and we have found neither what we're
1422 looking for nor an empty slot, the table must be full. Which
1423 isn't possible -- we monitor the load factor to ensure it
1424 doesn't get above say 50%; if that ever does happen the table
1425 is resized. */
1426 tl_assert(i > 0);
1427 i--;
1428 ix++;
1429 if (ix == sf->htab_size) ix = 0;
1430 }
1431
1432 /* So now we've found a free slot at ix, and we can use that. */
1433 tl_assert(sf->htab[ix].insn_addr == 0);
1434
1435 /* Add a new record in this slot. */
1436 tl_assert(ip != 0); /* CAN'T REPRESENT THIS */
1437 sf->htab[ix].insn_addr = ip;
1438 sf->htab[ix].blocks = ip_frameblocks;
1439 sf->htab[ix].invar.tag = Inv_Unset;
1440 sf->htab_used++;
1441 return &sf->htab[ix];
1442 }
1443
1444
1445 inline
find_or_create_IInstance(StackFrame * sf,Addr ip,XArray * ip_frameblocks)1446 static IInstance* find_or_create_IInstance (
1447 StackFrame* sf,
1448 Addr ip,
1449 XArray* /* StackBlock */ ip_frameblocks
1450 )
1451 {
1452 UWord ix = compute_II_hash(ip, sf->htab_size);
1453 /* Is it in the first slot we come to? */
1454 if (LIKELY(sf->htab[ix].insn_addr == ip)) {
1455 stats__htab_fast++;
1456 return &sf->htab[ix];
1457 }
1458 /* If the first slot we come to is empty, bag it. */
1459 if (LIKELY(sf->htab[ix].insn_addr == 0)) {
1460 stats__htab_fast++;
1461 tl_assert(ip != 0);
1462 sf->htab[ix].insn_addr = ip;
1463 sf->htab[ix].blocks = ip_frameblocks;
1464 sf->htab[ix].invar.tag = Inv_Unset;
1465 sf->htab_used++;
1466 return &sf->htab[ix];
1467 }
1468 /* Otherwise we hand off to the slow case, which searches other
1469 slots, and optionally resizes the table if necessary. */
1470 return find_or_create_IInstance_SLOW( sf, ip, ip_frameblocks );
1471 }
1472
1473
1474 __attribute__((noinline))
calculate_StackBlock_EA(StackBlock * descr,Addr sp,Addr fp)1475 static Addr calculate_StackBlock_EA ( StackBlock* descr,
1476 Addr sp, Addr fp ) {
1477 UWord w1 = (UWord)descr->base;
1478 UWord w2 = (UWord)(descr->spRel ? sp : fp);
1479 UWord ea = w1 + w2;
1480 return ea;
1481 }
1482
1483 /* Given an array of StackBlocks, return an array of Addrs, holding
1484 their effective addresses. Caller deallocates result array. */
1485 __attribute__((noinline))
calculate_StackBlock_EAs(XArray * blocks,Addr sp,Addr fp)1486 static XArray* /* Addr */ calculate_StackBlock_EAs (
1487 XArray* /* StackBlock */ blocks,
1488 Addr sp, Addr fp
1489 )
1490 {
1491 XArray* res;
1492 Word i, n = VG_(sizeXA)( blocks );
1493 tl_assert(n > 0);
1494 res = VG_(newXA)( sg_malloc, "di.sg_main.cSBE.1", sg_free, sizeof(Addr) );
1495 for (i = 0; i < n; i++) {
1496 StackBlock* blk = VG_(indexXA)( blocks, i );
1497 Addr ea = calculate_StackBlock_EA( blk, sp, fp );
1498 VG_(addToXA)( res, &ea );
1499 }
1500 return res;
1501 }
1502
1503
/* Try to classify the block into which a memory access falls, and
   write the result in 'inv'.  This writes all relevant fields of
   'inv'.
   Search order: (1) stack blocks visible to this instruction's frame
   (Inv_Stack0), (2) this thread's query cache, (3) the thread's
   stack-block interval tree (Inv_StackN), (4) the global-block tree
   (Inv_Global), else Inv_Unknown.  Results from stages 3-5 are
   entered into the query cache. */
__attribute__((noinline))
static void classify_address ( /*OUT*/Invar* inv,
                               ThreadId tid,
                               Addr ea, Addr sp, Addr fp,
                               UWord szB,
                               XArray* /* of StackBlock */ thisInstrBlocks )
{
   tl_assert(szB > 0);
   /* First, look in the stack blocks accessible in this instruction's
      frame. */
   {
     Word i, nBlocks = VG_(sizeXA)( thisInstrBlocks );
     if (nBlocks == 0) stats__t_i_b_empty++;
     for (i = 0; i < nBlocks; i++) {
        StackBlock* descr = VG_(indexXA)( thisInstrBlocks, i );
        Addr bea = calculate_StackBlock_EA( descr, sp, fp );
        /* accept only if the whole access lies inside the block */
        if (bea <= ea && ea + szB <= bea + descr->szB) {
           /* found it */
           inv->tag = Inv_Stack0;
           inv->Inv.Stack0.addr = bea;
           inv->Inv.Stack0.szB = descr->szB;
           inv->Inv.Stack0.descr = descr;
           stats__classify_Stack0++;
           return;
        }
     }
   }
   /* Look in this thread's query cache */
   { Word i;
     QCache* cache = &qcaches[tid];
     static UWord ctr = 0;
     stats__qcache_queries++;
     for (i = 0; i < cache->nInUse; i++) {
        if (0) /* expensive in a loop like this */
           tl_assert(cache->elems[i].addr + cache->elems[i].szB != 0);
        stats__qcache_probes++;
        if (is_subinterval_of(cache->elems[i].addr,
                              cache->elems[i].szB, ea, szB)) {
           /* periodically swap a hit entry one step towards the
              front, so frequently-hit entries are found sooner */
           if (i > 0
               && (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0) {
              QCElem tmp;
              tmp = cache->elems[i-1];
              cache->elems[i-1] = cache->elems[i];
              cache->elems[i] = tmp;
              i--;
           }
           *inv = cache->elems[i].inv;
           return;
        }
     }
     stats__qcache_misses++;
   }
   /* Ok, so it's not a block in the top frame.  Perhaps it's a block
      in some calling frame?  Consult this thread's stack-block
      interval tree to find out. */
   { StackTreeNode* nd = find_StackTreeNode( siTrees[tid], ea );
     /* We know that [ea,ea+1) is in the block, but we need to
        restrict to the case where the whole access falls within
        it. */
     if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
        nd = NULL;
     }
     if (nd) {
        /* found it */
        inv->tag = Inv_StackN;
        inv->Inv.StackN.nd = nd;
        stats__classify_StackN++;
        goto out;
     }
   }
   /* Not in a stack block.  Try the global pool. */
   { GlobalTreeNode* nd = find_GlobalTreeNode(giTree, ea);
     /* We know that [ea,ea+1) is in the block, but we need to
        restrict to the case where the whole access falls within
        it. */
     if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
        nd = NULL;
     }
     if (nd) {
        /* found it */
        inv->tag = Inv_Global;
        inv->Inv.Global.nd = nd;
        stats__classify_Global++;
        goto out;
     }
   }
   /* No idea - give up. */
   inv->tag = Inv_Unknown;
   stats__classify_Unknown++;

   /* Update the cache */
  out:
   { Addr toadd_addr = 0;
     SizeT toadd_szB = 0;
     QCache* cache = &qcaches[tid];

     static UWord ctr = 0;
     Bool show = False;
     if (0 && 0 == (ctr++ & 0x1FFFFF)) show = True;

     if (show) QCache__pp(cache, "before upd");

     switch (inv->tag) {
        case Inv_Global:
           toadd_addr = inv->Inv.Global.nd->addr;
           toadd_szB = inv->Inv.Global.nd->szB;
           break;
        case Inv_StackN:
           toadd_addr = inv->Inv.StackN.nd->addr;
           toadd_szB = inv->Inv.StackN.nd->szB;
           break;
        case Inv_Unknown: {
           /* This is more complex.  We need to figure out the
              intersection of the "holes" in the global and stack
              interval trees into which [ea,ea+szB) falls.  This is
              further complicated by the fact that [ea,ea+szB) might
              not fall cleanly into a hole; it may instead fall across
              the boundary of a stack or global block.  In that case
              we just ignore it and don't update the cache, since we
              have no way to represent this situation precisely. */
           StackTreeNode sNegInf, sPosInf, sKey, *sLB, *sUB;
           GlobalTreeNode gNegInf, gPosInf, gKey, *gLB, *gUB;
           Addr gMin, gMax, sMin, sMax, uMin, uMax;
           Bool sOK, gOK;
           /* sentinel nodes bounding the whole address space */
           sNegInf.addr = 0;
           sNegInf.szB = 1;
           sPosInf.addr = ~(UWord)0;
           sPosInf.szB = 1;
           gNegInf.addr = 0;
           gNegInf.szB = 1;
           gPosInf.addr = ~(UWord)0;
           gPosInf.szB = 1;
           sKey.addr = ea;
           sKey.szB = szB;
           gKey.addr = ea;
           gKey.szB = szB;
           if (0) VG_(printf)("Tree sizes %ld %ld\n",
                              VG_(sizeFM)(siTrees[tid]), VG_(sizeFM)(giTree));
           /* find the tree nodes bracketing [ea,ea+szB) in each tree;
              failure means the key partially overlaps some node */
           sOK = VG_(findBoundsFM)( siTrees[tid],
                                    (UWord*)&sLB, NULL/*unused*/,
                                    (UWord*)&sUB, NULL/*unused*/,
                                    (UWord)&sNegInf, 0/*unused*/,
                                    (UWord)&sPosInf, 0/*unused*/,
                                    (UWord)&sKey );
           gOK = VG_(findBoundsFM)( giTree,
                                    (UWord*)&gLB, NULL/*unused*/,
                                    (UWord*)&gUB, NULL/*unused*/,
                                    (UWord)&gNegInf, 0/*unused*/,
                                    (UWord)&gPosInf, 0/*unused*/,
                                    (UWord)&gKey );
           if (!(sOK && gOK)) {
              /* If this happens, then [ea,ea+szB) partially overlaps
                 a heap or stack block.  We can't represent that, so
                 just forget it (should be very rare).  However, do
                 maximum sanity checks first.  In such a
                 partial overlap case, it can't be the case that both
                 [ea] and [ea+szB-1] overlap the same block, since if
                 that were indeed the case then it wouldn't be a
                 partial overlap; rather it would simply fall inside
                 that block entirely and we shouldn't be inside this
                 conditional at all. */
              if (!sOK) {
                 StackTreeNode *ndFirst, *ndLast;
                 ndFirst = find_StackTreeNode( siTrees[tid], ea );
                 ndLast = find_StackTreeNode( siTrees[tid], ea+szB-1 );
                 /* if both ends of the range fall inside a block,
                    they can't be in the same block. */
                 if (ndFirst && ndLast)
                    tl_assert(ndFirst != ndLast);
                 /* for each end of the range, if it is in a block,
                    the range as a whole can't be entirely within the
                    block. */
                 if (ndFirst)
                    tl_assert(!is_subinterval_of(ndFirst->addr,
                                                 ndFirst->szB, ea, szB));
                 if (ndLast)
                    tl_assert(!is_subinterval_of(ndLast->addr,
                                                 ndLast->szB, ea, szB));
              }
              if (!gOK) {
                 GlobalTreeNode *ndFirst, *ndLast;
                 ndFirst = find_GlobalTreeNode( giTree, ea );
                 ndLast = find_GlobalTreeNode( giTree, ea+szB-1 );
                 /* if both ends of the range fall inside a block,
                    they can't be in the same block. */
                 if (ndFirst && ndLast)
                    tl_assert(ndFirst != ndLast);
                 /* for each end of the range, if it is in a block,
                    the range as a whole can't be entirely within the
                    block. */
                 if (ndFirst)
                    tl_assert(!is_subinterval_of(ndFirst->addr,
                                                 ndFirst->szB, ea, szB));
                 if (ndLast)
                    tl_assert(!is_subinterval_of(ndLast->addr,
                                                 ndLast->szB, ea, szB));
              }
              if (0) VG_(printf)("overlapping blocks in cache\n");
              return;
           }
           /* convert the bracketing nodes into the inclusive hole
              [sMin,sMax] / [gMin,gMax] around [ea,ea+szB) */
           sMin = sLB == &sNegInf ? 0 : (sLB->addr + sLB->szB);
           sMax = sUB == &sPosInf ? ~(UWord)0 : (sUB->addr - 1);
           gMin = gLB == &gNegInf ? 0 : (gLB->addr + gLB->szB);
           gMax = gUB == &gPosInf ? ~(UWord)0 : (gUB->addr - 1);
           if (0) VG_(printf)("sMin %lx sMax %lx gMin %lx gMax %lx\n",
                              sMin, sMax, gMin, gMax);
           /* [sMin,sMax] and [gMin,gMax] must both contain
              [ea,ea+szB) (right?)  That implies they must overlap at
              at least over [ea,ea+szB). */
           tl_assert(sMin <= ea && ea+szB-1 <= sMax);
           tl_assert(gMin <= ea && ea+szB-1 <= gMax);
           /* So now compute their intersection. */
           uMin = Addr__max( sMin, gMin );
           uMax = Addr__min( sMax, gMax );
           if (0) VG_(printf)("uMin %lx uMax %lx\n", uMin, uMax);
           tl_assert(uMin <= uMax);
           tl_assert(uMin <= ea && ea+szB-1 <= uMax);
           /* Finally, we can park [uMin,uMax] in the cache.  However,
              if uMax is ~0, we can't represent the difference; hence
              fudge uMax. */
           if (uMin < uMax && uMax == ~(UWord)0)
              uMax--;
           toadd_addr = uMin;
           toadd_szB = uMax - uMin + 1;
           break;
        }
        default:
           /* We should only be caching info for the above 3 cases */
          tl_assert(0);
     } /* switch (inv->tag) */

     { /* and actually add this to the cache, finally */
       Word i;
       Word ip = cache->nInUse / 2; /* doesn't seem critical */

       /* insert at position ip, shifting later entries down and
          dropping the last one if the cache is already full */
       if (cache->nInUse < N_QCACHE)
          cache->nInUse++;
       for (i = cache->nInUse-1; i > ip; i--) {
          cache->elems[i] = cache->elems[i-1];
       }

       tl_assert(toadd_szB > 0);
       cache->elems[ip].addr = toadd_addr;
       cache->elems[ip].szB = toadd_szB;
       cache->elems[ip].inv = *inv;
     }

     if (show) QCache__pp(cache, "after upd");

   }
}
1758
1759
/* CALLED FROM GENERATED CODE */
/* Check one memory access of |sszB| bytes (negative means a store,
   positive a load) at |ea|, made by the instruction at |ip|.  The
   access is classified and compared against the classification
   recorded for previous executions of the same instruction in this
   frame; a change within 1024 bytes of the old block is reported as
   a probable array overrun. */
static
VG_REGPARM(3)
void helperc__mem_access ( /* Known only at run time: */
                           Addr ea, Addr sp, Addr fp,
                           /* Known at translation time: */
                           Word sszB, Addr ip, XArray* ip_frameBlocks )
{
   UWord szB;
   IInstance* iinstance;
   Invar* inv;
   Invar new_inv;
   ThreadId tid = VG_(get_running_tid)();
   StackFrame* frame;
   HChar bufE[160], bufA[160], bufD[32];

   stats__total_accesses++;

   tl_assert(is_sane_TId(tid));
   frame = shadowStacks[tid];
   tl_assert(frame);

   /* Find the instance info for this instruction. */
   tl_assert(ip_frameBlocks);
   iinstance = find_or_create_IInstance( frame, ip, ip_frameBlocks );
   tl_assert(iinstance);
   tl_assert(iinstance->blocks == ip_frameBlocks);

   /* the sign of sszB only encodes load-vs-store; we need the size */
   szB = (sszB < 0) ? (-sszB) : sszB;
   tl_assert(szB > 0);

   inv = &iinstance->invar;

   /* Deal with first uses of instruction instances. */
   if (inv->tag == Inv_Unset) {
      /* This is the first use of this instance of the instruction, so
         we can't make any check; we merely record what we saw, so we
         can compare it against what happens for 2nd and subsequent
         accesses. */
      classify_address( inv,
                        tid, ea, sp, fp, szB, iinstance->blocks );
      tl_assert(inv->tag != Inv_Unset);
      return;
   }

   /* So generate an Invar and see if it's different from what
      we had before. */
   classify_address( &new_inv,
                     tid, ea, sp, fp, szB, iinstance->blocks );
   tl_assert(new_inv.tag != Inv_Unset);

   /* Did we see something different from before?  If no, then there's
      no error. */
   if (LIKELY(eq_Invar(&new_inv, inv)))
      return;

   tl_assert(inv->tag != Inv_Unset);

   /* Mismatch: build the "expected"/"actual" descriptions and the
      distance string for the error report. */
   VG_(memset)(bufE, 0, sizeof(bufE));
   show_Invar( bufE, sizeof(bufE)-1, inv, frame->depth );

   VG_(memset)(bufA, 0, sizeof(bufA));
   show_Invar( bufA, sizeof(bufA)-1, &new_inv, frame->depth );

   VG_(memset)(bufD, 0, sizeof(bufD));
   UWord absDelta;
   gen_delta_str( bufD, &absDelta, inv, ea );

   /* only report accesses reasonably close to the expected block;
      larger deltas are likely unrelated */
   if (absDelta < 1024)
      sg_record_error_SorG( tid, ea, sszB, bufE, bufA, bufD );

   /* And now install the new observation as "standard", so as to
      make future error messages make more sense. */
   *inv = new_inv;
}
1835
1836
1837 ////////////////////////////////////////
1838 /* Primary push-a-new-frame routine. Called indirectly from
1839 generated code. */
1840
/* High-water marks of the stack- and global-block interval tree
   sizes, for statistics. */
static UWord stats__max_sitree_size = 0;
static UWord stats__max_gitree_size = 0;
1843
/* Push a new shadow frame for 'tid' in response to a call.  Records
   the caller's SP/FP at the call point, enters the caller's visible
   stack blocks into the thread's interval tree, and either re-uses a
   parked inner frame or allocates a fresh one for the callee. */
static
void shadowStack_new_frame ( ThreadId tid,
                             Addr sp_at_call_insn,
                             Addr sp_post_call_insn,
                             Addr fp_at_call_insn,
                             Addr ip_post_call_insn,
                             XArray* descrs_at_call_insn )
{
   StackFrame *callee, *caller;
   tl_assert(is_sane_TId(tid));

   caller = shadowStacks[tid];
   tl_assert(caller);

   /* sanity-check the doubly-linked list invariants around 'caller' */
   if (caller->outer) { /* "this is not the outermost frame" */
      tl_assert(caller->outer->inner == caller);
      tl_assert(caller->outer->depth >= 0);
      tl_assert(1 + caller->outer->depth == caller->depth);
   } else {
      tl_assert(caller->depth == 0);
   }

   caller->sp_at_call = sp_at_call_insn;
   caller->fp_at_call = fp_at_call_insn;

   if (descrs_at_call_insn) {
      tl_assert( VG_(sizeXA)(descrs_at_call_insn) > 0 );
      /* compute the blocks' absolute addresses and make them visible
         to deeper frames via the thread's stack interval tree */
      caller->blocks_added_by_call
         = calculate_StackBlock_EAs( descrs_at_call_insn,
                                     sp_at_call_insn, fp_at_call_insn );
      if (caller->blocks_added_by_call)
         add_blocks_to_StackTree( siTrees[tid],
                                  descrs_at_call_insn,
                                  caller->blocks_added_by_call,
                                  caller->depth /* stack depth at which
                                                   these blocks are
                                                   considered to exist*/ );
      if (1) {
         /* track high-water marks of the two interval trees */
         UWord s = VG_(sizeFM)( siTrees[tid] );
         UWord g = VG_(sizeFM)( giTree );
         Bool sb = s > stats__max_sitree_size;
         Bool gb = g > stats__max_gitree_size;
         if (sb) stats__max_sitree_size = s;
         if (gb) stats__max_gitree_size = g;
         if (0 && (sb || gb))
            VG_(message)(Vg_DebugMsg,
                         "exp-sgcheck: new max tree sizes: "
                         "StackTree %ld, GlobalTree %ld\n",
                         stats__max_sitree_size, stats__max_gitree_size );
      }
   } else {
      caller->blocks_added_by_call = NULL;
   }

   /* caller->blocks_added_by_call is used again (and then freed) when
      this frame is removed from the stack. */

   /* re-use a parked frame if one exists, else allocate */
   if (caller->inner) {
      callee = caller->inner;
   } else {
      callee = sg_malloc("di.sg_main.sSnf.1", sizeof(StackFrame));
      VG_(memset)(callee, 0, sizeof(StackFrame));
      callee->outer = caller;
      caller->inner = callee;
      callee->depth = 1 + caller->depth;
      tl_assert(callee->inner == NULL);
   }

   /* This sets up .htab, .htab_size and .htab_used */
   initialise_II_hash_table( callee );

   callee->creation_sp = sp_post_call_insn;
   callee->sp_at_call = 0; // not actually required ..
   callee->fp_at_call = 0; // .. these 3 initialisations are ..
   callee->blocks_added_by_call = NULL; // .. just for cleanness

   /* record the new running stack frame */
   shadowStacks[tid] = callee;

   /* and this thread's query cache is now invalid */
   QCache__invalidate( &qcaches[tid] );

   if (0)
   { Word d = callee->depth;
     HChar fnname[80];
     Bool ok;
     Addr ip = ip_post_call_insn;
     ok = VG_(get_fnname_w_offset)( ip, fnname, sizeof(fnname) );
     while (d > 0) {
        VG_(printf)(" ");
        d--;
     }
     VG_(printf)("> %s %#lx\n", ok ? fnname : "???", ip);
   }
}
1939
1940 /* CALLED FROM GENERATED CODE */
1941 static
1942 VG_REGPARM(3)
helperc__new_frame(Addr sp_post_call_insn,Addr fp_at_call_insn,Addr ip_post_call_insn,XArray * blocks_at_call_insn,Word sp_adjust)1943 void helperc__new_frame ( Addr sp_post_call_insn,
1944 Addr fp_at_call_insn,
1945 Addr ip_post_call_insn,
1946 XArray* blocks_at_call_insn,
1947 Word sp_adjust )
1948 {
1949 ThreadId tid = VG_(get_running_tid)();
1950 Addr sp_at_call_insn = sp_post_call_insn + sp_adjust;
1951 shadowStack_new_frame( tid,
1952 sp_at_call_insn,
1953 sp_post_call_insn,
1954 fp_at_call_insn,
1955 ip_post_call_insn,
1956 blocks_at_call_insn );
1957 }
1958
1959
1960 ////////////////////////////////////////
1961 /* Primary remove-frame(s) routine. Called indirectly from
1962 generated code. */
1963
/* Pop shadow frames for thread 'tid' until the innermost remaining
   frame was created at or above the new stack pointer 'sp_now'.  For
   each popped frame, release its hash-table storage and remove from
   the thread's stack-interval-tree the blocks that were added when
   the corresponding call was made.  The root frame (creation_sp ==
   ~0UL, ->outer == NULL) is never popped. */
__attribute__((noinline))
static void shadowStack_unwind ( ThreadId tid, Addr sp_now )
{
   StackFrame *innermost, *innermostOrig;
   tl_assert(is_sane_TId(tid));
   innermost = shadowStacks[tid];
   tl_assert(innermost);
   innermostOrig = innermost;
   //VG_(printf)("UNWIND sp_new = %p\n", sp_now);
   while (1) {
      if (!innermost->outer)
         break;   /* reached the root frame; never pop it */
      /* sanity-check the doubly-linked frame chain */
      if (innermost->inner)
         tl_assert(innermost->inner->outer == innermost);
      tl_assert(innermost->outer->inner == innermost);
      /* the running (innermost) frame has no pending added-blocks */
      tl_assert(innermost->blocks_added_by_call == NULL);
      /* Stacks grow downwards, so a frame created at an SP strictly
         below sp_now is dead; stop at the first live one. */
      if (sp_now <= innermost->creation_sp) break;
      //VG_(printf)("UNWIND dump %p\n", innermost->creation_sp);
      tl_assert(innermost->htab);
      /* free the hash table only if it outgrew the frame's in-line
         fixed-size array */
      if (innermost->htab != &innermost->htab_fixed[0])
         sg_free(innermost->htab);
      /* be on the safe side */
      innermost->creation_sp = 0;
      innermost->htab = NULL;
      innermost->htab_size = 0;
      innermost->htab_used = 0;
      innermost->sp_at_call = 0;
      innermost->fp_at_call = 0;
      innermost->blocks_added_by_call = NULL;
      innermost = innermost->outer;

      /* So now we're "back" in the calling frame.  Remove from this
         thread's stack-interval-tree, the blocks added at the time of
         the call. */

      if (innermost->outer) { /* not at the outermost frame */
         if (innermost->blocks_added_by_call == NULL) {
         } else {
            del_blocks_from_StackTree( siTrees[tid],
                                       innermost->blocks_added_by_call );
            VG_(deleteXA)( innermost->blocks_added_by_call );
            innermost->blocks_added_by_call = NULL;
         }
      }
      /* That completes the required tidying of the interval tree
         associated with the frame we just removed. */

      /* debug-only: print one 'X' at the unwound frame's depth */
      if (0) {
         Word d = innermost->depth;
         while (d > 0) {
            VG_(printf)(" ");
            d--;
         }
         VG_(printf)("X\n");
      }

   }

   tl_assert(innermost);

   /* Publish the new innermost frame only if we actually removed
      something; the query cache is then stale and must be flushed. */
   if (innermost != innermostOrig) {
      shadowStacks[tid] = innermost;
      /* this thread's query cache is now invalid */
      QCache__invalidate( &qcaches[tid] );
   }
}
2030
2031
2032
2033 //////////////////////////////////////////////////////////////
2034 // //
2035 // Instrumentation //
2036 // //
2037 //////////////////////////////////////////////////////////////
2038
2039 /* What does instrumentation need to do?
2040
2041 - at each Call transfer, generate a call to shadowStack_new_frame
2042 do this by manually inspecting the IR
2043
2044 - at each sp change, if the sp change is negative,
2045 call shadowStack_unwind
2046 do this by asking for SP-change analysis
2047
2048 - for each memory referencing instruction,
2049 call helperc__mem_access
2050 */
2051
2052 /* A complication: sg_ instrumentation and h_ instrumentation need to
2053 be interleaved. Since the latter is a lot more complex than the
2054 former, we split the sg_ instrumentation here into four functions
2055 and let the h_ instrumenter call the four functions as it goes.
2056 Hence the h_ instrumenter drives the sg_ instrumenter.
2057
2058 To make this viable, the sg_ instrumenter carries what running
2059 state it needs in 'struct _SGEnv'. This is exported only
2060 abstractly from this file.
2061 */
2062
/* Running state carried by the sg_ instrumenter between the four
   sg_instrument_* calls for one superblock.  Exported only abstractly
   from this file (callers hold a struct _SGEnv*). */
struct _SGEnv {
   /* the current insn's IP */
   Addr64 curr_IP;
   /* whether the above is actually known */
   Bool curr_IP_known;
   /* if we find a mem ref, is it the first for this insn?  Used for
      detecting insns which make more than one memory ref, a situation
      we basically can't really handle properly; and so we ignore all
      but the first ref. */
   Bool firstRef;
   /* READONLY: callback (plus its opaque context pointer) through
      which this module allocates new IRTemps in the superblock being
      built, since it cannot do so directly. */
   IRTemp (*newIRTemp_cb)(IRType,void*);
   void* newIRTemp_opaque;
};
2077
2078
2079 /* --- Helper fns for instrumentation --- */
2080
gen_Get_SP(struct _SGEnv * sge,IRSB * bbOut,VexGuestLayout * layout,Int hWordTy_szB)2081 static IRTemp gen_Get_SP ( struct _SGEnv* sge,
2082 IRSB* bbOut,
2083 VexGuestLayout* layout,
2084 Int hWordTy_szB )
2085 {
2086 IRExpr* sp_expr;
2087 IRTemp sp_temp;
2088 IRType sp_type;
2089 /* This in effect forces the host and guest word sizes to be the
2090 same. */
2091 tl_assert(hWordTy_szB == layout->sizeof_SP);
2092 sp_type = layout->sizeof_SP == 8 ? Ity_I64 : Ity_I32;
2093 sp_expr = IRExpr_Get( layout->offset_SP, sp_type );
2094 sp_temp = sge->newIRTemp_cb( sp_type, sge->newIRTemp_opaque );
2095 addStmtToIRSB( bbOut, IRStmt_WrTmp( sp_temp, sp_expr ) );
2096 return sp_temp;
2097 }
2098
gen_Get_FP(struct _SGEnv * sge,IRSB * bbOut,VexGuestLayout * layout,Int hWordTy_szB)2099 static IRTemp gen_Get_FP ( struct _SGEnv* sge,
2100 IRSB* bbOut,
2101 VexGuestLayout* layout,
2102 Int hWordTy_szB )
2103 {
2104 IRExpr* fp_expr;
2105 IRTemp fp_temp;
2106 IRType fp_type;
2107 /* This in effect forces the host and guest word sizes to be the
2108 same. */
2109 tl_assert(hWordTy_szB == layout->sizeof_SP);
2110 fp_type = layout->sizeof_FP == 8 ? Ity_I64 : Ity_I32;
2111 fp_expr = IRExpr_Get( layout->offset_FP, fp_type );
2112 fp_temp = sge->newIRTemp_cb( fp_type, sge->newIRTemp_opaque );
2113 addStmtToIRSB( bbOut, IRStmt_WrTmp( fp_temp, fp_expr ) );
2114 return fp_temp;
2115 }
2116
instrument_mem_access(struct _SGEnv * sge,IRSB * bbOut,IRExpr * addr,Int szB,Bool isStore,Int hWordTy_szB,Addr curr_IP,VexGuestLayout * layout)2117 static void instrument_mem_access ( struct _SGEnv* sge,
2118 IRSB* bbOut,
2119 IRExpr* addr,
2120 Int szB,
2121 Bool isStore,
2122 Int hWordTy_szB,
2123 Addr curr_IP,
2124 VexGuestLayout* layout )
2125 {
2126 IRType tyAddr = Ity_INVALID;
2127 XArray* frameBlocks = NULL;
2128
2129 tl_assert(isIRAtom(addr));
2130 tl_assert(hWordTy_szB == 4 || hWordTy_szB == 8);
2131
2132 tyAddr = typeOfIRExpr( bbOut->tyenv, addr );
2133 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
2134
2135 #if defined(VGA_x86)
2136 { UChar* p = (UChar*)curr_IP;
2137 // pop %ebp; RET
2138 if (p[0] == 0xc3 && p[-1] == 0x5d) return;
2139 // pop %ebp; RET $imm16
2140 if (p[0] == 0xc2 && p[-1] == 0x5d) return;
2141 // PUSH %EBP; mov %esp,%ebp
2142 if (p[0] == 0x55 && p[1] == 0x89 && p[2] == 0xe5) return;
2143 }
2144 #endif
2145
2146 /* First off, find or create the StackBlocks for this instruction. */
2147 frameBlocks = get_StackBlocks_for_IP( curr_IP );
2148 tl_assert(frameBlocks);
2149 //if (VG_(sizeXA)( frameBlocks ) == 0)
2150 // frameBlocks = NULL;
2151
2152 /* Generate a call to "helperc__mem_access", passing:
2153 addr current_SP current_FP szB curr_IP frameBlocks
2154 */
2155 { IRTemp t_SP = gen_Get_SP( sge, bbOut, layout, hWordTy_szB );
2156 IRTemp t_FP = gen_Get_FP( sge, bbOut, layout, hWordTy_szB );
2157 IRExpr** args
2158 = mkIRExprVec_6( addr,
2159 IRExpr_RdTmp(t_SP),
2160 IRExpr_RdTmp(t_FP),
2161 mkIRExpr_HWord( isStore ? (-szB) : szB ),
2162 mkIRExpr_HWord( curr_IP ),
2163 mkIRExpr_HWord( (HWord)frameBlocks ) );
2164 IRDirty* di
2165 = unsafeIRDirty_0_N( 3/*regparms*/,
2166 "helperc__mem_access",
2167 VG_(fnptr_to_fnentry)( &helperc__mem_access ),
2168 args );
2169
2170 addStmtToIRSB( bbOut, IRStmt_Dirty(di) );
2171 }
2172 }
2173
2174
2175 /* --- Instrumentation main (4 fns) --- */
2176
sg_instrument_init(IRTemp (* newIRTemp_cb)(IRType,void *),void * newIRTemp_opaque)2177 struct _SGEnv * sg_instrument_init ( IRTemp (*newIRTemp_cb)(IRType,void*),
2178 void* newIRTemp_opaque )
2179 {
2180 struct _SGEnv * env = sg_malloc("di.sg_main.sii.1",
2181 sizeof(struct _SGEnv));
2182 tl_assert(env);
2183 env->curr_IP = 0;
2184 env->curr_IP_known = False;
2185 env->firstRef = True;
2186 env->newIRTemp_cb = newIRTemp_cb;
2187 env->newIRTemp_opaque = newIRTemp_opaque;
2188 return env;
2189 }
2190
sg_instrument_fini(struct _SGEnv * env)2191 void sg_instrument_fini ( struct _SGEnv * env )
2192 {
2193 sg_free(env);
2194 }
2195
2196 /* Add instrumentation for 'st' to 'sbOut', and possibly modify 'env'
2197 as required. This must be called before 'st' itself is added to
2198 'sbOut'. */
sg_instrument_IRStmt(struct _SGEnv * env,IRSB * sbOut,IRStmt * st,VexGuestLayout * layout,IRType gWordTy,IRType hWordTy)2199 void sg_instrument_IRStmt ( /*MOD*/struct _SGEnv * env,
2200 /*MOD*/IRSB* sbOut,
2201 IRStmt* st,
2202 VexGuestLayout* layout,
2203 IRType gWordTy, IRType hWordTy )
2204 {
2205 if (!sg_clo_enable_sg_checks)
2206 return;
2207
2208 tl_assert(st);
2209 tl_assert(isFlatIRStmt(st));
2210 switch (st->tag) {
2211 case Ist_NoOp:
2212 case Ist_AbiHint:
2213 case Ist_Put:
2214 case Ist_PutI:
2215 case Ist_MBE:
2216 /* None of these can contain any memory references. */
2217 break;
2218
2219 case Ist_Exit:
2220 tl_assert(st->Ist.Exit.jk != Ijk_Call);
2221 /* else we must deal with a conditional call */
2222 break;
2223
2224 case Ist_IMark:
2225 env->curr_IP_known = True;
2226 env->curr_IP = (Addr)st->Ist.IMark.addr;
2227 env->firstRef = True;
2228 break;
2229
2230 case Ist_Store:
2231 tl_assert(env->curr_IP_known);
2232 if (env->firstRef) {
2233 instrument_mem_access(
2234 env, sbOut,
2235 st->Ist.Store.addr,
2236 sizeofIRType(typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data)),
2237 True/*isStore*/,
2238 sizeofIRType(hWordTy),
2239 env->curr_IP, layout
2240 );
2241 env->firstRef = False;
2242 }
2243 break;
2244
2245 case Ist_WrTmp: {
2246 IRExpr* data = st->Ist.WrTmp.data;
2247 if (data->tag == Iex_Load) {
2248 tl_assert(env->curr_IP_known);
2249 if (env->firstRef) {
2250 instrument_mem_access(
2251 env, sbOut,
2252 data->Iex.Load.addr,
2253 sizeofIRType(data->Iex.Load.ty),
2254 False/*!isStore*/,
2255 sizeofIRType(hWordTy),
2256 env->curr_IP, layout
2257 );
2258 env->firstRef = False;
2259 }
2260 }
2261 break;
2262 }
2263
2264 case Ist_Dirty: {
2265 Int dataSize;
2266 IRDirty* d = st->Ist.Dirty.details;
2267 if (d->mFx != Ifx_None) {
2268 /* This dirty helper accesses memory. Collect the
2269 details. */
2270 tl_assert(env->curr_IP_known);
2271 if (env->firstRef) {
2272 tl_assert(d->mAddr != NULL);
2273 tl_assert(d->mSize != 0);
2274 dataSize = d->mSize;
2275 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
2276 instrument_mem_access(
2277 env, sbOut, d->mAddr, dataSize, False/*!isStore*/,
2278 sizeofIRType(hWordTy), env->curr_IP, layout
2279 );
2280 }
2281 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
2282 instrument_mem_access(
2283 env, sbOut, d->mAddr, dataSize, True/*isStore*/,
2284 sizeofIRType(hWordTy), env->curr_IP, layout
2285 );
2286 }
2287 env->firstRef = False;
2288 }
2289 } else {
2290 tl_assert(d->mAddr == NULL);
2291 tl_assert(d->mSize == 0);
2292 }
2293 break;
2294 }
2295
2296 case Ist_CAS: {
2297 /* We treat it as a read and a write of the location. I
2298 think that is the same behaviour as it was before IRCAS
2299 was introduced, since prior to that point, the Vex front
2300 ends would translate a lock-prefixed instruction into a
2301 (normal) read followed by a (normal) write. */
2302 if (env->firstRef) {
2303 Int dataSize;
2304 IRCAS* cas = st->Ist.CAS.details;
2305 tl_assert(cas->addr != NULL);
2306 tl_assert(cas->dataLo != NULL);
2307 dataSize = sizeofIRType(typeOfIRExpr(sbOut->tyenv, cas->dataLo));
2308 if (cas->dataHi != NULL)
2309 dataSize *= 2; /* since it's a doubleword-CAS */
2310 instrument_mem_access(
2311 env, sbOut, cas->addr, dataSize, False/*!isStore*/,
2312 sizeofIRType(hWordTy), env->curr_IP, layout
2313 );
2314 instrument_mem_access(
2315 env, sbOut, cas->addr, dataSize, True/*isStore*/,
2316 sizeofIRType(hWordTy), env->curr_IP, layout
2317 );
2318 env->firstRef = False;
2319 }
2320 break;
2321 }
2322
2323 default:
2324 tl_assert(0);
2325
2326 } /* switch (st->tag) */
2327 }
2328
2329
2330 /* Add instrumentation for the final jump of an IRSB 'sbOut', and
2331 possibly modify 'env' as required. This must be the last
2332 instrumentation statement in the block. */
sg_instrument_final_jump(struct _SGEnv * env,IRSB * sbOut,IRExpr * next,IRJumpKind jumpkind,VexGuestLayout * layout,IRType gWordTy,IRType hWordTy)2333 void sg_instrument_final_jump ( /*MOD*/struct _SGEnv * env,
2334 /*MOD*/IRSB* sbOut,
2335 IRExpr* next,
2336 IRJumpKind jumpkind,
2337 VexGuestLayout* layout,
2338 IRType gWordTy, IRType hWordTy )
2339 {
2340 if (!sg_clo_enable_sg_checks)
2341 return;
2342
2343 if (jumpkind == Ijk_Call) {
2344 // Assumes x86 or amd64
2345 IRTemp sp_post_call_insn, fp_post_call_insn;
2346 XArray* frameBlocks;
2347 IRExpr** args;
2348 IRDirty* di;
2349 sp_post_call_insn
2350 = gen_Get_SP( env, sbOut, layout, sizeofIRType(hWordTy) );
2351 fp_post_call_insn
2352 = gen_Get_FP( env, sbOut, layout, sizeofIRType(hWordTy) );
2353 tl_assert(env->curr_IP_known);
2354 frameBlocks = get_StackBlocks_for_IP( env->curr_IP );
2355 tl_assert(frameBlocks);
2356 if (VG_(sizeXA)(frameBlocks) == 0)
2357 frameBlocks = NULL;
2358 args
2359 = mkIRExprVec_5(
2360 IRExpr_RdTmp(sp_post_call_insn),
2361 IRExpr_RdTmp(fp_post_call_insn),
2362 /* assume the call doesn't change FP */
2363 next,
2364 mkIRExpr_HWord( (HWord)frameBlocks ),
2365 mkIRExpr_HWord( sizeofIRType(gWordTy) )
2366 );
2367 di = unsafeIRDirty_0_N(
2368 3/*regparms*/,
2369 "helperc__new_frame",
2370 VG_(fnptr_to_fnentry)( &helperc__new_frame ),
2371 args );
2372 addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
2373 }
2374 }
2375
2376
2377 //////////////////////////////////////////////////////////////
2378 // //
2379 // end Instrumentation //
2380 // //
2381 //////////////////////////////////////////////////////////////
2382
2383
2384 //////////////////////////////////////////////////////////////
2385 // //
2386 // misc //
2387 // //
2388 //////////////////////////////////////////////////////////////
2389
2390 /* Make a new empty stack frame that is suitable for being the
2391 outermost frame in a stack. It has a creation_sp of effectively
2392 infinity, so it can never be removed. */
new_root_StackFrame(void)2393 static StackFrame* new_root_StackFrame ( void )
2394 {
2395 StackFrame* sframe = sg_malloc("di.sg_main.nrS.1", sizeof(StackFrame));
2396 VG_(memset)( sframe, 0, sizeof(*sframe) );
2397 sframe->creation_sp = ~0UL;
2398
2399 /* This sets up .htab, .htab_size and .htab_used */
2400 initialise_II_hash_table( sframe );
2401
2402 /* ->depth, ->outer, ->inner are 0, NULL, NULL */
2403
2404 return sframe;
2405 }
2406
2407 /* Primary routine for setting up the shadow stack for a new thread.
2408 Note that this is used to create not only child thread stacks, but
2409 the root thread's stack too. We create a new stack with
2410 .creation_sp set to infinity, so that the outermost frame can never
2411 be removed (by shadowStack_unwind). The core calls this function
2412 as soon as a thread is created. We cannot yet get its SP value,
2413 since that may not yet be set. */
shadowStack_thread_create(ThreadId parent,ThreadId child)2414 static void shadowStack_thread_create ( ThreadId parent, ThreadId child )
2415 {
2416 tl_assert(is_sane_TId(child));
2417 if (parent == VG_INVALID_THREADID) {
2418 /* creating the main thread's stack */
2419 } else {
2420 tl_assert(is_sane_TId(parent));
2421 tl_assert(parent != child);
2422 tl_assert(shadowStacks[parent] != NULL);
2423 tl_assert(siTrees[parent] != NULL);
2424 }
2425
2426 /* Create the child's stack. Bear in mind we may be re-using
2427 it. */
2428 if (shadowStacks[child] == NULL) {
2429 /* First use of this stack. Just allocate an initial frame. */
2430 tl_assert(siTrees[child] == NULL);
2431 } else {
2432 StackFrame *frame, *frame2;
2433 /* re-using a stack. */
2434 /* get rid of the interval tree */
2435 tl_assert(siTrees[child] != NULL);
2436 delete_StackTree( siTrees[child] );
2437 siTrees[child] = NULL;
2438 /* Throw away all existing frames. */
2439 frame = shadowStacks[child];
2440 while (frame->outer)
2441 frame = frame->outer;
2442 tl_assert(frame->depth == 0);
2443 while (frame) {
2444 frame2 = frame->inner;
2445 if (frame2) tl_assert(1 + frame->depth == frame2->depth);
2446 sg_free(frame);
2447 frame = frame2;
2448 }
2449 shadowStacks[child] = NULL;
2450 }
2451
2452 tl_assert(shadowStacks[child] == NULL);
2453 tl_assert(siTrees[child] == NULL);
2454
2455 /* Set up the initial stack frame. */
2456 shadowStacks[child] = new_root_StackFrame();
2457
2458 /* and set up the child's stack block interval tree. */
2459 siTrees[child] = new_StackTree();
2460 }
2461
2462 /* Once a thread is ready to go, the core calls here. We take the
2463 opportunity to push a second frame on its stack, with the
2464 presumably valid SP value that is going to be used for the thread's
2465 startup. Hence we should always wind up with a valid outermost
2466 frame for the thread. */
shadowStack_set_initial_SP(ThreadId tid)2467 static void shadowStack_set_initial_SP ( ThreadId tid )
2468 {
2469 StackFrame* sf;
2470 tl_assert(is_sane_TId(tid));
2471 sf = shadowStacks[tid];
2472 tl_assert(sf != NULL);
2473 tl_assert(sf->outer == NULL);
2474 tl_assert(sf->inner == NULL);
2475 tl_assert(sf->creation_sp == ~0UL);
2476 shadowStack_new_frame( tid, 0, VG_(get_SP)(tid),
2477 0, VG_(get_IP)(tid), NULL );
2478 }
2479
2480
2481 //////////////////////////////////////////////////////////////
2482 // //
2483 // main-ish //
2484 // //
2485 //////////////////////////////////////////////////////////////
2486
2487 /* CALLED indirectly FROM GENERATED CODE. Calls here are created by
2488 sp-change analysis, as requested in pc_pre_clo_int(). */
sg_die_mem_stack(Addr old_SP,SizeT len)2489 void sg_die_mem_stack ( Addr old_SP, SizeT len ) {
2490 ThreadId tid = VG_(get_running_tid)();
2491 shadowStack_unwind( tid, old_SP+len );
2492 }
2493
sg_pre_clo_init(void)2494 void sg_pre_clo_init ( void ) {
2495 ourGlobals_init();
2496 init_StackBlocks_set();
2497 init_GlobalBlock_set();
2498 }
2499
sg_post_clo_init(void)2500 void sg_post_clo_init ( void ) {
2501 }
2502
sg_pre_thread_ll_create(ThreadId parent,ThreadId child)2503 void sg_pre_thread_ll_create ( ThreadId parent, ThreadId child ) {
2504 shadowStack_thread_create(parent, child);
2505 }
2506
sg_pre_thread_first_insn(ThreadId tid)2507 void sg_pre_thread_first_insn ( ThreadId tid ) {
2508 shadowStack_set_initial_SP(tid);
2509 }
2510
sg_fini(Int exitcode)2511 void sg_fini(Int exitcode)
2512 {
2513 if (VG_(clo_stats)) {
2514 VG_(message)(Vg_DebugMsg,
2515 " sg_: %'llu total accesses, of which:\n", stats__total_accesses);
2516 VG_(message)(Vg_DebugMsg,
2517 " sg_: stack0: %'12llu classify\n",
2518 stats__classify_Stack0);
2519 VG_(message)(Vg_DebugMsg,
2520 " sg_: stackN: %'12llu classify\n",
2521 stats__classify_StackN);
2522 VG_(message)(Vg_DebugMsg,
2523 " sg_: global: %'12llu classify\n",
2524 stats__classify_Global);
2525 VG_(message)(Vg_DebugMsg,
2526 " sg_: unknown: %'12llu classify\n",
2527 stats__classify_Unknown);
2528 VG_(message)(Vg_DebugMsg,
2529 " sg_: %'llu Invars preened, of which %'llu changed\n",
2530 stats__Invars_preened, stats__Invars_changed);
2531 VG_(message)(Vg_DebugMsg,
2532 " sg_: t_i_b_MT: %'12llu\n", stats__t_i_b_empty);
2533 VG_(message)(Vg_DebugMsg,
2534 " sg_: qcache: %'llu searches, %'llu probes, %'llu misses\n",
2535 stats__qcache_queries, stats__qcache_probes, stats__qcache_misses);
2536 VG_(message)(Vg_DebugMsg,
2537 " sg_: htab-fast: %'llu hits\n",
2538 stats__htab_fast);
2539 VG_(message)(Vg_DebugMsg,
2540 " sg_: htab-slow: %'llu searches, %'llu probes, %'llu resizes\n",
2541 stats__htab_searches, stats__htab_probes, stats__htab_resizes);
2542 }
2543 }
2544
2545
2546 /*--------------------------------------------------------------------*/
2547 /*--- end sg_main.c ---*/
2548 /*--------------------------------------------------------------------*/
2549