/*--------------------------------------------------------------------*/
/*--- Ptrcheck: a pointer-use checker.                             ---*/
/*--- This file checks stack and global array accesses.            ---*/
/*---                                                    sg_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Ptrcheck, a Valgrind tool for checking pointer
   use in programs.

   Copyright (C) 2008-2010 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "pub_tool_basics.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_xarray.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_machine.h"
#include "pub_tool_debuginfo.h"
#include "pub_tool_options.h"

#include "pc_common.h"

#include "sg_main.h"      // self


static
void preen_global_Invars ( Addr a, SizeT len ); /*fwds*/


//////////////////////////////////////////////////////////////
//                                                          //
// Basic Stuff                                              //
//                                                          //
//////////////////////////////////////////////////////////////

static inline Bool is_sane_TId ( ThreadId tid )
{
   return tid >= 0 && tid < VG_N_THREADS
          && tid != VG_INVALID_THREADID;
}

static void* sg_malloc ( HChar* cc, SizeT n ) {
   void* p;
   tl_assert(n > 0);
   p = VG_(malloc)( cc, n );
   tl_assert(p);
   return p;
}

static void sg_free ( void* p ) {
   tl_assert(p);
   VG_(free)(p);
}


/* Compare the intervals [a1,a1+n1) and [a2,a2+n2).  Return -1 if the
   first interval is lower, 1 if the first interval is higher, and 0
   if there is any overlap.  The redundant paranoia with casting
   follows what looked distinctly like a bug in gcc-4.1.2, in which
   some of the comparisons were done signedly instead of
   unsignedly. */
inline
static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
                                     Addr a2, SizeT n2 ) {
   UWord a1w = (UWord)a1;
   UWord n1w = (UWord)n1;
   UWord a2w = (UWord)a2;
   UWord n2w = (UWord)n2;
   tl_assert(n1w > 0 && n2w > 0);
   if (a1w + n1w <= a2w) return -1L;
   if (a2w + n2w <= a1w) return 1L;
   return 0;
}
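
/* A minimal usage sketch of the above (illustrative only, and kept
   disabled; the addresses are invented).  Note that abutting
   intervals do not count as overlapping: [0x1000,0x1010) sits wholly
   below [0x1010,0x1020). */
#if 0
static void example_cmp_nonempty_intervals ( void )
{
   tl_assert(cmp_nonempty_intervals(0x1000, 0x10, 0x1010, 0x10) == -1L);
   tl_assert(cmp_nonempty_intervals(0x1010, 0x10, 0x1000, 0x10) ==  1L);
   /* extending the first interval by one byte creates an overlap */
   tl_assert(cmp_nonempty_intervals(0x1000, 0x11, 0x1010, 0x10) == 0);
}
#endif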

/* Return true iff [aSmall,aSmall+nSmall) is entirely contained
   within [aBig,aBig+nBig). */
inline
static Bool is_subinterval_of ( Addr aBig, SizeT nBig,
                                Addr aSmall, SizeT nSmall ) {
   tl_assert(nBig > 0 && nSmall > 0);
   return aBig <= aSmall && aSmall + nSmall <= aBig + nBig;
}

inline
static Addr Addr__min ( Addr a1, Addr a2 ) {
   return a1 < a2 ? a1 : a2;
}

inline
static Addr Addr__max ( Addr a1, Addr a2 ) {
   return a1 < a2 ? a2 : a1;
}


//////////////////////////////////////////////////////////////
//                                                          //
// StackBlocks Persistent Cache                             //
//                                                          //
//////////////////////////////////////////////////////////////

/* We maintain a set of XArray* of StackBlocks.  These are never
   freed.  When a new StackBlock vector is acquired from
   VG_(di_get_stack_blocks_at_ip), we compare it to the existing set.
   If not present, it is added.  If present, the just-acquired one is
   freed and the copy used.

   This simplifies storage management elsewhere.  It allows us to
   assume that a pointer to an XArray* of StackBlock is valid forever.
   It also means there are no duplicates anywhere, which could be
   important from a space point of view for programs that generate a
   lot of translations, or where translations are frequently discarded
   and re-made.

   Note that we normalise the arrays by sorting the elements according
   to an arbitrary total order, so as to avoid the situation that two
   vectors describe the same set of variables but are not structurally
   identical. */
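
/* A sketch of the resulting usage discipline (illustrative only;
   get_StackBlocks_for_IP below is the real instance of it).  The
   freshly acquired vector is either freed or becomes the canonical
   copy, so callers may only retain the returned pointer:

      XArray* fresh = VG_(di_get_stack_blocks_at_ip)( ip, True );
      XArray* canon = StackBlocks__find_and_dealloc__or_add( fresh );
      // 'fresh' must not be touched after this point; 'canon' is
      // valid for the rest of the run.
*/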

static inline Bool StackBlock__sane ( StackBlock* fb )
{
   if (fb->name[ sizeof(fb->name)-1 ] != 0)
      return False;
   if (fb->spRel != False && fb->spRel != True)
      return False;
   if (fb->isVec != False && fb->isVec != True)
      return False;
   return True;
}

/* Generate an arbitrary total ordering on StackBlocks. */
static Word StackBlock__cmp ( StackBlock* fb1, StackBlock* fb2 )
{
   Word r;
   tl_assert(StackBlock__sane(fb1));
   tl_assert(StackBlock__sane(fb2));
   /* Hopefully the .base test hits most of the time.  For the blocks
      associated with any particular instruction, if the .base values
      are the same then probably it doesn't make sense for the other
      fields to be different.  But this is supposed to be a completely
      general structural total order, so we have to compare everything
      anyway. */
   if (fb1->base < fb2->base) return -1;
   if (fb1->base > fb2->base) return 1;
   /* compare sizes */
   if (fb1->szB < fb2->szB) return -1;
   if (fb1->szB > fb2->szB) return 1;
   /* compare sp/fp flag */
   if (fb1->spRel < fb2->spRel) return -1;
   if (fb1->spRel > fb2->spRel) return 1;
   /* compare is/is-not array-typed flag */
   if (fb1->isVec < fb2->isVec) return -1;
   if (fb1->isVec > fb2->isVec) return 1;
   /* compare the name */
   r = (Word)VG_(strcmp)(fb1->name, fb2->name);
   return r;
}

/* Returns True if all fields except .szB are the same.  szBs may or
   may not be the same; they are simply not consulted. */
static Bool StackBlock__all_fields_except_szB_are_equal (
               StackBlock* fb1,
               StackBlock* fb2
            )
{
   tl_assert(StackBlock__sane(fb1));
   tl_assert(StackBlock__sane(fb2));
   return fb1->base == fb2->base
          && fb1->spRel == fb2->spRel
          && fb1->isVec == fb2->isVec
          && 0 == VG_(strcmp)(fb1->name, fb2->name);
}


/* Generate an arbitrary total ordering on vectors of StackBlocks. */
static Word StackBlocks__cmp ( XArray* fb1s, XArray* fb2s )
{
   Word i, r, n1, n2;
   n1 = VG_(sizeXA)( fb1s );
   n2 = VG_(sizeXA)( fb2s );
   if (n1 < n2) return -1;
   if (n1 > n2) return 1;
   for (i = 0; i < n1; i++) {
      StackBlock *fb1, *fb2;
      fb1 = VG_(indexXA)( fb1s, i );
      fb2 = VG_(indexXA)( fb2s, i );
      r = StackBlock__cmp( fb1, fb2 );
      if (r != 0) return r;
   }
   tl_assert(i == n1 && i == n2);
   return 0;
}

static void pp_StackBlocks ( XArray* sbs )
{
   Word i, n = VG_(sizeXA)( sbs );
   VG_(message)(Vg_DebugMsg, "<<< STACKBLOCKS\n" );
   for (i = 0; i < n; i++) {
      StackBlock* sb = (StackBlock*)VG_(indexXA)( sbs, i );
      VG_(message)(Vg_DebugMsg,
         "   StackBlock{ off %ld szB %lu spRel:%c isVec:%c \"%s\" }\n",
         sb->base, sb->szB, sb->spRel ? 'Y' : 'N',
         sb->isVec ? 'Y' : 'N', &sb->name[0]
      );
   }
   VG_(message)(Vg_DebugMsg, ">>> STACKBLOCKS\n" );
}


/* ---------- The StackBlock vector cache ---------- */

static WordFM* /* XArray* of StackBlock -> nothing */
       frameBlocks_set = NULL;

static void init_StackBlocks_set ( void )
{
   tl_assert(!frameBlocks_set);
   frameBlocks_set
      = VG_(newFM)( sg_malloc, "di.sg_main.iSBs.1", sg_free,
                    (Word(*)(UWord,UWord))StackBlocks__cmp );
   tl_assert(frameBlocks_set);
}

/* Find the given StackBlock-vector in our collection thereof.  If
   found, deallocate the supplied one, and return the address of the
   copy.  If not found, add the supplied one to our collection and
   return its address. */
static XArray* /* of StackBlock */
       StackBlocks__find_and_dealloc__or_add
          ( XArray* /* of StackBlock */ orig )
{
   UWord key, val;

   /* First, normalise, as per comments above. */
   VG_(setCmpFnXA)( orig, (Int(*)(void*,void*))StackBlock__cmp );
   VG_(sortXA)( orig );

   /* Now get rid of any exact duplicates. */
  nuke_dups:
   { Word r, w, nEQ, n = VG_(sizeXA)( orig );
     if (n >= 2) {
        w = 0;
        nEQ = 0;
        for (r = 0; r < n; r++) {
           if (r+1 < n) {
              StackBlock* pR0 = VG_(indexXA)( orig, r+0 );
              StackBlock* pR1 = VG_(indexXA)( orig, r+1 );
              Word c = StackBlock__cmp(pR0,pR1);
              tl_assert(c == -1 || c == 0);
              if (c == 0) { nEQ++; continue; }
           }
           if (w != r) {
              StackBlock* pW = VG_(indexXA)( orig, w );
              StackBlock* pR = VG_(indexXA)( orig, r );
              *pW = *pR;
           }
           w++;
        }
        tl_assert(r == n);
        tl_assert(w + nEQ == n);
        if (w < n) {
           VG_(dropTailXA)( orig, n-w );
        }
        if (0) VG_(printf)("delta %ld\n", n-w);
     }
   }

   /* Deal with the following strangeness, where two otherwise
      identical blocks are claimed to have different sizes.  In which
      case we use the larger size. */
   /* StackBlock{ off 16 szB 66 spRel:Y isVec:Y "sz" }
      StackBlock{ off 16 szB 130 spRel:Y isVec:Y "sz" }
      StackBlock{ off 208 szB 16 spRel:Y isVec:Y "ar" }
   */
   { Word i, n = VG_(sizeXA)( orig );
     if (n >= 2) {
        for (i = 0; i < n-1; i++) {
           StackBlock* sb0 = VG_(indexXA)( orig, i+0 );
           StackBlock* sb1 = VG_(indexXA)( orig, i+1 );
           if (StackBlock__all_fields_except_szB_are_equal(sb0, sb1)) {
              /* They can't be identical because the previous tidying
                 pass would have removed the duplicates.  And they
                 can't be > because the earlier sorting pass would
                 have ordered otherwise-identical descriptors
                 according to < on .szB fields.  Hence: */
              tl_assert(sb0->szB < sb1->szB);
              sb0->szB = sb1->szB;
              /* This makes the blocks identical, at the size of the
                 larger one.  Rather than go to all the hassle of
                 sliding the rest down, simply go back to the
                 remove-duplicates stage.  The assertion guarantees
                 that we eventually make progress, since the rm-dups
                 stage will get rid of one of the blocks.  This is
                 expected to happen only exceedingly rarely. */
              tl_assert(StackBlock__cmp(sb0,sb1) == 0);
              goto nuke_dups;
           }
        }
     }
   }

   /* If there are any blocks which overlap and have the same
      fpRel-ness, junk the whole descriptor; it's obviously bogus.
      Icc11 certainly generates bogus info from time to time.

      This check is pretty weak; really we ought to have a stronger
      sanity check. */
   { Word i, n = VG_(sizeXA)( orig );
     static Int moans = 3;
     for (i = 0; i < n-1; i++) {
       StackBlock* sb1 = (StackBlock*)VG_(indexXA)( orig, i );
       StackBlock* sb2 = (StackBlock*)VG_(indexXA)( orig, i+1 );
       if (sb1->spRel == sb2->spRel
           && (sb1->base >= sb2->base
               || sb1->base + sb1->szB > sb2->base)) {
          if (moans > 0 && !VG_(clo_xml)) {
             moans--;
             VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
                                      "overlapping stack blocks\n");
             if (VG_(clo_verbosity) >= 2)
                pp_StackBlocks(orig);
             if (moans == 0)
                VG_(message)(Vg_UserMsg, "Further instances of this "
                                         "message will not be shown\n" );
          }
          VG_(dropTailXA)( orig, VG_(sizeXA)( orig ));
          break;
       }
     }
   }

   /* Now, do we have it already? */
   if (VG_(lookupFM)( frameBlocks_set, &key, &val, (UWord)orig )) {
      /* yes */
      XArray* res;
      tl_assert(val == 0);
      tl_assert(key != (UWord)orig);
      VG_(deleteXA)(orig);
      res = (XArray*)key;
      return res;
   } else {
      /* no */
      VG_(addToFM)( frameBlocks_set, (UWord)orig, 0 );
      return orig;
   }
}

/* Top level function for getting the StackBlock vector for a given
   instruction.  It is guaranteed that the returned pointer will be
   valid for the entire rest of the run, and also that the addresses
   of the individual elements of the array will not change. */

static XArray* /* of StackBlock */ get_StackBlocks_for_IP ( Addr ip )
{
   XArray* blocks = VG_(di_get_stack_blocks_at_ip)( ip, True/*arrays only*/ );
   tl_assert(blocks);
   return StackBlocks__find_and_dealloc__or_add( blocks );
}


//////////////////////////////////////////////////////////////
//                                                          //
// GlobalBlocks Persistent Cache                            //
//                                                          //
//////////////////////////////////////////////////////////////

/* Generate an arbitrary total ordering on GlobalBlocks. */
static Word GlobalBlock__cmp ( GlobalBlock* gb1, GlobalBlock* gb2 )
{
   Word r;
   /* compare addrs */
   if (gb1->addr < gb2->addr) return -1;
   if (gb1->addr > gb2->addr) return 1;
   /* compare sizes */
   if (gb1->szB < gb2->szB) return -1;
   if (gb1->szB > gb2->szB) return 1;
   /* compare is/is-not array-typed flag */
   if (gb1->isVec < gb2->isVec) return -1;
   if (gb1->isVec > gb2->isVec) return 1;
   /* compare the name */
   r = (Word)VG_(strcmp)(gb1->name, gb2->name);
   if (r != 0) return r;
   /* compare the soname */
   r = (Word)VG_(strcmp)(gb1->soname, gb2->soname);
   return r;
}

static WordFM* /* GlobalBlock* -> nothing */
       globalBlock_set = NULL;

static void init_GlobalBlock_set ( void )
{
   tl_assert(!globalBlock_set);
   globalBlock_set
      = VG_(newFM)( sg_malloc, "di.sg_main.iGBs.1", sg_free,
                    (Word(*)(UWord,UWord))GlobalBlock__cmp );
   tl_assert(globalBlock_set);
}


/* Top level function for making GlobalBlocks persistent.  Call here
   with a non-persistent version, and the returned one is guaranteed
   to be valid for the entire rest of the run.  The supplied one is
   copied, not stored, so can be freed after the call. */

static GlobalBlock* get_persistent_GlobalBlock ( GlobalBlock* orig )
{
   UWord key, val;
   /* Now, do we have it already? */
   if (VG_(lookupFM)( globalBlock_set, &key, &val, (UWord)orig )) {
      /* yes, return the copy */
      GlobalBlock* res;
      tl_assert(val == 0);
      res = (GlobalBlock*)key;
      tl_assert(res != orig);
      return res;
   } else {
      /* no.  clone it, store the clone and return the clone's
         address. */
      GlobalBlock* clone = sg_malloc( "di.sg_main.gpGB.1",
                                      sizeof(GlobalBlock) );
      tl_assert(clone);
      *clone = *orig;
      VG_(addToFM)( globalBlock_set, (UWord)clone, 0 );
      return clone;
   }
}


//////////////////////////////////////////////////////////////
//                                                          //
// Interval tree of StackTreeNode                           //
//                                                          //
//////////////////////////////////////////////////////////////

/* A node in a stack interval tree.  Zero length intervals (.szB == 0)
   are not allowed.

   A stack interval tree is a (WordFM StackTreeNode* void).  There is
   one stack interval tree for each thread.
*/
typedef
   struct {
      Addr        addr;
      SizeT       szB;   /* copied from .descr->szB */
      StackBlock* descr; /* it's an instance of this block */
      UWord       depth; /* depth of stack at time block was pushed */
   }
   StackTreeNode;

static void pp_StackTree ( WordFM* sitree, HChar* who )
{
   UWord keyW, valW;
   VG_(printf)("<<< BEGIN pp_StackTree %s\n", who );
   VG_(initIterFM)( sitree );
   while (VG_(nextIterFM)( sitree, &keyW, &valW )) {
      StackTreeNode* nd = (StackTreeNode*)keyW;
      VG_(printf)("  [%#lx,+%lu) descr=%p %s %lu\n", nd->addr, nd->szB,
                  nd->descr, nd->descr->name, nd->descr->szB);
   }
   VG_(printf)(">>> END   pp_StackTree %s\n", who );
}

/* Interval comparison function for StackTreeNode */
static Word cmp_intervals_StackTreeNode ( StackTreeNode* sn1,
                                          StackTreeNode* sn2 )
{
   return cmp_nonempty_intervals(sn1->addr, sn1->szB,
                                 sn2->addr, sn2->szB);
}

/* Find the node holding 'a', if any. */
static StackTreeNode* find_StackTreeNode ( WordFM* sitree, Addr a )
{
   UWord keyW, valW;
   StackTreeNode key;
   tl_assert(sitree);
   key.addr = a;
   key.szB  = 1;
   if (VG_(lookupFM)( sitree, &keyW, &valW, (UWord)&key )) {
      StackTreeNode* res = (StackTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(res != &key);
      return res;
   } else {
      return NULL;
   }
}

/* Note that the supplied XArray of FrameBlock must have been
   made persistent already. */
__attribute__((noinline))
static void add_blocks_to_StackTree (
               /*MOD*/WordFM* sitree,
               XArray* /* FrameBlock */ descrs,
               XArray* /* Addr */ bases,
               UWord depth
            )
{
   Bool debug = (Bool)0;
   Word i, nDescrs, nBases;

   nDescrs = VG_(sizeXA)( descrs );
   nBases = VG_(sizeXA)( bases );
   tl_assert(nDescrs == nBases);

   if (nDescrs == 0) return;

   tl_assert(sitree);
   if (debug) {
      VG_(printf)("\ndepth = %lu\n", depth);
      pp_StackTree( sitree, "add_blocks_to_StackTree-pre" );
      pp_StackBlocks(descrs);
   }

   for (i = 0; i < nDescrs; i++) {
      Bool already_present;
      StackTreeNode* nyu;
      Addr        addr  = *(Addr*)VG_(indexXA)( bases, i );
      StackBlock* descr = (StackBlock*)VG_(indexXA)( descrs, i );
      tl_assert(descr->szB > 0);
      nyu = sg_malloc( "di.sg_main.abtST.1", sizeof(StackTreeNode) );
      nyu->addr  = addr;
      nyu->szB   = descr->szB;
      nyu->descr = descr;
      nyu->depth = depth;
      if (debug) VG_(printf)("ADD %#lx %lu\n", addr, descr->szB);
      already_present = VG_(addToFM)( sitree, (UWord)nyu, 0 );
      /* The interval can't already be there; else we have
         overlapping stack blocks. */
      tl_assert(!already_present);
      if (debug) {
         pp_StackTree( sitree, "add_blocks_to_StackTree-step" );
      }
   }
   if (debug) {
      pp_StackTree( sitree, "add_blocks_to_StackTree-post" );
      VG_(printf)("\n");
   }
}

static void del_blocks_from_StackTree ( /*MOD*/WordFM* sitree,
                                        XArray* /* Addr */ bases )
{
   UWord oldK, oldV;
   Word i, nBases = VG_(sizeXA)( bases );
   for (i = 0; i < nBases; i++) {
      Bool b;
      Addr addr = *(Addr*)VG_(indexXA)( bases, i );
      StackTreeNode* nd = find_StackTreeNode(sitree, addr);
      /* The interval must be there; we added it earlier when
         the associated frame was created. */
      tl_assert(nd);
      b = VG_(delFromFM)( sitree, &oldK, &oldV, (UWord)nd );
      /* we just found the block! */
      tl_assert(b);
      tl_assert(oldV == 0);
      tl_assert(nd == (StackTreeNode*)oldK);
      sg_free(nd);
   }
}


static void delete_StackTree__kFin ( UWord keyW ) {
   StackTreeNode* nd = (StackTreeNode*)keyW;
   tl_assert(nd);
   sg_free(nd);
}
static void delete_StackTree__vFin ( UWord valW ) {
   tl_assert(valW == 0);
}
static void delete_StackTree ( WordFM* sitree )
{
   VG_(deleteFM)( sitree,
                  delete_StackTree__kFin, delete_StackTree__vFin );
}

static WordFM* new_StackTree ( void ) {
   return VG_(newFM)( sg_malloc, "di.sg_main.nST.1", sg_free,
                      (Word(*)(UWord,UWord))cmp_intervals_StackTreeNode );
}


//////////////////////////////////////////////////////////////
//                                                          //
// Interval tree of GlobalTreeNode                          //
//                                                          //
//////////////////////////////////////////////////////////////

/* A node in a global interval tree.  Zero length intervals
   (.szB == 0) are not allowed.

   A global interval tree is a (WordFM GlobalTreeNode* void).  There
   is one global interval tree for the entire process.
*/
typedef
   struct {
      Addr         addr;  /* copied from .descr->addr */
      SizeT        szB;   /* copied from .descr->szB */
      GlobalBlock* descr; /* it's this block */
   }
   GlobalTreeNode;

static void GlobalTreeNode__pp ( GlobalTreeNode* nd ) {
   tl_assert(nd->descr);
   VG_(printf)("GTNode [%#lx,+%ld) %s",
               nd->addr, nd->szB, nd->descr->name);
}

static void GlobalTree__pp ( WordFM* /* of (GlobalTreeNode,void) */ gitree,
                             HChar* who )
{
   UWord keyW, valW;
   GlobalTreeNode* nd;
   VG_(printf)("<<< GlobalBlockTree (%s)\n", who);
   VG_(initIterFM)( gitree );
   while (VG_(nextIterFM)( gitree, &keyW, &valW )) {
      tl_assert(valW == 0);
      nd = (GlobalTreeNode*)keyW;
      VG_(printf)("  ");
      GlobalTreeNode__pp(nd);
      VG_(printf)("\n");
   }
   VG_(doneIterFM)( gitree );
   VG_(printf)(">>>\n");
}

/* Interval comparison function for GlobalTreeNode */
static Word cmp_intervals_GlobalTreeNode ( GlobalTreeNode* gn1,
                                           GlobalTreeNode* gn2 )
{
   return cmp_nonempty_intervals( gn1->addr, gn1->szB,
                                  gn2->addr, gn2->szB );
}

/* Find the node holding 'a', if any. */
static GlobalTreeNode* find_GlobalTreeNode ( WordFM* gitree, Addr a )
{
   UWord keyW, valW;
   GlobalTreeNode key;
   key.addr = a;
   key.szB  = 1;
   if (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
      GlobalTreeNode* res = (GlobalTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(res != &key);
      return res;
   } else {
      return NULL;
   }
}

/* Note that the supplied GlobalBlock must have been made persistent
   already. */
static void add_block_to_GlobalTree (
               /*MOD*/WordFM* gitree,
               GlobalBlock* descr
            )
{
   Bool already_present;
   GlobalTreeNode *nyu, *nd;
   UWord keyW, valW;
   static Int moans = 3;

   tl_assert(descr->szB > 0);
   nyu = sg_malloc( "di.sg_main.abtG.1", sizeof(GlobalTreeNode) );
   nyu->addr  = descr->addr;
   nyu->szB   = descr->szB;
   nyu->descr = descr;

   /* Basically it's an error to add a global block to the tree that
      is already in the tree.  However, detect and ignore attempts to
      insert exact duplicates; they do appear for some reason
      (possibly a bug in m_debuginfo?). */
   already_present = VG_(lookupFM)( gitree, &keyW, &valW, (UWord)nyu );
   if (already_present) {
      tl_assert(valW == 0);
      nd = (GlobalTreeNode*)keyW;
      tl_assert(nd);
      tl_assert(nd != nyu);
      tl_assert(nd->descr);
      tl_assert(nyu->descr);
      if (nd->addr == nyu->addr && nd->szB == nyu->szB
          /* && 0 == VG_(strcmp)(nd->descr->name, nyu->descr->name) */
          /* Although it seems reasonable to demand that duplicate
             blocks have identical names, that is too strict.  For
             example, reading debuginfo from glibc produces two
             otherwise identical blocks with names "tzname" and
             "__tzname".  A constraint of the form "must be identical,
             or one must be a substring of the other" would fix that.
             However, such trickery is scuppered by the fact that we
             truncate all variable names to 15 characters to make
             storage management simpler, hence giving pairs like
             "__EI___pthread_" (truncated) vs "__pthread_keys".  So
             it's simplest just to skip the name comparison
             completely. */
          && 0 == VG_(strcmp)(nd->descr->soname, nyu->descr->soname)) {
         /* exact duplicate; ignore it */
         sg_free(nyu);
         return;
      }
      /* else fall through; the assertion below will catch it */
   }

   already_present = VG_(addToFM)( gitree, (UWord)nyu, 0 );
   /* The interval can't already be there; else we have
      overlapping global blocks. */
   /* Unfortunately (25 Jan 09) at least icc11 has been seen to
      generate overlapping block descriptions in the Dwarf3; clearly
      bogus. */
   if (already_present && moans > 0 && !VG_(clo_xml)) {
      moans--;
      VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
                               "overlapping global blocks\n");
      if (VG_(clo_verbosity) >= 2) {
         GlobalTree__pp( gitree,
                         "add_block_to_GlobalTree: non-exact duplicate" );
         VG_(printf)("Overlapping block: ");
         GlobalTreeNode__pp(nyu);
         VG_(printf)("\n");
      }
      if (moans == 0)
         VG_(message)(Vg_UserMsg, "Further instances of this "
                                  "message will not be shown\n" );
   }
   /* tl_assert(!already_present); */
}

static Bool del_GlobalTree_range ( /*MOD*/WordFM* gitree,
                                   Addr a, SizeT szB )
{
   /* One easy way to do this: look up [a,a+szB) in the tree.  That
      will either succeed, producing a block which intersects that
      range, in which case we delete it and repeat; or it will fail,
      in which case there are no blocks intersecting the range, and we
      can bring the process to a halt. */
   UWord keyW, valW, oldK, oldV;
   GlobalTreeNode key, *nd;
   Bool b, anyFound;

   tl_assert(szB > 0);

   anyFound = False;

   key.addr = a;
   key.szB  = szB;

   while (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
      anyFound = True;
      nd = (GlobalTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(nd != &key);
      tl_assert(cmp_nonempty_intervals(a, szB, nd->addr, nd->szB) == 0);

      b = VG_(delFromFM)( gitree, &oldK, &oldV, (UWord)&key );
      tl_assert(b);
      tl_assert(oldV == 0);
      tl_assert(oldK == keyW); /* check we deleted the node we just found */
   }

   return anyFound;
}


//////////////////////////////////////////////////////////////
//                                                          //
// Invar                                                    //
//                                                          //
//////////////////////////////////////////////////////////////

/* An invariant, as established by watching the destination of a
   memory referencing instruction.  It is Inv_Unset until the
   instruction makes its first access. */

typedef
   enum {
      Inv_Unset=1,  /* not established yet */
      Inv_Unknown,  /* unknown location */
      Inv_Stack0,   /* array-typed stack block in innermost frame */
      Inv_StackN,   /* array-typed stack block in non-innermost frame */
      Inv_Global,   /* array-typed global block */
   }
   InvarTag;

typedef
   struct {
      InvarTag tag;
      union {
         struct {
         } Unset;
         struct {
         } Unknown;
         struct {
            Addr  addr;
            SizeT szB;
            StackBlock* descr;
         } Stack0; /* innermost stack frame */
         struct {
            /* Pointer to a node in the interval tree for
               this thread. */
            StackTreeNode* nd;
         } StackN; /* non-innermost stack frame */
         struct {
            /* Pointer to a GlobalBlock in the interval tree of
               global blocks. */
            GlobalTreeNode* nd;
         } Global;
      }
      Inv;
   }
   Invar;
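
/* A tiny illustrative constructor (hypothetical and disabled; the
   tool really fills Invars in-place in classify_address below): set
   the tag first, then only the union arm that matches it. */
#if 0
static Invar mk_global_Invar ( GlobalTreeNode* nd )
{
   Invar i;
   i.tag = Inv_Global;
   i.Inv.Global.nd = nd;
   return i;
}
#endif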

/* Partial debugging printing for an Invar. */
static void pp_Invar ( Invar* i )
{
   switch (i->tag) {
      case Inv_Unset:
         VG_(printf)("Unset");
         break;
      case Inv_Unknown:
         VG_(printf)("Unknown");
         break;
      case Inv_Stack0:
         VG_(printf)("Stack0 [%#lx,+%lu)",
                     i->Inv.Stack0.addr, i->Inv.Stack0.szB);
         break;
      case Inv_StackN:
         VG_(printf)("StackN [%#lx,+%lu)",
                     i->Inv.StackN.nd->addr, i->Inv.StackN.nd->szB);
         break;
      case Inv_Global:
         VG_(printf)("Global [%#lx,+%lu)",
                     i->Inv.Global.nd->addr, i->Inv.Global.nd->szB);
         break;
      default:
         tl_assert(0);
   }
}

/* Compare two Invars for equality. */
static Bool eq_Invar ( Invar* i1, Invar* i2 )
{
   tl_assert(i1->tag != Inv_Unset);
   tl_assert(i2->tag != Inv_Unset);
   if (i1->tag != i2->tag)
      return False;
   switch (i1->tag) {
      case Inv_Unknown:
         return True;
      case Inv_Stack0:
         return i1->Inv.Stack0.addr == i2->Inv.Stack0.addr
                && i1->Inv.Stack0.szB == i2->Inv.Stack0.szB;
      case Inv_StackN:
         return i1->Inv.StackN.nd == i2->Inv.StackN.nd;
      case Inv_Global:
         return i1->Inv.Global.nd == i2->Inv.Global.nd;
      default:
         tl_assert(0);
   }
   /*NOTREACHED*/
   tl_assert(0);
}

/* Print selected parts of an Invar, suitable for use in error
   messages. */
static void show_Invar( HChar* buf, Word nBuf, Invar* inv, Word depth )
{
   HChar* str;
   tl_assert(nBuf >= 96);
   buf[0] = 0;
   switch (inv->tag) {
      case Inv_Unknown:
         VG_(sprintf)(buf, "%s", "unknown");
         break;
      case Inv_Stack0:
         str = "array";
         VG_(sprintf)(buf, "stack %s \"%s\" in this frame",
                      str, inv->Inv.Stack0.descr->name );
         break;
      case Inv_StackN:
         str = "array";
         VG_(sprintf)(buf, "stack %s \"%s\" in frame %lu back from here",
                      str, inv->Inv.StackN.nd->descr->name,
                           depth - inv->Inv.StackN.nd->depth );
         break;
      case Inv_Global:
         str = "array";
         VG_(sprintf)(buf, "global %s \"%s\" in object with soname \"%s\"",
                      str, inv->Inv.Global.nd->descr->name,
                           inv->Inv.Global.nd->descr->soname );
         break;
      case Inv_Unset:
         VG_(sprintf)(buf, "%s", "Unset!");
         break;
      default:
         tl_assert(0);
   }
}


//////////////////////////////////////////////////////////////
//                                                          //
// our globals                                              //
//                                                          //
//////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////
///

#define N_QCACHE 16

/* Powers of two only, else the result will be chaos */
#define QCACHE_ADVANCE_EVERY 16

/* Per-thread query cache.  Note that the invar can only be Inv_StackN
   (but not Inv_Stack0), Inv_Global or Inv_Unknown. */
typedef
   struct {
      Addr  addr;
      SizeT szB;
      Invar inv;
   }
   QCElem;

typedef
   struct {
      Word   nInUse;
      QCElem elems[N_QCACHE];
   }
   QCache;
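
/* An illustrative sketch of how the cache is consulted (hypothetical
   helper, kept disabled; the real code is inlined in classify_address
   below).  Scan linearly; on a hit, occasionally swap the entry one
   place towards the front, so frequently used entries migrate to the
   start.  The advance test masks a counter with
   (QCACHE_ADVANCE_EVERY-1), which only behaves like "every Nth query"
   when N is a power of two -- hence the warning above. */
#if 0
static Invar* qcache_lookup ( QCache* qc, Addr a, SizeT szB )
{
   Word i;
   static UWord ctr = 0;
   for (i = 0; i < qc->nInUse; i++) {
      if (is_subinterval_of(qc->elems[i].addr, qc->elems[i].szB, a, szB)) {
         if (i > 0 && (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0) {
            QCElem tmp = qc->elems[i-1];
            qc->elems[i-1] = qc->elems[i];
            qc->elems[i]   = tmp;
            i--;
         }
         return &qc->elems[i].inv;
      }
   }
   return NULL; /* miss */
}
#endif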

static void QCache__invalidate ( QCache* qc ) {
   tl_assert(qc->nInUse >= 0);
   qc->nInUse = 0;
}

static void QCache__pp ( QCache* qc, HChar* who )
{
   Word i;
   VG_(printf)("<<< QCache with %ld elements (%s)\n", qc->nInUse, who);
   for (i = 0; i < qc->nInUse; i++) {
      VG_(printf)("  [%#lx,+%#lx) ", qc->elems[i].addr, qc->elems[i].szB);
      pp_Invar(&qc->elems[i].inv);
      VG_(printf)("\n");
   }
   VG_(printf)(">>>\n");
}

static ULong stats__qcache_queries = 0;
static ULong stats__qcache_misses  = 0;
static ULong stats__qcache_probes  = 0;

///
//////////////////////////////////////////////////////////////

/* Each thread has:
   * a shadow stack of StackFrames, which is a doubly linked list
   * a stack block interval tree
*/
static  struct _StackFrame*          shadowStacks[VG_N_THREADS];

static  WordFM* /* StackTreeNode */  siTrees[VG_N_THREADS];

static  QCache                       qcaches[VG_N_THREADS];


/* Additionally, there is one global variable interval tree
   for the entire process.
*/
static WordFM* /* GlobalTreeNode */ giTree;


static void invalidate_all_QCaches ( void )
{
   Word i;
   for (i = 0; i < VG_N_THREADS; i++) {
      QCache__invalidate( &qcaches[i] );
   }
}

static void ourGlobals_init ( void )
{
   Word i;
   for (i = 0; i < VG_N_THREADS; i++) {
      shadowStacks[i] = NULL;
      siTrees[i] = NULL;
   }
   invalidate_all_QCaches();
   giTree = VG_(newFM)( sg_malloc, "di.sg_main.oGi.1", sg_free,
                        (Word(*)(UWord,UWord))cmp_intervals_GlobalTreeNode );
}


//////////////////////////////////////////////////////////////
//                                                          //
// Handle global variable load/unload events                //
//                                                          //
//////////////////////////////////////////////////////////////

static void acquire_globals ( ULong di_handle )
{
   Word n, i;
   XArray* /* of GlobalBlock */ gbs;
   if (0) VG_(printf)("ACQUIRE GLOBALS %llu\n", di_handle );
   gbs = VG_(di_get_global_blocks_from_dihandle)
            (di_handle, True/*arrays only*/);
   if (0) VG_(printf)("   GOT %ld globals\n", VG_(sizeXA)( gbs ));

   n = VG_(sizeXA)( gbs );
   for (i = 0; i < n; i++) {
      GlobalBlock* gbp;
      GlobalBlock* gb = VG_(indexXA)( gbs, i );
      if (0) VG_(printf)("   new Global size %2lu at %#lx:  %s %s\n",
                         gb->szB, gb->addr, gb->soname, gb->name );
      tl_assert(gb->szB > 0);
      /* Make a persistent copy of each GlobalBlock, and add it
         to the tree. */
      gbp = get_persistent_GlobalBlock( gb );
      add_block_to_GlobalTree( giTree, gbp );
   }

   VG_(deleteXA)( gbs );
}


/* We only intercept these two because we need to see any di_handles
   that might arise from the mappings/allocations. */
void sg_new_mem_mmap( Addr a, SizeT len,
                      Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   if (di_handle > 0)
      acquire_globals(di_handle);
}
void sg_new_mem_startup( Addr a, SizeT len,
                         Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   if (di_handle > 0)
      acquire_globals(di_handle);
}
void sg_die_mem_munmap ( Addr a, SizeT len )
{
   Bool debug = (Bool)0;
   Bool overlap = False;

   if (debug) VG_(printf)("MUNMAP %#lx %lu\n", a, len );

   if (len == 0)
      return;

   overlap = del_GlobalTree_range(giTree, a, len);

   { /* redundant sanity check */
     UWord keyW, valW;
     VG_(initIterFM)( giTree );
     while (VG_(nextIterFM)( giTree, &keyW, &valW )) {
        GlobalTreeNode* nd = (GlobalTreeNode*)keyW;
        tl_assert(valW == 0);
        tl_assert(nd->szB > 0);
        tl_assert(nd->addr + nd->szB <= a
                  || a + len <= nd->addr);
     }
     VG_(doneIterFM)( giTree );
   }

   if (!overlap)
      return;

   /* Ok, the range contained some blocks.  Therefore we'll need to
      visit all the Invars in all the thread shadow stacks, and
      convert all Inv_Global entries that intersect [a,a+len) to
      Inv_Unknown. */
   tl_assert(len > 0);
   preen_global_Invars( a, len );
   invalidate_all_QCaches();
}


//////////////////////////////////////////////////////////////
//                                                          //
// StackFrame                                               //
//                                                          //
//////////////////////////////////////////////////////////////

static ULong stats__total_accesses   = 0;
static ULong stats__classify_Stack0  = 0;
static ULong stats__classify_StackN  = 0;
static ULong stats__classify_Global  = 0;
static ULong stats__classify_Unknown = 0;
static ULong stats__Invars_preened   = 0;
static ULong stats__Invars_changed   = 0;
static ULong stats__t_i_b_empty      = 0;
static ULong stats__htab_fast        = 0;
static ULong stats__htab_searches    = 0;
static ULong stats__htab_probes      = 0;
static ULong stats__htab_resizes     = 0;


/* A dynamic instance of an instruction */
typedef
   struct {
      /* IMMUTABLE */
      Addr    insn_addr; /* NB! zero means 'not in use' */
      XArray* blocks; /* XArray* of StackBlock, or NULL if none */
      /* MUTABLE */
      Invar invar;
   }
   IInstance;


#define N_HTAB_FIXED 64

typedef
   struct _StackFrame {
      /* The sp when the frame was created, so we know when to get rid
         of it. */
      Addr creation_sp;
      /* The stack frames for a thread are arranged as a doubly linked
         list.  Obviously the outermost frame in the stack has .outer
         as NULL and the innermost in theory has .inner as NULL.
         However, when a function returns, we don't delete the
         just-vacated StackFrame.  Instead, it is retained in the list
         and will be re-used when the next call happens.  This is so
         as to avoid constantly having to dynamically allocate and
         deallocate frames. */
      struct _StackFrame* inner;
      struct _StackFrame* outer;
      Word depth; /* 0 for outermost; increases inwards */
      /* Information for each memory referencing instruction, for this
         instantiation of the function.  The iinstances array is
         operated as a simple linear-probe hash table, which is
         dynamically expanded as necessary.  One critical thing is
         that an IInstance with a .insn_addr of zero is interpreted to
         mean that hash table slot is unused.  This means we can't
         store an IInstance for address zero. */
      /* Note that htab initially points to htab_fixed.  If htab_fixed
         turns out not to be big enough then htab is made to point to
         dynamically allocated memory.  But it's often the case that
         htab_fixed is big enough, so this optimisation saves a huge
         number of sg_malloc/sg_free call pairs. */
      IInstance* htab;
      UWord      htab_size; /* size of hash table, MAY ONLY BE A POWER OF 2 */
      UWord      htab_used; /* number of hash table slots currently in use */
      /* If this frame is currently making a call, then the following
         are relevant. */
      Addr sp_at_call;
      Addr fp_at_call;
      XArray* /* of Addr */ blocks_added_by_call;
      /* See comment just above */
      IInstance htab_fixed[N_HTAB_FIXED];
   }
   StackFrame;
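
/* A sketch of the frame-reuse discipline described above
   (hypothetical helper, kept disabled and not used by the tool):
   frames are never unlinked on return, so walking inwards from a
   thread's root visits both active frames and retained-but-inactive
   ones; the latter are recognisable by a NULL .htab (see
   preen_global_Invars and shadowStack_unwind). */
#if 0
static Word count_active_frames ( StackFrame* outermost )
{
   Word n = 0;
   StackFrame* f;
   for (f = outermost; f; f = f->inner)
      if (f->htab) n++;
   return n;
}
#endif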


/* Move this somewhere else? */
/* Visit all Invars in the entire system, and convert to Inv_Unknown
   any Inv_Global Invars that intersect [a,a+len). */

__attribute__((noinline))
static void preen_global_Invar ( Invar* inv, Addr a, SizeT len )
{
   stats__Invars_preened++;
   tl_assert(len > 0);
   tl_assert(inv);
   switch (inv->tag) {
      case Inv_Global:
         tl_assert(inv->Inv.Global.nd);
         tl_assert(inv->Inv.Global.nd->szB > 0);
         if (0) VG_(printf)("preen_Invar Global %#lx %lu\n",
                            inv->Inv.Global.nd->addr,
                            inv->Inv.Global.nd->szB);
         if (0 == cmp_nonempty_intervals(a, len, inv->Inv.Global.nd->addr,
                                                 inv->Inv.Global.nd->szB)) {
            inv->tag = Inv_Unknown;
            stats__Invars_changed++;
         }
         break;
      case Inv_Stack0:
      case Inv_StackN:
      case Inv_Unknown:
         break;
      default:
         tl_assert(0);
   }
}

__attribute__((noinline))
static void preen_global_Invars ( Addr a, SizeT len )
{
   Int         i;
   UWord       u;
   StackFrame* frame;
   tl_assert(len > 0);
   for (i = 0; i < VG_N_THREADS; i++) {
      frame = shadowStacks[i];
      if (!frame)
         continue; /* no frames for this thread */
      /* start from the innermost frame */
      while (frame->inner)
         frame = frame->inner;
      tl_assert(frame->outer);
      /* work through the frames from innermost to outermost.  The
         order isn't important; we just need to ensure we visit each
         frame once (including those which are not actually active,
         more 'inner' than the 'innermost active frame', viz, just
         hanging around waiting to be used, when the current innermost
         active frame makes more calls).  See comments on definition
         of struct _StackFrame. */
      for (; frame; frame = frame->outer) {
         UWord xx = 0; /* sanity check only; count of used htab entries */
         if (!frame->htab)
            continue; /* frame not in use.  See shadowStack_unwind(). */
         for (u = 0; u < frame->htab_size; u++) {
            IInstance* ii = &frame->htab[u];
            if (ii->insn_addr == 0)
               continue; /* not in use */
            if (0) { pp_Invar(&ii->invar); VG_(printf)(" x\n"); }
            preen_global_Invar( &ii->invar, a, len );
            xx++;
         }
         tl_assert(xx == frame->htab_used);
      }
   }
}


/* XXX this should be >> 2 on ppc32/64 since the bottom two bits
   of the ip are guaranteed to be zero */
inline static UWord compute_II_hash ( Addr ip, UWord htab_size ) {
   return (ip >> 0) & (htab_size - 1);
}
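
/* A small illustrative check of the hash (disabled; the constants are
   invented).  Since htab_size may only be a power of two, masking
   with (htab_size - 1) is the same as reducing ip modulo htab_size;
   e.g. for a table of 64 slots, only the low 6 bits of the insn
   address pick the home slot. */
#if 0
static void example_compute_II_hash ( void )
{
   tl_assert(compute_II_hash(0x400123, 64) == (0x400123 % 64));
   tl_assert(compute_II_hash(0x400123, 64) == 0x23);
}
#endif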
1265 
1266 __attribute__((noinline))
initialise_II_hash_table(StackFrame * sf)1267 static void initialise_II_hash_table ( StackFrame* sf )
1268 {
1269    UWord i;
1270    sf->htab_size = N_HTAB_FIXED; /* initial hash table size */
1271    sf->htab = &sf->htab_fixed[0];
1272    tl_assert(sf->htab);
1273    sf->htab_used = 0;
1274    for (i = 0; i < sf->htab_size; i++)
1275       sf->htab[i].insn_addr = 0; /* NOT IN USE */
1276 }
1277 
1278 
1279 __attribute__((noinline))
resize_II_hash_table(StackFrame * sf)1280 static void resize_II_hash_table ( StackFrame* sf )
1281 {
1282    UWord     i, j, ix, old_size, new_size;
1283    IInstance *old_htab, *new_htab, *old;
1284 
1285    tl_assert(sf && sf->htab);
1286    old_size = sf->htab_size;
1287    new_size = 2 * old_size;
1288    old_htab = sf->htab;
1289    new_htab = sg_malloc( "di.sg_main.rIht.1",
1290                          new_size * sizeof(IInstance) );
1291    for (i = 0; i < new_size; i++) {
1292       new_htab[i].insn_addr = 0; /* NOT IN USE */
1293    }
1294    for (i = 0; i < old_size; i++) {
1295       old = &old_htab[i];
1296       if (old->insn_addr == 0 /* NOT IN USE */)
1297          continue;
1298       ix = compute_II_hash(old->insn_addr, new_size);
1299       /* find out where to put this, in the new table */
1300       j = new_size;
1301       while (1) {
1302          if (new_htab[ix].insn_addr == 0)
1303             break;
1304          /* This can't ever happen, because it would mean the new
1305             table is full; that isn't allowed -- even the old table is
1306             only allowed to become half full. */
1307          tl_assert(j > 0);
1308          j--;
1309          ix++; if (ix == new_size) ix = 0;
1310       }
1311       /* copy the old entry to this location */
1312       tl_assert(ix < new_size);
1313       tl_assert(new_htab[ix].insn_addr == 0);
1314       new_htab[ix] = *old;
1315       tl_assert(new_htab[ix].insn_addr != 0);
1316    }
1317    /* all entries copied; free old table. */
1318    if (old_htab != &sf->htab_fixed[0])
1319       sg_free(old_htab);
1320    sf->htab = new_htab;
1321    sf->htab_size = new_size;
1322    /* check sf->htab_used is correct.  Optional and a bit expensive
1323       but anyway: */
1324    j = 0;
1325    for (i = 0; i < new_size; i++) {
1326       if (new_htab[i].insn_addr != 0) {
1327          j++;
1328       }
1329    }
1330    tl_assert(j == sf->htab_used);
1331    if (0) VG_(printf)("resized tab for SF %p to %lu\n", sf, new_size);
1332 }
1333 
1334 
1335 __attribute__((noinline))
find_or_create_IInstance_SLOW(StackFrame * sf,Addr ip,XArray * ip_frameblocks)1336 static IInstance* find_or_create_IInstance_SLOW (
1337                      StackFrame* sf,
1338                      Addr ip,
1339                      XArray* /* StackBlock */ ip_frameblocks
1340                   )
1341 {
1342    UWord i, ix;
1343 
1344    stats__htab_searches++;
1345 
1346    tl_assert(sf);
1347    tl_assert(sf->htab);
1348 
1349    /* Make sure the table loading doesn't get too high. */
1350    if (UNLIKELY(2 * sf->htab_used >= 1 * sf->htab_size)) {
1351       stats__htab_resizes++;
1352       resize_II_hash_table(sf);
1353    }
1354    tl_assert(2 * sf->htab_used <= sf->htab_size);
1355 
1356    ix = compute_II_hash(ip, sf->htab_size);
1357    i = sf->htab_size;
1358    while (1) {
1359       stats__htab_probes++;
1360       /* Note that because of the way the fast-case handler works,
1361          these two tests are actually redundant in the first iteration
1362          of this loop.  (Except they aren't redundant if the code just
1363          above resized the table first. :-) */
1364       if (sf->htab[ix].insn_addr == ip)
1365          return &sf->htab[ix];
1366       if (sf->htab[ix].insn_addr == 0)
1367          break;
1368       /* If i ever gets to zero and we have found neither what we're
1369          looking for nor an empty slot, the table must be full.  Which
1370          isn't possible -- we monitor the load factor to ensure it
1371          doesn't get above say 50%; if that ever does happen the table
1372          is resized. */
1373       tl_assert(i > 0);
1374       i--;
1375       ix++;
1376       if (ix == sf->htab_size) ix = 0;
1377    }
1378 
1379    /* So now we've found a free slot at ix, and we can use that. */
1380    tl_assert(sf->htab[ix].insn_addr == 0);
1381 
1382    /* Add a new record in this slot. */
1383    tl_assert(ip != 0); /* CAN'T REPRESENT THIS */
1384    sf->htab[ix].insn_addr = ip;
1385    sf->htab[ix].blocks    = ip_frameblocks;
1386    sf->htab[ix].invar.tag = Inv_Unset;
1387    sf->htab_used++;
1388    return &sf->htab[ix];
1389 }
1390 
1391 
1392 inline
find_or_create_IInstance(StackFrame * sf,Addr ip,XArray * ip_frameblocks)1393 static IInstance* find_or_create_IInstance (
1394                      StackFrame* sf,
1395                      Addr ip,
1396                      XArray* /* StackBlock */ ip_frameblocks
1397                   )
1398 {
1399    UWord ix = compute_II_hash(ip, sf->htab_size);
1400    /* Is it in the first slot we come to? */
1401    if (LIKELY(sf->htab[ix].insn_addr == ip)) {
1402       stats__htab_fast++;
1403       return &sf->htab[ix];
1404    }
1405    /* If the first slot we come to is empty, bag it. */
1406    if (LIKELY(sf->htab[ix].insn_addr == 0)) {
1407       stats__htab_fast++;
1408       tl_assert(ip != 0);
1409       sf->htab[ix].insn_addr = ip;
1410       sf->htab[ix].blocks    = ip_frameblocks;
1411       sf->htab[ix].invar.tag = Inv_Unset;
1412       sf->htab_used++;
1413       return &sf->htab[ix];
1414    }
1415    /* Otherwise we hand off to the slow case, which searches other
1416       slots, and optionally resizes the table if necessary. */
1417    return find_or_create_IInstance_SLOW( sf, ip, ip_frameblocks );
1418 }
1419 
1420 
1421 __attribute__((noinline))
calculate_StackBlock_EA(StackBlock * descr,Addr sp,Addr fp)1422 static Addr calculate_StackBlock_EA ( StackBlock* descr,
1423                                       Addr sp, Addr fp ) {
1424    UWord w1 = (UWord)descr->base;
1425    UWord w2 = (UWord)(descr->spRel ? sp : fp);
1426    UWord ea = w1 + w2;
1427    return ea;
1428 }
1429 
1430 /* Given an array of StackBlocks, return an array of Addrs, holding
1431    their effective addresses.  Caller deallocates result array. */
1432 __attribute__((noinline))
calculate_StackBlock_EAs(XArray * blocks,Addr sp,Addr fp)1433 static XArray* /* Addr */ calculate_StackBlock_EAs (
1434                              XArray* /* StackBlock */ blocks,
1435                              Addr sp, Addr fp
1436                           )
1437 {
1438    XArray* res;
1439    Word i, n = VG_(sizeXA)( blocks );
1440    tl_assert(n > 0);
1441    res = VG_(newXA)( sg_malloc, "di.sg_main.cSBE.1", sg_free, sizeof(Addr) );
1442    for (i = 0; i < n; i++) {
1443       StackBlock* blk = VG_(indexXA)( blocks, i );
1444       Addr ea = calculate_StackBlock_EA( blk, sp, fp );
1445       VG_(addToXA)( res, &ea );
1446    }
1447    return res;
1448 }
1449 
1450 
1451 /* Try to classify the block into which a memory access falls, and
1452    write the result in 'inv'.  This writes all relevant fields of
1453    'inv'. */
1454 __attribute__((noinline))
classify_address(Invar * inv,ThreadId tid,Addr ea,Addr sp,Addr fp,UWord szB,XArray * thisInstrBlocks)1455 static void classify_address ( /*OUT*/Invar* inv,
1456                                ThreadId tid,
1457                                Addr ea, Addr sp, Addr fp,
1458                                UWord szB,
1459                                XArray* /* of StackBlock */ thisInstrBlocks )
1460 {
1461    tl_assert(szB > 0);
1462    /* First, look in the stack blocks accessible in this instruction's
1463       frame. */
1464    {
1465      Word i, nBlocks = VG_(sizeXA)( thisInstrBlocks );
1466      if (nBlocks == 0) stats__t_i_b_empty++;
1467      for (i = 0; i < nBlocks; i++) {
1468         StackBlock* descr = VG_(indexXA)( thisInstrBlocks, i );
1469         Addr bea = calculate_StackBlock_EA( descr, sp, fp );
1470         if (bea <= ea && ea + szB <= bea + descr->szB) {
1471            /* found it */
1472            inv->tag = Inv_Stack0;
1473            inv->Inv.Stack0.addr  = bea;
1474            inv->Inv.Stack0.szB   = descr->szB;
1475            inv->Inv.Stack0.descr = descr;
1476            stats__classify_Stack0++;
1477            return;
1478         }
1479      }
1480    }
1481    /* Look in this thread's query cache */
1482    { Word i;
1483      QCache* cache = &qcaches[tid];
1484      static UWord ctr = 0;
1485      stats__qcache_queries++;
1486      for (i = 0; i < cache->nInUse; i++) {
1487         if (0) /* expensive in a loop like this */
1488                tl_assert(cache->elems[i].addr + cache->elems[i].szB != 0);
1489         stats__qcache_probes++;
1490         if (is_subinterval_of(cache->elems[i].addr,
1491                               cache->elems[i].szB, ea, szB)) {
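           /* Hit.  On every QCACHE_ADVANCE_EVERY-th hit that isn't
              already at the front, swap the matching entry one slot
              forwards, cheaply approximating an LRU ordering. */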
1492            if (i > 0
1493                && (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0) {
1494               QCElem tmp;
1495               tmp = cache->elems[i-1];
1496               cache->elems[i-1] = cache->elems[i];
1497               cache->elems[i] = tmp;
1498               i--;
1499            }
1500            *inv = cache->elems[i].inv;
1501            return;
1502         }
1503      }
1504      stats__qcache_misses++;
1505    }
1506    /* Ok, so it's not a block in the top frame.  Perhaps it's a block
1507       in some calling frame?  Consult this thread's stack-block
1508       interval tree to find out. */
1509    { StackTreeNode* nd = find_StackTreeNode( siTrees[tid], ea );
1510      /* We know that [ea,ea+1) is in the block, but we need to
1511         restrict to the case where the whole access falls within
1512         it. */
1513      if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
1514         nd = NULL;
1515      }
1516      if (nd) {
1517         /* found it */
1518         inv->tag = Inv_StackN;
1519         inv->Inv.StackN.nd = nd;
1520         stats__classify_StackN++;
1521         goto out;
1522      }
1523    }
1524    /* Not in a stack block.  Try the global pool. */
1525    { GlobalTreeNode* nd = find_GlobalTreeNode(giTree, ea);
1526      /* We know that [ea,ea+1) is in the block, but we need to
1527         restrict to the case where the whole access falls within
1528         it. */
1529      if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
1530         nd = NULL;
1531      }
1532      if (nd) {
1533         /* found it */
1534         inv->tag = Inv_Global;
1535         inv->Inv.Global.nd = nd;
1536         stats__classify_Global++;
1537         goto out;
1538      }
1539    }
1540    /* No idea - give up. */
1541    inv->tag = Inv_Unknown;
1542    stats__classify_Unknown++;
1543 
1544    /* Update the cache */
1545   out:
1546    { Addr    toadd_addr = 0;
1547      SizeT   toadd_szB  = 0;
1548      QCache* cache      = &qcaches[tid];
1549 
1550      static UWord ctr = 0;
1551      Bool show = False;
1552      if (0 && 0 == (ctr++ & 0x1FFFFF)) show = True;
1553 
1554      if (show) QCache__pp(cache, "before upd");
1555 
1556      switch (inv->tag) {
1557         case Inv_Global:
1558            toadd_addr = inv->Inv.Global.nd->addr;
1559            toadd_szB  = inv->Inv.Global.nd->szB;
1560            break;
1561         case Inv_StackN:
1562            toadd_addr = inv->Inv.StackN.nd->addr;
1563            toadd_szB  = inv->Inv.StackN.nd->szB;
1564            break;
1565         case Inv_Unknown: {
1566            /* This is more complex.  We need to figure out the
1567               intersection of the "holes" in the global and stack
1568               interval trees into which [ea,ea+szB) falls.  This is
1569               further complicated by the fact that [ea,ea+szB) might
1570               not fall cleanly into a hole; it may instead fall across
1571               the boundary of a stack or global block.  In that case
1572               we just ignore it and don't update the cache, since we
1573               have no way to represent this situation precisely. */
1574            StackTreeNode  sNegInf, sPosInf, sKey, *sLB, *sUB;
1575            GlobalTreeNode gNegInf, gPosInf, gKey, *gLB, *gUB;
1576            Addr gMin, gMax, sMin, sMax, uMin, uMax;
1577            Bool sOK, gOK;
1578            sNegInf.addr = 0;
1579            sNegInf.szB  = 1;
1580            sPosInf.addr = ~(UWord)0;
1581            sPosInf.szB  = 1;
1582            gNegInf.addr = 0;
1583            gNegInf.szB  = 1;
1584            gPosInf.addr = ~(UWord)0;
1585            gPosInf.szB  = 1;
1586            sKey.addr = ea;
1587            sKey.szB  = szB;
1588            gKey.addr = ea;
1589            gKey.szB  = szB;
1590            if (0) VG_(printf)("Tree sizes %ld %ld\n",
1591                               VG_(sizeFM)(siTrees[tid]), VG_(sizeFM)(giTree));
1592            sOK = VG_(findBoundsFM)( siTrees[tid],
1593                                     (UWord*)&sLB,    NULL/*unused*/,
1594                                     (UWord*)&sUB,    NULL/*unused*/,
1595                                     (UWord)&sNegInf, 0/*unused*/,
1596                                     (UWord)&sPosInf, 0/*unused*/,
1597                                     (UWord)&sKey );
1598            gOK = VG_(findBoundsFM)( giTree,
1599                                     (UWord*)&gLB,    NULL/*unused*/,
1600                                     (UWord*)&gUB,    NULL/*unused*/,
1601                                     (UWord)&gNegInf, 0/*unused*/,
1602                                     (UWord)&gPosInf, 0/*unused*/,
1603                                     (UWord)&gKey );
1604            if (!(sOK && gOK)) {
1605               /* If this happens, then [ea,ea+szB) partially overlaps
1606                  a stack or global block.  We can't represent that, so
1607                  just forget it (should be very rare).  However, do
1608                  maximum sanity checks first.  In such a
1609                  partial overlap case, it can't be the case that both
1610                  [ea] and [ea+szB-1] overlap the same block, since if
1611                  that were indeed the case then it wouldn't be a
1612                  partial overlap; rather it would simply fall inside
1613                  that block entirely and we shouldn't be inside this
1614                  conditional at all. */
1615               if (!sOK) {
1616                  StackTreeNode *ndFirst, *ndLast;
1617                  ndFirst = find_StackTreeNode( siTrees[tid], ea );
1618                  ndLast  = find_StackTreeNode( siTrees[tid], ea+szB-1 );
1619                  /* if each end of the range falls inside a block,
1620                     the two blocks can't be the same one. */
1621                  if (ndFirst && ndLast)
1622                     tl_assert(ndFirst != ndLast);
1623                  /* for each end of the range, if it is in a block,
1624                     the range as a whole can't be entirely within the
1625                     block. */
1626                  if (ndFirst)
1627                     tl_assert(!is_subinterval_of(ndFirst->addr,
1628                                                  ndFirst->szB, ea, szB));
1629                  if (ndLast)
1630                     tl_assert(!is_subinterval_of(ndLast->addr,
1631                                                  ndLast->szB, ea, szB));
1632               }
1633               if (!gOK) {
1634                  GlobalTreeNode *ndFirst, *ndLast;
1635                  ndFirst = find_GlobalTreeNode( giTree, ea );
1636                  ndLast  = find_GlobalTreeNode( giTree, ea+szB-1 );
1637                  /* if each end of the range falls inside a block,
1638                     the two blocks can't be the same one. */
1639                  if (ndFirst && ndLast)
1640                     tl_assert(ndFirst != ndLast);
1641                  /* for each end of the range, if it is in a block,
1642                     the range as a whole can't be entirely within the
1643                     block. */
1644                  if (ndFirst)
1645                     tl_assert(!is_subinterval_of(ndFirst->addr,
1646                                                  ndFirst->szB, ea, szB));
1647                  if (ndLast)
1648                     tl_assert(!is_subinterval_of(ndLast->addr,
1649                                                  ndLast->szB, ea, szB));
1650               }
1651               if (0) VG_(printf)("overlapping blocks in cache\n");
1652               return;
1653            }
1654            sMin = sLB == &sNegInf  ? 0         : (sLB->addr + sLB->szB);
1655            sMax = sUB == &sPosInf  ? ~(UWord)0 : (sUB->addr - 1);
1656            gMin = gLB == &gNegInf  ? 0         : (gLB->addr + gLB->szB);
1657            gMax = gUB == &gPosInf  ? ~(UWord)0 : (gUB->addr - 1);
1658            if (0) VG_(printf)("sMin %lx sMax %lx gMin %lx gMax %lx\n",
1659                               sMin, sMax, gMin, gMax);
1660            /* [sMin,sMax] and [gMin,gMax] must both contain
1661               [ea,ea+szB) (right?)  That implies they must overlap
1662               at least over [ea,ea+szB). */
1663            tl_assert(sMin <= ea && ea+szB-1 <= sMax);
1664            tl_assert(gMin <= ea && ea+szB-1 <= gMax);
1665            /* So now compute their intersection. */
1666            uMin = Addr__max( sMin, gMin );
1667            uMax = Addr__min( sMax, gMax );
1668            if (0) VG_(printf)("uMin %lx uMax %lx\n", uMin, uMax);
1669            tl_assert(uMin <= uMax);
1670            tl_assert(uMin <= ea && ea+szB-1 <= uMax);
1671            /* Finally, we can park [uMin,uMax] in the cache.  However,
1672               if uMax is ~0, the size uMax - uMin + 1 overflows to zero,
1673               which we can't represent; hence fudge uMax. */
1674            if (uMin < uMax && uMax == ~(UWord)0)
1675               uMax--;
1676            toadd_addr = uMin;
1677            toadd_szB  = uMax - uMin + 1;
1678            break;
1679         }
1680         default:
1681            /* We should only be caching info for the above 3 cases */
1682            tl_assert(0);
1683      } /* switch (inv->tag) */
1684 
1685      { /* and actually add this to the cache, finally */
1686        Word i;
1687        Word ip = cache->nInUse / 2; /* doesn't seem critical */
1688 
1689        if (cache->nInUse < N_QCACHE)
1690           cache->nInUse++;
1691        for (i = cache->nInUse-1; i > ip; i--) {
1692           cache->elems[i] = cache->elems[i-1];
1693        }
1694 
1695        tl_assert(toadd_szB > 0);
1696        cache->elems[ip].addr = toadd_addr;
1697        cache->elems[ip].szB  = toadd_szB;
1698        cache->elems[ip].inv  = *inv;
1699      }
1700 
1701      if (show) QCache__pp(cache, "after upd");
1702 
1703    }
1704 }
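
/* The Inv_Unknown case above in essence finds, for an address lying in
   neither tree, the largest surrounding interval that is block-free in
   both trees: the intersection of the stack-tree "hole" and the
   global-tree "hole".  A hedged standalone sketch, with hypothetical
   types and inclusive bounds as in the code above: */
#if 0
typedef struct { unsigned long lo, hi; } Hole;
static Hole intersect_holes ( Hole s, Hole g )
{
   Hole u;
   u.lo = s.lo > g.lo ? s.lo : g.lo;   /* Addr__max(sMin, gMin) */
   u.hi = s.hi < g.hi ? s.hi : g.hi;   /* Addr__min(sMax, gMax) */
   /* Both holes contain the access, so u.lo <= u.hi is guaranteed. */
   return u;
}
#endif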
1705 
1706 
1707 /* CALLED FROM GENERATED CODE */
1708 static
1709 VG_REGPARM(3)
1710 void helperc__mem_access ( /* Known only at run time: */
1711                            Addr ea, Addr sp, Addr fp,
1712                            /* Known at translation time: */
1713                            Word sszB, Addr ip, XArray* ip_frameBlocks )
1714 {
1715    UWord szB;
1716    IInstance* iinstance;
1717    Invar* inv;
1718    Invar new_inv;
1719    ThreadId tid = VG_(get_running_tid)();
1720    StackFrame* frame;
1721    HChar bufE[128], bufA[128];
1722 
1723    stats__total_accesses++;
1724 
1725    tl_assert(is_sane_TId(tid));
1726    frame = shadowStacks[tid];
1727    tl_assert(frame);
1728 
1729    /* Find the instance info for this instruction. */
1730    tl_assert(ip_frameBlocks);
1731    iinstance = find_or_create_IInstance( frame, ip, ip_frameBlocks );
1732    tl_assert(iinstance);
1733    tl_assert(iinstance->blocks == ip_frameBlocks);
1734 
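   /* The sign of sszB was chosen at translation time: negative encodes
      a store, positive a load (see instrument_mem_access).  Only the
      magnitude matters for classification. */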
1735    szB = (sszB < 0) ? (-sszB) : sszB;
1736    tl_assert(szB > 0);
1737 
1738    inv = &iinstance->invar;
1739 
1740    /* Deal with first uses of instruction instances. */
1741    if (inv->tag == Inv_Unset) {
1742       /* This is the first use of this instance of the instruction, so
1743          we can't make any check; we merely record what we saw, so we
1744          can compare it against what happens for 2nd and subsequent
1745          accesses. */
1746       classify_address( inv,
1747                         tid, ea, sp, fp, szB, iinstance->blocks );
1748       tl_assert(inv->tag != Inv_Unset);
1749       return;
1750    }
1751 
1752    /* So generate an Invar and see if it's different from what
1753       we had before. */
1754    classify_address( &new_inv,
1755                      tid, ea, sp, fp, szB, iinstance->blocks );
1756    tl_assert(new_inv.tag != Inv_Unset);
1757 
1758    /* Did we see something different from before?  If not, then there's
1759       no error. */
1760    if (eq_Invar(&new_inv, inv))
1761       return;
1762 
1763    tl_assert(inv->tag != Inv_Unset);
1764 
1765    VG_(memset)(bufE, 0, sizeof(bufE));
1766    show_Invar( bufE, sizeof(bufE)-1, inv, frame->depth );
1767 
1768    VG_(memset)(bufA, 0, sizeof(bufA));
1769    show_Invar( bufA, sizeof(bufA)-1, &new_inv, frame->depth );
1770 
1771    sg_record_error_SorG( tid, ea, sszB, bufE, bufA );
1772 
1773    /* And now install the new observation as "standard", so as to
1774       make future error messages make more sense. */
1775    *inv = new_inv;
1776 }
1777 
1778 
1779 ////////////////////////////////////////
1780 /* Primary push-a-new-frame routine.  Called indirectly from
1781    generated code. */
1782 
1783 static UWord stats__max_sitree_size = 0;
1784 static UWord stats__max_gitree_size = 0;
1785 
1786 static
1787 void shadowStack_new_frame ( ThreadId tid,
1788                              Addr     sp_at_call_insn,
1789                              Addr     sp_post_call_insn,
1790                              Addr     fp_at_call_insn,
1791                              Addr     ip_post_call_insn,
1792                              XArray*  descrs_at_call_insn )
1793 {
1794    StackFrame *callee, *caller;
1795    tl_assert(is_sane_TId(tid));
1796 
1797    caller = shadowStacks[tid];
1798    tl_assert(caller);
1799 
1800    if (caller->outer) { /* "this is not the outermost frame" */
1801       tl_assert(caller->outer->inner == caller);
1802       tl_assert(caller->outer->depth >= 0);
1803       tl_assert(1 + caller->outer->depth == caller->depth);
1804    } else {
1805       tl_assert(caller->depth == 0);
1806    }
1807 
1808    caller->sp_at_call = sp_at_call_insn;
1809    caller->fp_at_call = fp_at_call_insn;
1810 
1811    if (descrs_at_call_insn) {
1812       tl_assert( VG_(sizeXA)(descrs_at_call_insn) > 0 );
1813       caller->blocks_added_by_call
1814          = calculate_StackBlock_EAs( descrs_at_call_insn,
1815                                      sp_at_call_insn, fp_at_call_insn );
1816       if (caller->blocks_added_by_call)
1817          add_blocks_to_StackTree( siTrees[tid],
1818                                   descrs_at_call_insn,
1819                                   caller->blocks_added_by_call,
1820                                   caller->depth /* stack depth at which
1821                                                    these blocks are
1822                                                    considered to exist*/ );
1823       if (1) {
1824          UWord s  = VG_(sizeFM)( siTrees[tid] );
1825          UWord g  = VG_(sizeFM)( giTree );
1826          Bool  sb = s > stats__max_sitree_size;
1827          Bool  gb = g > stats__max_gitree_size;
1828          if (sb) stats__max_sitree_size = s;
1829          if (gb) stats__max_gitree_size = g;
1830          if (0 && (sb || gb))
1831             VG_(message)(Vg_DebugMsg,
1832                          "exp-sgcheck: new max tree sizes: "
1833                          "StackTree %ld, GlobalTree %ld\n",
1834                          stats__max_sitree_size, stats__max_gitree_size );
1835       }
1836    } else {
1837       caller->blocks_added_by_call = NULL;
1838    }
1839 
1840    /* caller->blocks_added_by_call is used again (and then freed) when
1841       this frame is removed from the stack. */
1842 
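   /* Frame records are only freed at thread exit, never on return; so
      if this call depth has been visited before, reuse the existing
      StackFrame rather than allocating a fresh one. */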
1843    if (caller->inner) {
1844       callee = caller->inner;
1845    } else {
1846       callee = sg_malloc("di.sg_main.sSnf.1", sizeof(StackFrame));
1847       VG_(memset)(callee, 0, sizeof(StackFrame));
1848       callee->outer = caller;
1849       caller->inner = callee;
1850       callee->depth = 1 + caller->depth;
1851       tl_assert(callee->inner == NULL);
1852    }
1853 
1854    /* This sets up .htab, .htab_size and .htab_used */
1855    initialise_II_hash_table( callee );
1856 
1857    callee->creation_sp    = sp_post_call_insn;
1858    callee->sp_at_call     = 0; // not actually required ..
1859    callee->fp_at_call     = 0; // .. these 3 initialisations are ..
1860    callee->blocks_added_by_call = NULL; // .. just for cleanness
1861 
1862    /* record the new running stack frame */
1863    shadowStacks[tid] = callee;
1864 
1865    /* and this thread's query cache is now invalid */
1866    QCache__invalidate( &qcaches[tid] );
1867 
1868    if (0)
1869    { Word d = callee->depth;
1870      HChar fnname[80];
1871      Bool ok;
1872      Addr ip = ip_post_call_insn;
1873      ok = VG_(get_fnname_w_offset)( ip, fnname, sizeof(fnname) );
1874      while (d > 0) {
1875         VG_(printf)(" ");
1876         d--;
1877      }
1878      VG_(printf)("> %s %#lx\n", ok ? fnname : "???", ip);
1879    }
1880 }
1881 
1882 /* CALLED FROM GENERATED CODE */
1883 static
1884 VG_REGPARM(3)
1885 void helperc__new_frame ( Addr sp_post_call_insn,
1886                           Addr fp_at_call_insn,
1887                           Addr ip_post_call_insn,
1888                           XArray* blocks_at_call_insn,
1889                           Word sp_adjust )
1890 {
1891    ThreadId tid = VG_(get_running_tid)();
1892    Addr     sp_at_call_insn = sp_post_call_insn + sp_adjust;
1893    shadowStack_new_frame( tid,
1894                           sp_at_call_insn,
1895                           sp_post_call_insn,
1896                           fp_at_call_insn,
1897                           ip_post_call_insn,
1898                           blocks_at_call_insn );
1899 }
1900 
1901 
1902 ////////////////////////////////////////
1903 /* Primary remove-frame(s) routine.  Called indirectly from
1904    generated code. */
1905 
1906 __attribute__((noinline))
1907 static void shadowStack_unwind ( ThreadId tid, Addr sp_now )
1908 {
1909    StackFrame *innermost, *innermostOrig;
1910    tl_assert(is_sane_TId(tid));
1911    innermost = shadowStacks[tid];
1912    tl_assert(innermost);
1913    innermostOrig = innermost;
1914    //VG_(printf)("UNWIND sp_new = %p\n", sp_now);
1915    while (1) {
1916       if (!innermost->outer)
1917          break;
1918       if (innermost->inner)
1919          tl_assert(innermost->inner->outer == innermost);
1920       tl_assert(innermost->outer->inner == innermost);
1921       tl_assert(innermost->blocks_added_by_call == NULL);
1922       if (sp_now <= innermost->creation_sp) break;
1923       //VG_(printf)("UNWIND     dump %p\n", innermost->creation_sp);
1924       tl_assert(innermost->htab);
1925       if (innermost->htab != &innermost->htab_fixed[0])
1926          sg_free(innermost->htab);
1927       /* be on the safe side */
1928       innermost->creation_sp = 0;
1929       innermost->htab = NULL;
1930       innermost->htab_size = 0;
1931       innermost->htab_used = 0;
1932       innermost->sp_at_call = 0;
1933       innermost->fp_at_call = 0;
1934       innermost->blocks_added_by_call = NULL;
1935       innermost = innermost->outer;
1936 
1937       /* So now we're "back" in the calling frame.  Remove from this
1938          thread's stack-interval-tree the blocks added at the time of
1939          the call. */
1940 
1941       if (innermost->outer) { /* not at the outermost frame */
1942          if (innermost->blocks_added_by_call == NULL) {
1943          } else {
1944             del_blocks_from_StackTree( siTrees[tid],
1945                                        innermost->blocks_added_by_call );
1946             VG_(deleteXA)( innermost->blocks_added_by_call );
1947             innermost->blocks_added_by_call = NULL;
1948          }
1949       }
1950       /* That completes the required tidying of the interval tree
1951          associated with the frame we just removed. */
1952 
1953       if (0) {
1954          Word d = innermost->depth;
1955          while (d > 0) {
1956             VG_(printf)(" ");
1957             d--;
1958          }
1959          VG_(printf)("X\n");
1960       }
1961 
1962    }
1963 
1964    tl_assert(innermost);
1965 
1966    if (innermost != innermostOrig) {
1967       shadowStacks[tid] = innermost;
1968       /* this thread's query cache is now invalid */
1969       QCache__invalidate( &qcaches[tid] );
1970    }
1971 }
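
/* The unwinding rule above in miniature: since the stack grows
   downwards, a frame is dead once the new SP is above the SP at which
   the frame was created.  A hedged sketch with a hypothetical type: */
#if 0
typedef struct _F { struct _F* outer; unsigned long creation_sp; } F;
static F* unwind ( F* top, unsigned long sp_now )
{
   while (top->outer && sp_now > top->creation_sp)
      top = top->outer;     /* pop: sp has retreated past this frame */
   return top;
}
#endif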
1972 
1973 
1974 
1975 //////////////////////////////////////////////////////////////
1976 //                                                          //
1977 // Instrumentation                                          //
1978 //                                                          //
1979 //////////////////////////////////////////////////////////////
1980 
1981 /* What does instrumentation need to do?
1982 
1983    - at each Call transfer, generate a call to shadowStack_new_frame;
1984      do this by manually inspecting the IR
1985 
1986    - at each sp change, if the sp change is negative, call
1987      shadowStack_unwind; do this by asking the core for SP-change
1988      analysis
1989 
1990    - for each memory-referencing instruction, call
1991      helperc__mem_access
1992 */
1993 
1994 /* A complication: sg_ instrumentation and h_ instrumentation need to
1995    be interleaved.  Since the latter is a lot more complex than the
1996    former, we split the sg_ instrumentation here into four functions
1997    and let the h_ instrumenter call the four functions as it goes.
1998    Hence the h_ instrumenter drives the sg_ instrumenter.
1999 
2000    To make this viable, the sg_ instrumenter carries what running
2001    state it needs in 'struct _SGEnv'.  This is exported only
2002    abstractly from this file.
2003 */
2004 
2005 struct _SGEnv {
2006    /* the current insn's IP */
2007    Addr64 curr_IP;
2008    /* whether the above is actually known */
2009    Bool curr_IP_known;
2010    /* if we find a mem ref, is it the first for this insn?  Used for
2011       detecting insns which make more than one memory ref, a situation
2012       we can't really handle properly; so we ignore all but
2013       the first ref. */
2014    Bool firstRef;
2015    /* READONLY */
2016    IRTemp (*newIRTemp_cb)(IRType,void*);
2017    void* newIRTemp_opaque;
2018 };
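
/* How a driving instrumenter is expected to use these four functions,
   per the comments above and below (hedged sketch; the loop shape and
   the names sbIn/sbOut/layout are illustrative, not the real caller): */
#if 0
   struct _SGEnv* env = sg_instrument_init( my_newIRTemp_cb, my_opaque );
   for (i = 0; i < sbIn->stmts_used; i++) {
      /* instrumentation must be added before the statement itself */
      sg_instrument_IRStmt( env, sbOut, sbIn->stmts[i],
                            layout, gWordTy, hWordTy );
      addStmtToIRSB( sbOut, sbIn->stmts[i] );
   }
   /* and this must be the last instrumentation in the block */
   sg_instrument_final_jump( env, sbOut, sbIn->next, sbIn->jumpkind,
                             layout, gWordTy, hWordTy );
   sg_instrument_fini( env );
#endif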
2019 
2020 
2021 /* --- Helper fns for instrumentation --- */
2022 
2023 static IRTemp gen_Get_SP ( struct _SGEnv*  sge,
2024                            IRSB*           bbOut,
2025                            VexGuestLayout* layout,
2026                            Int             hWordTy_szB )
2027 {
2028    IRExpr* sp_expr;
2029    IRTemp  sp_temp;
2030    IRType  sp_type;
2031    /* This in effect forces the host and guest word sizes to be the
2032       same. */
2033    tl_assert(hWordTy_szB == layout->sizeof_SP);
2034    sp_type = layout->sizeof_SP == 8 ? Ity_I64 : Ity_I32;
2035    sp_expr = IRExpr_Get( layout->offset_SP, sp_type );
2036    sp_temp = sge->newIRTemp_cb( sp_type, sge->newIRTemp_opaque );
2037    addStmtToIRSB( bbOut, IRStmt_WrTmp( sp_temp, sp_expr ) );
2038    return sp_temp;
2039 }
2040 
2041 static IRTemp gen_Get_FP ( struct _SGEnv*  sge,
2042                            IRSB*           bbOut,
2043                            VexGuestLayout* layout,
2044                            Int             hWordTy_szB )
2045 {
2046    IRExpr* fp_expr;
2047    IRTemp  fp_temp;
2048    IRType  fp_type;
2049    /* This in effect forces the host and guest word sizes to be the
2050       same. */
2051    tl_assert(hWordTy_szB == layout->sizeof_SP);
2052    fp_type = layout->sizeof_FP == 8 ? Ity_I64 : Ity_I32;
2053    fp_expr = IRExpr_Get( layout->offset_FP, fp_type );
2054    fp_temp = sge->newIRTemp_cb( fp_type, sge->newIRTemp_opaque );
2055    addStmtToIRSB( bbOut, IRStmt_WrTmp( fp_temp, fp_expr ) );
2056    return fp_temp;
2057 }
2058 
2059 static void instrument_mem_access ( struct _SGEnv* sge,
2060                                     IRSB*   bbOut,
2061                                     IRExpr* addr,
2062                                     Int     szB,
2063                                     Bool    isStore,
2064                                     Int     hWordTy_szB,
2065                                     Addr    curr_IP,
2066                                     VexGuestLayout* layout )
2067 {
2068    IRType  tyAddr      = Ity_INVALID;
2069    XArray* frameBlocks = NULL;
2070 
2071    tl_assert(isIRAtom(addr));
2072    tl_assert(hWordTy_szB == 4 || hWordTy_szB == 8);
2073 
2074    tyAddr = typeOfIRExpr( bbOut->tyenv, addr );
2075    tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
2076 
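   /* Heuristic (by raw byte-matching at curr_IP): skip accesses made by
      the standard x86 prologue ("push %ebp; mov %esp,%ebp") and
      epilogue ("pop %ebp; ret") sequences.  These touch only the saved
      %ebp slot, which no StackBlock describes, so checking them would
      presumably add only noise and cost. */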
2077 #if defined(VGA_x86)
2078    { UChar* p = (UChar*)curr_IP;
2079      // pop %ebp; RET
2080      if (p[-1] == 0x5d && p[0] == 0xc3) return;
2081      // pop %ebp; RET $imm16
2082      if (p[-1] == 0x5d && p[0] == 0xc2) return;
2083      // PUSH %EBP; mov %esp,%ebp
2084      if (p[0] == 0x55 && p[1] == 0x89 && p[2] == 0xe5) return;
2085    }
2086 #endif
2087 
2088    /* First off, find or create the StackBlocks for this instruction. */
2089    frameBlocks = get_StackBlocks_for_IP( curr_IP );
2090    tl_assert(frameBlocks);
2091    //if (VG_(sizeXA)( frameBlocks ) == 0)
2092    //   frameBlocks = NULL;
2093 
2094    /* Generate a call to "helperc__mem_access", passing:
2095          addr current_SP current_FP szB curr_IP frameBlocks
2096    */
2097    { IRTemp t_SP = gen_Get_SP( sge, bbOut, layout, hWordTy_szB );
2098      IRTemp t_FP = gen_Get_FP( sge, bbOut, layout, hWordTy_szB );
2099      IRExpr** args
2100         = mkIRExprVec_6( addr,
2101                          IRExpr_RdTmp(t_SP),
2102                          IRExpr_RdTmp(t_FP),
2103                          mkIRExpr_HWord( isStore ? (-szB) : szB ),
2104                          mkIRExpr_HWord( curr_IP ),
2105                          mkIRExpr_HWord( (HWord)frameBlocks ) );
2106      IRDirty* di
2107         = unsafeIRDirty_0_N( 3/*regparms*/,
2108                              "helperc__mem_access",
2109                              VG_(fnptr_to_fnentry)( &helperc__mem_access ),
2110                              args );
2111 
2112      addStmtToIRSB( bbOut, IRStmt_Dirty(di) );
2113    }
2114 }
2115 
2116 
2117 /* --- Instrumentation main (4 fns) --- */
2118 
2119 struct _SGEnv * sg_instrument_init ( IRTemp (*newIRTemp_cb)(IRType,void*),
2120                                      void* newIRTemp_opaque )
2121 {
2122    struct _SGEnv * env = sg_malloc("di.sg_main.sii.1",
2123                                    sizeof(struct _SGEnv));
2124    tl_assert(env);
2125    env->curr_IP          = 0;
2126    env->curr_IP_known    = False;
2127    env->firstRef         = True;
2128    env->newIRTemp_cb     = newIRTemp_cb;
2129    env->newIRTemp_opaque = newIRTemp_opaque;
2130    return env;
2131 }
2132 
2133 void sg_instrument_fini ( struct _SGEnv * env )
2134 {
2135    sg_free(env);
2136 }
2137 
2138 /* Add instrumentation for 'st' to 'sbOut', and possibly modify 'env'
2139    as required.  This must be called before 'st' itself is added to
2140    'sbOut'. */
2141 void sg_instrument_IRStmt ( /*MOD*/struct _SGEnv * env,
2142                             /*MOD*/IRSB* sbOut,
2143                             IRStmt* st,
2144                             VexGuestLayout* layout,
2145                             IRType gWordTy, IRType hWordTy )
2146 {
2147    if (!sg_clo_enable_sg_checks)
2148       return;
2149 
2150    tl_assert(st);
2151    tl_assert(isFlatIRStmt(st));
2152    switch (st->tag) {
2153       case Ist_NoOp:
2154       case Ist_AbiHint:
2155       case Ist_Put:
2156       case Ist_PutI:
2157       case Ist_MBE:
2158          /* None of these can contain any memory references. */
2159          break;
2160 
2161       case Ist_Exit:
2162          tl_assert(st->Ist.Exit.jk != Ijk_Call);
2163          /* else we must deal with a conditional call */
2164          break;
2165 
2166       case Ist_IMark:
2167          env->curr_IP_known = True;
2168          env->curr_IP       = (Addr)st->Ist.IMark.addr;
2169          env->firstRef      = True;
2170          break;
2171 
2172       case Ist_Store:
2173          tl_assert(env->curr_IP_known);
2174          if (env->firstRef) {
2175             instrument_mem_access(
2176                env, sbOut,
2177                st->Ist.Store.addr,
2178                sizeofIRType(typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data)),
2179                True/*isStore*/,
2180                sizeofIRType(hWordTy),
2181                env->curr_IP, layout
2182             );
2183             env->firstRef = False;
2184          }
2185          break;
2186 
2187       case Ist_WrTmp: {
2188          IRExpr* data = st->Ist.WrTmp.data;
2189          if (data->tag == Iex_Load) {
2190             tl_assert(env->curr_IP_known);
2191             if (env->firstRef) {
2192                instrument_mem_access(
2193                   env, sbOut,
2194                   data->Iex.Load.addr,
2195                   sizeofIRType(data->Iex.Load.ty),
2196                   False/*!isStore*/,
2197                   sizeofIRType(hWordTy),
2198                   env->curr_IP, layout
2199                );
2200                env->firstRef = False;
2201             }
2202          }
2203          break;
2204       }
2205 
2206       case Ist_Dirty: {
2207          Int      dataSize;
2208          IRDirty* d = st->Ist.Dirty.details;
2209          if (d->mFx != Ifx_None) {
2210             /* This dirty helper accesses memory.  Collect the
2211                details. */
2212             tl_assert(env->curr_IP_known);
2213             if (env->firstRef) {
2214                tl_assert(d->mAddr != NULL);
2215                tl_assert(d->mSize != 0);
2216                dataSize = d->mSize;
2217                if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
2218                   instrument_mem_access(
2219                      env, sbOut, d->mAddr, dataSize, False/*!isStore*/,
2220                      sizeofIRType(hWordTy), env->curr_IP, layout
2221                   );
2222                }
2223                if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
2224                   instrument_mem_access(
2225                      env, sbOut, d->mAddr, dataSize, True/*isStore*/,
2226                      sizeofIRType(hWordTy), env->curr_IP, layout
2227                   );
2228                }
2229                env->firstRef = False;
2230             }
2231          } else {
2232             tl_assert(d->mAddr == NULL);
2233             tl_assert(d->mSize == 0);
2234          }
2235          break;
2236       }
2237 
2238       case Ist_CAS: {
2239          /* We treat it as a read and a write of the location.  I
2240             think that is the same behaviour as it was before IRCAS
2241             was introduced, since prior to that point, the Vex front
2242             ends would translate a lock-prefixed instruction into a
2243             (normal) read followed by a (normal) write. */
2244          if (env->firstRef) {
2245             Int    dataSize;
2246             IRCAS* cas = st->Ist.CAS.details;
2247             tl_assert(cas->addr != NULL);
2248             tl_assert(cas->dataLo != NULL);
2249             dataSize = sizeofIRType(typeOfIRExpr(sbOut->tyenv, cas->dataLo));
2250             if (cas->dataHi != NULL)
2251                dataSize *= 2; /* since it's a doubleword-CAS */
2252             instrument_mem_access(
2253                env, sbOut, cas->addr, dataSize, False/*!isStore*/,
2254                sizeofIRType(hWordTy), env->curr_IP, layout
2255             );
2256             instrument_mem_access(
2257                env, sbOut, cas->addr, dataSize, True/*isStore*/,
2258                sizeofIRType(hWordTy), env->curr_IP, layout
2259             );
2260             env->firstRef = False;
2261          }
2262          break;
2263       }
2264 
2265       default:
2266          tl_assert(0);
2267 
2268    } /* switch (st->tag) */
2269 }
2270 
2271 
2272 /* Add instrumentation for the final jump of an IRSB 'sbOut', and
2273    possibly modify 'env' as required.  This must be the last
2274    instrumentation statement in the block. */
2275 void sg_instrument_final_jump ( /*MOD*/struct _SGEnv * env,
2276                                 /*MOD*/IRSB* sbOut,
2277                                 IRExpr* next,
2278                                 IRJumpKind jumpkind,
2279                                 VexGuestLayout* layout,
2280                                 IRType gWordTy, IRType hWordTy )
2281 {
2282    if (!sg_clo_enable_sg_checks)
2283       return;
2284 
2285    if (jumpkind == Ijk_Call) {
2286       // Assumes x86 or amd64
2287       IRTemp   sp_post_call_insn, fp_post_call_insn;
2288       XArray*  frameBlocks;
2289       IRExpr** args;
2290       IRDirty* di;
2291       sp_post_call_insn
2292          = gen_Get_SP( env, sbOut, layout, sizeofIRType(hWordTy) );
2293       fp_post_call_insn
2294          = gen_Get_FP( env, sbOut, layout, sizeofIRType(hWordTy) );
2295       tl_assert(env->curr_IP_known);
2296       frameBlocks = get_StackBlocks_for_IP( env->curr_IP );
2297       tl_assert(frameBlocks);
2298       if (VG_(sizeXA)(frameBlocks) == 0)
2299          frameBlocks = NULL;
2300       args
2301          = mkIRExprVec_5(
2302               IRExpr_RdTmp(sp_post_call_insn),
2303               IRExpr_RdTmp(fp_post_call_insn),
2304                          /* assume the call doesn't change FP */
2305               next,
2306               mkIRExpr_HWord( (HWord)frameBlocks ),
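              /* sp_adjust: the call insn pushed one guest word (the
                 return address), letting the callee recover sp-at-call */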
2307               mkIRExpr_HWord( sizeofIRType(gWordTy) )
2308            );
2309       di = unsafeIRDirty_0_N(
2310               3/*regparms*/,
2311               "helperc__new_frame",
2312               VG_(fnptr_to_fnentry)( &helperc__new_frame ),
2313               args );
2314       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
2315    }
2316 }
2317 
2318 
2319 //////////////////////////////////////////////////////////////
2320 //                                                          //
2321 // end Instrumentation                                      //
2322 //                                                          //
2323 //////////////////////////////////////////////////////////////
2324 
2325 
2326 //////////////////////////////////////////////////////////////
2327 //                                                          //
2328 // misc                                                     //
2329 //                                                          //
2330 //////////////////////////////////////////////////////////////
2331 
2332 /* Make a new empty stack frame that is suitable for being the
2333    outermost frame in a stack.  It has a creation_sp of effectively
2334    infinity, so it can never be removed. */
2335 static StackFrame* new_root_StackFrame ( void )
2336 {
2337    StackFrame* sframe = sg_malloc("di.sg_main.nrS.1", sizeof(StackFrame));
2338    VG_(memset)( sframe, 0, sizeof(*sframe) );
2339    sframe->creation_sp = ~0UL;
2340 
2341    /* This sets up .htab, .htab_size and .htab_used */
2342    initialise_II_hash_table( sframe );
2343 
2344    /* ->depth, ->outer, ->inner are 0, NULL, NULL */
2345 
2346    return sframe;
2347 }
2348 
2349 /* Primary routine for setting up the shadow stack for a new thread.
2350    Note that this is used to create not only child thread stacks, but
2351    the root thread's stack too.  We create a new stack with
2352    .creation_sp set to infinity, so that the outermost frame can never
2353    be removed (by shadowStack_unwind).  The core calls this function
2354    as soon as a thread is created.  We cannot yet get its SP value,
2355    since that may not yet be set. */
2356 static void shadowStack_thread_create ( ThreadId parent, ThreadId child )
2357 {
2358    tl_assert(is_sane_TId(child));
2359    if (parent == VG_INVALID_THREADID) {
2360       /* creating the main thread's stack */
2361    } else {
2362       tl_assert(is_sane_TId(parent));
2363       tl_assert(parent != child);
2364       tl_assert(shadowStacks[parent] != NULL);
2365       tl_assert(siTrees[parent] != NULL);
2366    }
2367 
2368    /* Create the child's stack.  Bear in mind we may be re-using
2369       it. */
2370    if (shadowStacks[child] == NULL) {
2371       /* First use of this stack.  Just allocate an initial frame. */
2372       tl_assert(siTrees[child] == NULL);
2373    } else {
2374       StackFrame *frame, *frame2;
2375       /* re-using a stack. */
2376       /* get rid of the interval tree */
2377       tl_assert(siTrees[child] != NULL);
2378       delete_StackTree( siTrees[child] );
2379       siTrees[child] = NULL;
2380       /* Throw away all existing frames. */
2381       frame = shadowStacks[child];
2382       while (frame->outer)
2383          frame = frame->outer;
2384       tl_assert(frame->depth == 0);
2385       while (frame) {
2386          frame2 = frame->inner;
2387          if (frame2) tl_assert(1 + frame->depth == frame2->depth);
2388          sg_free(frame);
2389          frame = frame2;
2390       }
2391       shadowStacks[child] = NULL;
2392    }
2393 
2394    tl_assert(shadowStacks[child] == NULL);
2395    tl_assert(siTrees[child] == NULL);
2396 
2397    /* Set up the initial stack frame. */
2398    shadowStacks[child] = new_root_StackFrame();
2399 
2400    /* and set up the child's stack block interval tree. */
2401    siTrees[child] = new_StackTree();
2402 }
2403 
2404 /* Once a thread is ready to go, the core calls here.  We take the
2405    opportunity to push a second frame on its stack, with the
2406    presumably valid SP value that is going to be used for the thread's
2407    startup.  Hence we should always wind up with a valid outermost
2408    frame for the thread. */
2409 static void shadowStack_set_initial_SP ( ThreadId tid )
2410 {
2411    StackFrame* sf;
2412    tl_assert(is_sane_TId(tid));
2413    sf = shadowStacks[tid];
2414    tl_assert(sf != NULL);
2415    tl_assert(sf->outer == NULL);
2416    tl_assert(sf->inner == NULL);
2417    tl_assert(sf->creation_sp == ~0UL);
2418    shadowStack_new_frame( tid, 0, VG_(get_SP)(tid),
2419                                0, VG_(get_IP)(tid), NULL );
2420 }
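
/* Summarising the thread lifecycle as driven by the core (sketch):
   sg_pre_thread_ll_create builds the root frame with SP still unknown;
   sg_pre_thread_first_insn then pushes a second frame carrying the real
   initial SP.  Thereafter helperc__new_frame and sg_die_mem_stack track
   calls and returns. */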
2421 
2422 
2423 //////////////////////////////////////////////////////////////
2424 //                                                          //
2425 // main-ish                                                 //
2426 //                                                          //
2427 //////////////////////////////////////////////////////////////
2428 
2429 /* CALLED indirectly FROM GENERATED CODE.  Calls here are created by
2430    sp-change analysis, as requested in pc_pre_clo_init(). */
2431 void sg_die_mem_stack ( Addr old_SP, SizeT len ) {
2432    ThreadId  tid = VG_(get_running_tid)();
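   /* The dying range is [old_SP, old_SP+len), so old_SP+len is the new,
      higher SP; unwind every shadow frame created below it. */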
2433    shadowStack_unwind( tid, old_SP+len );
2434 }
2435 
2436 void sg_pre_clo_init ( void ) {
2437    ourGlobals_init();
2438    init_StackBlocks_set();
2439    init_GlobalBlock_set();
2440 }
2441 
2442 void sg_post_clo_init ( void ) {
2443 }
2444 
2445 void sg_pre_thread_ll_create ( ThreadId parent, ThreadId child ) {
2446    shadowStack_thread_create(parent, child);
2447 }
2448 
2449 void sg_pre_thread_first_insn ( ThreadId tid ) {
2450    shadowStack_set_initial_SP(tid);
2451 }
2452 
2453 void sg_fini(Int exitcode)
2454 {
2455    if (VG_(clo_stats)) {
2456       VG_(message)(Vg_DebugMsg,
2457          " sg_:  %'llu total accesses, of which:\n", stats__total_accesses);
2458       VG_(message)(Vg_DebugMsg,
2459          " sg_:     stack0: %'12llu classify\n",
2460          stats__classify_Stack0);
2461       VG_(message)(Vg_DebugMsg,
2462          " sg_:     stackN: %'12llu classify\n",
2463          stats__classify_StackN);
2464       VG_(message)(Vg_DebugMsg,
2465          " sg_:     global: %'12llu classify\n",
2466          stats__classify_Global);
2467       VG_(message)(Vg_DebugMsg,
2468          " sg_:    unknown: %'12llu classify\n",
2469          stats__classify_Unknown);
2470       VG_(message)(Vg_DebugMsg,
2471          " sg_:  %'llu Invars preened, of which %'llu changed\n",
2472          stats__Invars_preened, stats__Invars_changed);
2473       VG_(message)(Vg_DebugMsg,
2474          " sg_:   t_i_b_MT: %'12llu\n", stats__t_i_b_empty);
2475       VG_(message)(Vg_DebugMsg,
2476          " sg_:     qcache: %'llu searches, %'llu probes, %'llu misses\n",
2477          stats__qcache_queries, stats__qcache_probes, stats__qcache_misses);
2478       VG_(message)(Vg_DebugMsg,
2479          " sg_:  htab-fast: %'llu hits\n",
2480          stats__htab_fast);
2481       VG_(message)(Vg_DebugMsg,
2482          " sg_:  htab-slow: %'llu searches, %'llu probes, %'llu resizes\n",
2483          stats__htab_searches, stats__htab_probes, stats__htab_resizes);
2484    }
2485 }
2486 
2487 
2488 /*--------------------------------------------------------------------*/
2489 /*--- end                                                sg_main.c ---*/
2490 /*--------------------------------------------------------------------*/
2491