
/*--------------------------------------------------------------------*/
/*--- Ptrcheck: a pointer-use checker.                             ---*/
/*--- This file checks stack and global array accesses.            ---*/
/*---                                                    sg_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Ptrcheck, a Valgrind tool for checking pointer
   use in programs.

   Copyright (C) 2008-2013 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "pub_tool_basics.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_xarray.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_machine.h"
#include "pub_tool_debuginfo.h"
#include "pub_tool_options.h"

#include "pc_common.h"

#include "sg_main.h"      // self


static
void preen_global_Invars ( Addr a, SizeT len ); /*fwds*/


//////////////////////////////////////////////////////////////
//                                                          //
// Basic Stuff                                              //
//                                                          //
//////////////////////////////////////////////////////////////

static inline Bool is_sane_TId ( ThreadId tid )
{
   return tid >= 0 && tid < VG_N_THREADS
          && tid != VG_INVALID_THREADID;
}

static void* sg_malloc ( const HChar* cc, SizeT n ) {
   void* p;
   tl_assert(n > 0);
   p = VG_(malloc)( cc, n );
   tl_assert(p);
   return p;
}

static void sg_free ( void* p ) {
   tl_assert(p);
   VG_(free)(p);
}


/* Compare the intervals [a1,a1+n1) and [a2,a2+n2).  Return -1 if the
   first interval is lower, 1 if the first interval is higher, and 0
   if there is any overlap.  Redundant paranoia with casting is there
   following what looked distinctly like a bug in gcc-4.1.2, in which
   some of the comparisons were done signedly instead of
   unsignedly. */
inline
static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
                                     Addr a2, SizeT n2 ) {
   UWord a1w = (UWord)a1;
   UWord n1w = (UWord)n1;
   UWord a2w = (UWord)a2;
   UWord n2w = (UWord)n2;
   tl_assert(n1w > 0 && n2w > 0);
   if (a1w + n1w <= a2w) return -1L;
   if (a2w + n2w <= a1w) return 1L;
   return 0;
}
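
/* Illustrative sketch (not part of the tool): how the three outcomes
   of cmp_nonempty_intervals arise.  The addresses below are made-up
   values, chosen only to demonstrate the ordering. */
__attribute__((unused))
static void example_cmp_nonempty_intervals ( void )
{
   /* [0x1000,0x1010) lies entirely below [0x2000,0x2008): -1 */
   tl_assert(cmp_nonempty_intervals(0x1000, 16, 0x2000, 8) == -1);
   /* ... and the other way round gives 1 */
   tl_assert(cmp_nonempty_intervals(0x2000, 8, 0x1000, 16) == 1);
   /* [0x1000,0x1010) and [0x100F,0x1011) share byte 0x100F: 0.
      Any overlap at all compares equal, which is exactly what the
      WordFM interval trees below rely on for lookups. */
   tl_assert(cmp_nonempty_intervals(0x1000, 16, 0x100F, 2) == 0);
}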

/* Return true iff [aSmall,aSmall+nSmall) is entirely contained
   within [aBig,aBig+nBig). */
inline
static Bool is_subinterval_of ( Addr aBig, SizeT nBig,
                                Addr aSmall, SizeT nSmall ) {
   tl_assert(nBig > 0 && nSmall > 0);
   return aBig <= aSmall && aSmall + nSmall <= aBig + nBig;
}

inline
static Addr Addr__min ( Addr a1, Addr a2 ) {
   return a1 < a2 ? a1 : a2;
}

inline
static Addr Addr__max ( Addr a1, Addr a2 ) {
   return a1 < a2 ? a2 : a1;
}


//////////////////////////////////////////////////////////////
//                                                          //
// StackBlocks Persistent Cache                             //
//                                                          //
//////////////////////////////////////////////////////////////

/* We maintain a set of XArray* of StackBlocks.  These are never
   freed.  When a new StackBlock vector is acquired from
   VG_(di_get_stack_blocks_at_ip), we compare it to the existing set.
   If not present, it is added.  If present, the just-acquired one is
   freed and the copy used.

   This simplifies storage management elsewhere.  It allows us to
   assume that a pointer to an XArray* of StackBlock is valid forever.
   It also means there are no duplicates anywhere, which could be
   important from a space point of view for programs that generate a
   lot of translations, or where translations are frequently discarded
   and re-made.

   Note that we normalise the arrays by sorting the elements according
   to an arbitrary total order, so as to avoid the situation that two
   vectors describe the same set of variables but are not structurally
   identical. */

static inline Bool StackBlock__sane ( const StackBlock* fb )
{
   if (fb->name[ sizeof(fb->name)-1 ] != 0)
      return False;
   if (fb->spRel != False && fb->spRel != True)
      return False;
   if (fb->isVec != False && fb->isVec != True)
      return False;
   return True;
}

/* Generate an arbitrary total ordering on StackBlocks. */
static Word StackBlock__cmp ( const StackBlock* fb1, const StackBlock* fb2 )
{
   Word r;
   tl_assert(StackBlock__sane(fb1));
   tl_assert(StackBlock__sane(fb2));
   /* Hopefully the .base test hits most of the time.  For the blocks
      associated with any particular instruction, if the .base values
      are the same then probably it doesn't make sense for the other
      fields to be different.  But this is supposed to be a completely
      general structural total order, so we have to compare everything
      anyway. */
   if (fb1->base < fb2->base) return -1;
   if (fb1->base > fb2->base) return 1;
   /* compare sizes */
   if (fb1->szB < fb2->szB) return -1;
   if (fb1->szB > fb2->szB) return 1;
   /* compare sp/fp flag */
   if (fb1->spRel < fb2->spRel) return -1;
   if (fb1->spRel > fb2->spRel) return 1;
   /* compare is/is-not array-typed flag */
   if (fb1->isVec < fb2->isVec) return -1;
   if (fb1->isVec > fb2->isVec) return 1;
   /* compare the name */
   r = (Word)VG_(strcmp)(fb1->name, fb2->name);
   return r;
}

/* Returns True if all fields except .szB are the same.  szBs may or
   may not be the same; they are simply not consulted. */
static Bool StackBlock__all_fields_except_szB_are_equal (
               StackBlock* fb1,
               StackBlock* fb2
            )
{
   tl_assert(StackBlock__sane(fb1));
   tl_assert(StackBlock__sane(fb2));
   return fb1->base == fb2->base
          && fb1->spRel == fb2->spRel
          && fb1->isVec == fb2->isVec
          && 0 == VG_(strcmp)(fb1->name, fb2->name);
}


/* Generate an arbitrary total ordering on vectors of StackBlocks. */
static Word StackBlocks__cmp ( XArray* fb1s, XArray* fb2s )
{
   Word i, r, n1, n2;
   n1 = VG_(sizeXA)( fb1s );
   n2 = VG_(sizeXA)( fb2s );
   if (n1 < n2) return -1;
   if (n1 > n2) return 1;
   for (i = 0; i < n1; i++) {
      StackBlock *fb1, *fb2;
      fb1 = VG_(indexXA)( fb1s, i );
      fb2 = VG_(indexXA)( fb2s, i );
      r = StackBlock__cmp( fb1, fb2 );
      if (r != 0) return r;
   }
   tl_assert(i == n1 && i == n2);
   return 0;
}

static void pp_StackBlocks ( XArray* sbs )
{
   Word i, n = VG_(sizeXA)( sbs );
   VG_(message)(Vg_DebugMsg, "<<< STACKBLOCKS\n" );
   for (i = 0; i < n; i++) {
      StackBlock* sb = (StackBlock*)VG_(indexXA)( sbs, i );
      VG_(message)(Vg_DebugMsg,
         "   StackBlock{ off %ld szB %lu spRel:%c isVec:%c \"%s\" }\n",
         sb->base, sb->szB, sb->spRel ? 'Y' : 'N',
         sb->isVec ? 'Y' : 'N', &sb->name[0]
      );
   }
   VG_(message)(Vg_DebugMsg, ">>> STACKBLOCKS\n" );
}


/* ---------- The StackBlock vector cache ---------- */

static WordFM* /* XArray* of StackBlock -> nothing */
       frameBlocks_set = NULL;

static void init_StackBlocks_set ( void )
{
   tl_assert(!frameBlocks_set);
   frameBlocks_set
      = VG_(newFM)( sg_malloc, "di.sg_main.iSBs.1", sg_free,
                    (Word(*)(UWord,UWord))StackBlocks__cmp );
   tl_assert(frameBlocks_set);
}

/* Find the given StackBlock-vector in our collection thereof.  If
   found, deallocate the supplied one, and return the address of the
   copy.  If not found, add the supplied one to our collection and
   return its address. */
static XArray* /* of StackBlock */
       StackBlocks__find_and_dealloc__or_add
          ( XArray* /* of StackBlock */ orig )
{
   UWord key, val;

   /* First, normalise, as per comments above. */
   VG_(setCmpFnXA)( orig, (XACmpFn_t)StackBlock__cmp );
   VG_(sortXA)( orig );

   /* Now get rid of any exact duplicates. */
  nuke_dups:
   { Word r, w, nEQ, n = VG_(sizeXA)( orig );
     if (n >= 2) {
        w = 0;
        nEQ = 0;
        for (r = 0; r < n; r++) {
           if (r+1 < n) {
              StackBlock* pR0 = VG_(indexXA)( orig, r+0 );
              StackBlock* pR1 = VG_(indexXA)( orig, r+1 );
              Word c = StackBlock__cmp(pR0,pR1);
              tl_assert(c == -1 || c == 0);
              if (c == 0) { nEQ++; continue; }
           }
           if (w != r) {
              StackBlock* pW = VG_(indexXA)( orig, w );
              StackBlock* pR = VG_(indexXA)( orig, r );
              *pW = *pR;
           }
           w++;
        }
        tl_assert(r == n);
        tl_assert(w + nEQ == n);
        if (w < n) {
           VG_(dropTailXA)( orig, n-w );
        }
        if (0) VG_(printf)("delta %ld\n", n-w);
     }
   }

   /* Deal with the following strangeness, where two otherwise
      identical blocks are claimed to have different sizes.  In that
      case, use the larger size. */
   /* StackBlock{ off 16 szB 66 spRel:Y isVec:Y "sz" }
      StackBlock{ off 16 szB 130 spRel:Y isVec:Y "sz" }
      StackBlock{ off 208 szB 16 spRel:Y isVec:Y "ar" }
   */
   { Word i, n = VG_(sizeXA)( orig );
     if (n >= 2) {
        for (i = 0; i < n-1; i++) {
           StackBlock* sb0 = VG_(indexXA)( orig, i+0 );
           StackBlock* sb1 = VG_(indexXA)( orig, i+1 );
           if (StackBlock__all_fields_except_szB_are_equal(sb0, sb1)) {
              /* They can't be identical because the previous tidying
                 pass would have removed the duplicates.  And they
                 can't be > because the earlier sorting pass would
                 have ordered otherwise-identical descriptors
                 according to < on .szB fields.  Hence: */
              tl_assert(sb0->szB < sb1->szB);
              sb0->szB = sb1->szB;
              /* This makes the blocks identical, at the size of the
                 larger one.  Rather than go to all the hassle of
                 sliding the rest down, simply go back to the
                 remove-duplicates stage.  The assertion guarantees
                 that we eventually make progress, since the rm-dups
                 stage will get rid of one of the blocks.  This is
                 expected to happen only exceedingly rarely. */
              tl_assert(StackBlock__cmp(sb0,sb1) == 0);
              goto nuke_dups;
           }
        }
     }
   }

   /* If there are any blocks which overlap and have the same
      spRel-ness, junk the whole descriptor; it's obviously bogus.
      Icc11 certainly generates bogus info from time to time.

      This check is pretty weak; really we ought to have a stronger
      sanity check. */
   { Word i, n = VG_(sizeXA)( orig );
     static Int moans = 3;
     for (i = 0; i < n-1; i++) {
       StackBlock* sb1 = (StackBlock*)VG_(indexXA)( orig, i );
       StackBlock* sb2 = (StackBlock*)VG_(indexXA)( orig, i+1 );
       if (sb1->spRel == sb2->spRel
           && (sb1->base >= sb2->base
               || sb1->base + sb1->szB > sb2->base)) {
          if (moans > 0 && !VG_(clo_xml)) {
             moans--;
             VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
                                      "overlapping stack blocks\n");
             if (VG_(clo_verbosity) >= 2)
                pp_StackBlocks(orig);
             if (moans == 0)
                VG_(message)(Vg_UserMsg, "Further instances of this "
                                         "message will not be shown\n" );
          }
          VG_(dropTailXA)( orig, VG_(sizeXA)( orig ));
          break;
       }
     }
   }

   /* Now, do we have it already? */
   if (VG_(lookupFM)( frameBlocks_set, &key, &val, (UWord)orig )) {
      /* yes */
      XArray* res;
      tl_assert(val == 0);
      tl_assert(key != (UWord)orig);
      VG_(deleteXA)(orig);
      res = (XArray*)key;
      return res;
   } else {
      /* no */
      VG_(addToFM)( frameBlocks_set, (UWord)orig, 0 );
      return orig;
   }
}

/* Top level function for getting the StackBlock vector for a given
   instruction.  It is guaranteed that the returned pointer will be
   valid for the entire rest of the run, and also that the addresses
   of the individual elements of the array will not change. */

static XArray* /* of StackBlock */ get_StackBlocks_for_IP ( Addr ip )
{
   XArray* blocks = VG_(di_get_stack_blocks_at_ip)( ip, True/*arrays only*/ );
   tl_assert(blocks);
   return StackBlocks__find_and_dealloc__or_add( blocks );
}
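
/* Illustrative sketch (not part of the tool): the interning property
   of the cache.  Because vectors are normalised and then shared, two
   queries for the same instruction address must yield pointer-equal
   results, so callers may compare vectors by address alone.  Assumes
   init_StackBlocks_set() has already run; 'some_ip' is a stand-in
   for any code address with debuginfo. */
__attribute__((unused))
static void example_StackBlocks_interning ( Addr some_ip )
{
   XArray* v1 = get_StackBlocks_for_IP( some_ip );
   XArray* v2 = get_StackBlocks_for_IP( some_ip );
   tl_assert(v1 == v2); /* same interned vector, not merely equal */
}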


//////////////////////////////////////////////////////////////
//                                                          //
// GlobalBlocks Persistent Cache                            //
//                                                          //
//////////////////////////////////////////////////////////////

/* Generate an arbitrary total ordering on GlobalBlocks. */
static Word GlobalBlock__cmp ( GlobalBlock* gb1, GlobalBlock* gb2 )
{
   Word r;
   /* compare addrs */
   if (gb1->addr < gb2->addr) return -1;
   if (gb1->addr > gb2->addr) return 1;
   /* compare sizes */
   if (gb1->szB < gb2->szB) return -1;
   if (gb1->szB > gb2->szB) return 1;
   /* compare is/is-not array-typed flag */
   if (gb1->isVec < gb2->isVec) return -1;
   if (gb1->isVec > gb2->isVec) return 1;
   /* compare the name */
   r = (Word)VG_(strcmp)(gb1->name, gb2->name);
   if (r != 0) return r;
   /* compare the soname */
   r = (Word)VG_(strcmp)(gb1->soname, gb2->soname);
   return r;
}

static WordFM* /* GlobalBlock* -> nothing */
       globalBlock_set = NULL;

static void init_GlobalBlock_set ( void )
{
   tl_assert(!globalBlock_set);
   globalBlock_set
      = VG_(newFM)( sg_malloc, "di.sg_main.iGBs.1", sg_free,
                    (Word(*)(UWord,UWord))GlobalBlock__cmp );
   tl_assert(globalBlock_set);
}


/* Top level function for making GlobalBlocks persistent.  Call here
   with a non-persistent version, and the returned one is guaranteed
   to be valid for the entire rest of the run.  The supplied one is
   copied, not stored, so can be freed after the call. */

static GlobalBlock* get_persistent_GlobalBlock ( GlobalBlock* orig )
{
   UWord key, val;
   /* Now, do we have it already? */
   if (VG_(lookupFM)( globalBlock_set, &key, &val, (UWord)orig )) {
      /* yes, return the copy */
      GlobalBlock* res;
      tl_assert(val == 0);
      res = (GlobalBlock*)key;
      tl_assert(res != orig);
      return res;
   } else {
      /* no.  clone it, store the clone and return the clone's
         address. */
      GlobalBlock* clone = sg_malloc( "di.sg_main.gpGB.1",
                                      sizeof(GlobalBlock) );
      tl_assert(clone);
      *clone = *orig;
      VG_(addToFM)( globalBlock_set, (UWord)clone, 0 );
      return clone;
   }
}
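
/* Illustrative sketch (not part of the tool): typical use of the
   persistent-copy scheme.  A caller fills in a temporary GlobalBlock,
   interns it, and may then discard the temporary; only the returned
   pointer is kept.  Assumes init_GlobalBlock_set() has already run;
   all field values here are invented. */
__attribute__((unused))
static void example_persistent_GlobalBlock ( void )
{
   GlobalBlock  tmp;
   GlobalBlock* gbp;
   VG_(memset)(&tmp, 0, sizeof(tmp));
   tmp.addr  = 0x804a000;   /* hypothetical load address */
   tmp.szB   = 64;
   tmp.isVec = True;
   VG_(strcpy)(tmp.name,   "arr");
   VG_(strcpy)(tmp.soname, "NONE");
   gbp = get_persistent_GlobalBlock( &tmp );
   /* 'tmp' may now be reused or discarded; 'gbp' stays valid. */
   tl_assert(gbp != &tmp);
}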


//////////////////////////////////////////////////////////////
//                                                          //
// Interval tree of StackTreeBlock                          //
//                                                          //
//////////////////////////////////////////////////////////////

/* A node in a stack interval tree.  Zero length intervals (.szB == 0)
   are not allowed.

   A stack interval tree is a (WordFM StackTreeNode* void).  There is
   one stack interval tree for each thread.
*/
typedef
   struct {
      Addr        addr;
      SizeT       szB;   /* copied from .descr->szB */
      StackBlock* descr; /* it's an instance of this block */
      UWord       depth; /* depth of stack at time block was pushed */
   }
   StackTreeNode;

static void pp_StackTree ( WordFM* sitree, const HChar* who )
{
   UWord keyW, valW;
   VG_(printf)("<<< BEGIN pp_StackTree %s\n", who );
   VG_(initIterFM)( sitree );
   while (VG_(nextIterFM)( sitree, &keyW, &valW )) {
      StackTreeNode* nd = (StackTreeNode*)keyW;
      VG_(printf)("  [%#lx,+%lu) descr=%p %s %lu\n", nd->addr, nd->szB,
                  nd->descr, nd->descr->name, nd->descr->szB);
   }
   VG_(printf)(">>> END   pp_StackTree %s\n", who );
}

/* Interval comparison function for StackTreeNode */
static Word cmp_intervals_StackTreeNode ( StackTreeNode* sn1,
                                          StackTreeNode* sn2 )
{
   return cmp_nonempty_intervals(sn1->addr, sn1->szB,
                                 sn2->addr, sn2->szB);
}

/* Find the node holding 'a', if any. */
static StackTreeNode* find_StackTreeNode ( WordFM* sitree, Addr a )
{
   UWord keyW, valW;
   StackTreeNode key;
   tl_assert(sitree);
   key.addr = a;
   key.szB  = 1;
   if (VG_(lookupFM)( sitree, &keyW, &valW, (UWord)&key )) {
      StackTreeNode* res = (StackTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(res != &key);
      return res;
   } else {
      return NULL;
   }
}
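
/* Illustrative sketch (not part of the tool): why the 1-byte probe
   key works.  Under cmp_nonempty_intervals, any overlap compares as
   equal, so probing with [a,a+1) matches exactly the node (if any)
   whose interval contains 'a'.  The address below is invented. */
__attribute__((unused))
static void example_StackTree_lookup ( WordFM* sitree )
{
   /* Suppose sitree holds a node covering [0x7ff0000,+32).  Then: */
   StackTreeNode* nd = find_StackTreeNode( sitree, 0x7ff0010 );
   if (nd)
      tl_assert(nd->addr <= 0x7ff0010
                && 0x7ff0010 < nd->addr + nd->szB);
}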

/* Note that the supplied XArray of StackBlock must have been
   made persistent already. */
__attribute__((noinline))
static void add_blocks_to_StackTree (
               /*MOD*/WordFM* sitree,
               XArray* /* StackBlock */ descrs,
               XArray* /* Addr */ bases,
               UWord depth
            )
{
   Bool debug = (Bool)0;
   Word i, nDescrs, nBases;

   nDescrs = VG_(sizeXA)( descrs );
   nBases  = VG_(sizeXA)( bases );
   tl_assert(nDescrs == nBases);

   if (nDescrs == 0) return;

   tl_assert(sitree);
   if (debug) {
      VG_(printf)("\ndepth = %lu\n", depth);
      pp_StackTree( sitree, "add_blocks_to_StackTree-pre" );
      pp_StackBlocks(descrs);
   }

   for (i = 0; i < nDescrs; i++) {
      Bool already_present;
      StackTreeNode* nyu;
      Addr        addr  = *(Addr*)VG_(indexXA)( bases, i );
      StackBlock* descr = (StackBlock*)VG_(indexXA)( descrs, i );
      tl_assert(descr->szB > 0);
      nyu = sg_malloc( "di.sg_main.abtST.1", sizeof(StackTreeNode) );
      nyu->addr  = addr;
      nyu->szB   = descr->szB;
      nyu->descr = descr;
      nyu->depth = depth;
      if (debug) VG_(printf)("ADD %#lx %lu\n", addr, descr->szB);
      already_present = VG_(addToFM)( sitree, (UWord)nyu, 0 );
      /* The interval can't already be there; else we have
         overlapping stack blocks. */
      tl_assert(!already_present);
      if (debug) {
         pp_StackTree( sitree, "add_blocks_to_StackTree-step" );
      }
   }
   if (debug) {
      pp_StackTree( sitree, "add_blocks_to_StackTree-post" );
      VG_(printf)("\n");
   }
}

static void del_blocks_from_StackTree ( /*MOD*/WordFM* sitree,
                                        XArray* /* Addr */ bases )
{
   UWord oldK, oldV;
   Word i, nBases = VG_(sizeXA)( bases );
   for (i = 0; i < nBases; i++) {
      Bool b;
      Addr addr = *(Addr*)VG_(indexXA)( bases, i );
      StackTreeNode* nd = find_StackTreeNode(sitree, addr);
      /* The interval must be there; we added it earlier when
         the associated frame was created. */
      tl_assert(nd);
      b = VG_(delFromFM)( sitree, &oldK, &oldV, (UWord)nd );
      /* we just found the block! */
      tl_assert(b);
      tl_assert(oldV == 0);
      tl_assert(nd == (StackTreeNode*)oldK);
      sg_free(nd);
   }
}


static void delete_StackTree__kFin ( UWord keyW ) {
   StackTreeNode* nd = (StackTreeNode*)keyW;
   tl_assert(nd);
   sg_free(nd);
}
static void delete_StackTree__vFin ( UWord valW ) {
   tl_assert(valW == 0);
}
static void delete_StackTree ( WordFM* sitree )
{
   VG_(deleteFM)( sitree,
                 delete_StackTree__kFin, delete_StackTree__vFin );
}

static WordFM* new_StackTree ( void ) {
   return VG_(newFM)( sg_malloc, "di.sg_main.nST.1", sg_free,
                      (Word(*)(UWord,UWord))cmp_intervals_StackTreeNode );
}
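
/* Illustrative sketch (not part of the tool): the lifecycle of a
   per-thread stack tree, compressed into one function.  In the tool
   proper, creation happens at thread start, add/del happen around
   calls and returns, and deletion happens at thread exit.  'descrs'
   must already be a persistent (interned) StackBlock vector. */
__attribute__((unused))
static void example_StackTree_lifecycle ( XArray* descrs, XArray* bases )
{
   WordFM* tree = new_StackTree();
   add_blocks_to_StackTree( tree, descrs, bases, 0/*depth*/ );
   /* ... queries via find_StackTreeNode happen here ... */
   del_blocks_from_StackTree( tree, bases );
   delete_StackTree( tree );
}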


//////////////////////////////////////////////////////////////
//                                                          //
// Interval tree of GlobalTreeBlock                         //
//                                                          //
//////////////////////////////////////////////////////////////

/* A node in a global interval tree.  Zero length intervals
   (.szB == 0) are not allowed.

   A global interval tree is a (WordFM GlobalTreeNode* void).  There
   is one global interval tree for the entire process.
*/
typedef
   struct {
      Addr         addr; /* copied from .descr->addr */
      SizeT        szB; /* copied from .descr->szB */
      GlobalBlock* descr; /* it's this block */
   }
   GlobalTreeNode;

static void GlobalTreeNode__pp ( GlobalTreeNode* nd ) {
   tl_assert(nd->descr);
   VG_(printf)("GTNode [%#lx,+%ld) %s",
               nd->addr, nd->szB, nd->descr->name);
}

static void GlobalTree__pp ( WordFM* /* of (GlobalTreeNode,void) */ gitree,
                             const HChar* who )
{
   UWord keyW, valW;
   GlobalTreeNode* nd;
   VG_(printf)("<<< GlobalBlockTree (%s)\n", who);
   VG_(initIterFM)( gitree );
   while (VG_(nextIterFM)( gitree, &keyW, &valW )) {
      tl_assert(valW == 0);
      nd = (GlobalTreeNode*)keyW;
      VG_(printf)("  ");
      GlobalTreeNode__pp(nd);
      VG_(printf)("\n");
   }
   VG_(doneIterFM)( gitree );
   VG_(printf)(">>>\n");
}

/* Interval comparison function for GlobalTreeNode */
static Word cmp_intervals_GlobalTreeNode ( GlobalTreeNode* gn1,
                                           GlobalTreeNode* gn2 )
{
   return cmp_nonempty_intervals( gn1->addr, gn1->szB,
                                  gn2->addr, gn2->szB );
}

/* Find the node holding 'a', if any. */
static GlobalTreeNode* find_GlobalTreeNode ( WordFM* gitree, Addr a )
{
   UWord keyW, valW;
   GlobalTreeNode key;
   key.addr = a;
   key.szB  = 1;
   if (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
      GlobalTreeNode* res = (GlobalTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(res != &key);
      return res;
   } else {
      return NULL;
   }
}

/* Note that the supplied GlobalBlock must have been made persistent
   already. */
static void add_block_to_GlobalTree (
               /*MOD*/WordFM* gitree,
               GlobalBlock* descr
            )
{
   Bool already_present;
   GlobalTreeNode *nyu, *nd;
   UWord keyW, valW;
   static Int moans = 3;

   tl_assert(descr->szB > 0);
   nyu = sg_malloc( "di.sg_main.abtG.1", sizeof(GlobalTreeNode) );
   nyu->addr  = descr->addr;
   nyu->szB   = descr->szB;
   nyu->descr = descr;

   /* Basically it's an error to add a global block to the tree that
      is already in the tree.  However, detect and ignore attempts to
      insert exact duplicates; they do appear for some reason
      (possibly a bug in m_debuginfo?). */
   already_present = VG_(lookupFM)( gitree, &keyW, &valW, (UWord)nyu );
   if (already_present) {
      tl_assert(valW == 0);
      nd = (GlobalTreeNode*)keyW;
      tl_assert(nd);
      tl_assert(nd != nyu);
      tl_assert(nd->descr);
      tl_assert(nyu->descr);
      if (nd->addr == nyu->addr && nd->szB == nyu->szB
          /* && 0 == VG_(strcmp)(nd->descr->name, nyu->descr->name) */
          /* Although it seems reasonable to demand that duplicate
             blocks have identical names, that is too strict.  For
             example, reading debuginfo from glibc produces two
             otherwise identical blocks with names "tzname" and
             "__tzname".  A constraint of the form "must be identical,
             or one must be a substring of the other" would fix that.
             However, such trickery is scuppered by the fact that we
             truncate all variable names to 15 characters to make
             storage management simpler, hence giving pairs like
             "__EI___pthread_" (truncated) vs "__pthread_keys".  So
             it's simplest just to skip the name comparison
             completely. */
          && 0 == VG_(strcmp)(nd->descr->soname, nyu->descr->soname)) {
         /* exact duplicate; ignore it */
         sg_free(nyu);
         return;
      }
      /* else fall through; the assertion below will catch it */
   }

   already_present = VG_(addToFM)( gitree, (UWord)nyu, 0 );
   /* The interval can't already be there; else we have
      overlapping global blocks. */
   /* Unfortunately (25 Jan 09) at least icc11 has been seen to
      generate overlapping block descriptions in the Dwarf3; clearly
      bogus. */
   if (already_present && moans > 0 && !VG_(clo_xml)) {
      moans--;
      VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
                               "overlapping global blocks\n");
      if (VG_(clo_verbosity) >= 2) {
         GlobalTree__pp( gitree,
                         "add_block_to_GlobalTree: non-exact duplicate" );
         VG_(printf)("Overlapping block: ");
         GlobalTreeNode__pp(nyu);
         VG_(printf)("\n");
      }
      if (moans == 0)
         VG_(message)(Vg_UserMsg, "Further instances of this "
                                  "message will not be shown\n" );
   }
   /* tl_assert(!already_present); */
}

static Bool del_GlobalTree_range ( /*MOD*/WordFM* gitree,
                                   Addr a, SizeT szB )
{
   /* One easy way to do this: look up [a,a+szB) in the tree.  That
      will either succeed, producing a block which intersects that
      range, in which case we delete it and repeat; or it will fail,
      in which case there are no blocks intersecting the range, and we
      can bring the process to a halt. */
   UWord keyW, valW, oldK, oldV;
   GlobalTreeNode key, *nd;
   Bool b, anyFound;

   tl_assert(szB > 0);

   anyFound = False;

   key.addr = a;
   key.szB  = szB;

   while (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
      anyFound = True;
      nd = (GlobalTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(nd != &key);
      tl_assert(cmp_nonempty_intervals(a, szB, nd->addr, nd->szB) == 0);

      b = VG_(delFromFM)( gitree, &oldK, &oldV, (UWord)&key );
      tl_assert(b);
      tl_assert(oldV == 0);
      tl_assert(oldK == keyW); /* check we deleted the node we just found */
   }

   return anyFound;
}
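
/* Illustrative sketch (not part of the tool): checking the
   postcondition of del_GlobalTree_range, mirroring the redundant
   sanity check in sg_die_mem_munmap below.  The range is invented. */
__attribute__((unused))
static void example_del_GlobalTree_range ( WordFM* gitree )
{
   Addr  a   = 0x5000;   /* hypothetical munmap'd range */
   SizeT len = 0x1000;
   (void) del_GlobalTree_range( gitree, a, len );
   /* Afterwards, no node may contain 'a' any more. */
   tl_assert( find_GlobalTreeNode( gitree, a ) == NULL );
}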


//////////////////////////////////////////////////////////////
//                                                          //
// Invar                                                    //
//                                                          //
//////////////////////////////////////////////////////////////

/* An invariant, resulting from watching the destination of a
   memory-referencing instruction.  It is initially Inv_Unset, until
   the instruction makes its first access. */

typedef
   enum {
      Inv_Unset=1,  /* not established yet */
      Inv_Unknown,  /* unknown location */
      Inv_Stack0,   /* array-typed stack block in innermost frame */
      Inv_StackN,   /* array-typed stack block in non-innermost frame */
      Inv_Global,   /* array-typed global block */
   }
   InvarTag;

typedef
   struct {
      InvarTag tag;
      union {
         struct {
         } Unset;
         struct {
         } Unknown;
         struct {
            Addr  addr;
            SizeT szB;
            StackBlock* descr;
         } Stack0; /* innermost stack frame */
         struct {
            /* Pointer to a node in the interval tree for
               this thread. */
            StackTreeNode* nd;
         } StackN; /* non-innermost stack frame */
         struct {
            /* Pointer to a GlobalBlock in the interval tree of
               global blocks. */
            GlobalTreeNode* nd;
         } Global;
      }
      Inv;
   }
   Invar;

/* Partial debugging printing for an Invar. */
static void pp_Invar ( Invar* i )
{
   switch (i->tag) {
      case Inv_Unset:
         VG_(printf)("Unset");
         break;
      case Inv_Unknown:
         VG_(printf)("Unknown");
         break;
      case Inv_Stack0:
         VG_(printf)("Stack0 [%#lx,+%lu)",
                     i->Inv.Stack0.addr, i->Inv.Stack0.szB);
         break;
      case Inv_StackN:
         VG_(printf)("StackN [%#lx,+%lu)",
                     i->Inv.StackN.nd->addr, i->Inv.StackN.nd->szB);
         break;
      case Inv_Global:
         VG_(printf)("Global [%#lx,+%lu)",
                     i->Inv.Global.nd->addr, i->Inv.Global.nd->szB);
         break;
      default:
         tl_assert(0);
   }
}

/* Compare two Invars for equality. */
static Bool eq_Invar ( Invar* i1, Invar* i2 )
{
   tl_assert(i1->tag != Inv_Unset);
   tl_assert(i2->tag != Inv_Unset);
   if (i1->tag != i2->tag)
      return False;
   switch (i1->tag) {
      case Inv_Unknown:
         return True;
      case Inv_Stack0:
         return i1->Inv.Stack0.addr == i2->Inv.Stack0.addr
                && i1->Inv.Stack0.szB == i2->Inv.Stack0.szB;
      case Inv_StackN:
         return i1->Inv.StackN.nd == i2->Inv.StackN.nd;
      case Inv_Global:
         return i1->Inv.Global.nd == i2->Inv.Global.nd;
      default:
         tl_assert(0);
   }
   /*NOTREACHED*/
   tl_assert(0);
}

/* Generate a piece of text showing where 'ea' is relative to
   'invar', if known.  If unknown, generate an empty string.  'buf'
   must be at least 32 bytes in size.  Also return the absolute value
   of the delta, if known, or zero if not known.
*/
static void gen_delta_str ( /*OUT*/HChar* buf,
                            /*OUT*/UWord* absDelta,
                            Invar* inv, Addr ea )
{
   Addr  block = 0;
   SizeT szB   = 0;

   buf[0] = 0;
   *absDelta = 0;

   switch (inv->tag) {
      case Inv_Unknown:
      case Inv_Unset:
         return; /* unknown */
      case Inv_Stack0:
         block = inv->Inv.Stack0.addr;
         szB   = inv->Inv.Stack0.szB;
         break;
      case Inv_StackN:
         block = inv->Inv.StackN.nd->addr;
         szB   = inv->Inv.StackN.nd->szB;
         break;
      case Inv_Global:
         block = inv->Inv.Global.nd->addr;
         szB = inv->Inv.Global.nd->szB;
         break;
      default:
         tl_assert(0);
   }
   tl_assert(szB > 0);
   if (ea < block) {
      *absDelta = block - ea;
      VG_(sprintf)(buf, "%'lu before", *absDelta);
   }
   else if (ea >= block + szB) {
      *absDelta = ea - (block + szB);
      VG_(sprintf)(buf, "%'lu after", *absDelta);
   }
   else {
      // Leave *absDelta at zero.
      VG_(sprintf)(buf, "%'lu inside", ea - block);
   }
}
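
/* Illustrative sketch (not part of the tool): concrete outputs of
   gen_delta_str for a block at [0x100,+8), using an Invar built by
   hand.  All values are invented. */
__attribute__((unused))
static void example_gen_delta_str ( void )
{
   HChar buf[64];
   UWord d;
   Invar inv;
   inv.tag = Inv_Stack0;
   inv.Inv.Stack0.addr  = 0x100;
   inv.Inv.Stack0.szB   = 8;
   inv.Inv.Stack0.descr = NULL; /* .descr is not consulted here */
   gen_delta_str(buf, &d, &inv, 0x0FC); /* "4 before",  d == 4 */
   gen_delta_str(buf, &d, &inv, 0x104); /* "4 inside",  d == 0 */
   gen_delta_str(buf, &d, &inv, 0x10C); /* "4 after",   d == 4 */
}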


/* Print selected parts of an Invar, suitable for use in error
   messages. */
static void show_Invar( HChar* buf, Word nBuf, Invar* inv, Word depth )
{
   const HChar* str;
   tl_assert(nBuf >= 128);
   buf[0] = 0;
   switch (inv->tag) {
      case Inv_Unknown:
         VG_(sprintf)(buf, "%s", "unknown");
         break;
      case Inv_Stack0:
         str = "array";
         VG_(sprintf)(buf, "stack %s \"%s\" of size %'lu in this frame",
                      str, inv->Inv.Stack0.descr->name,
                      inv->Inv.Stack0.szB );
         break;
      case Inv_StackN:
         str = "array";
         VG_(sprintf)(buf, "stack %s \"%s\" of size %'lu in frame %lu back from here",
                      str, inv->Inv.StackN.nd->descr->name,
                           inv->Inv.StackN.nd->descr->szB,
                           depth - inv->Inv.StackN.nd->depth );
         break;
      case Inv_Global:
         str = "array";
         VG_(sprintf)(buf, "global %s \"%s\" of size %'lu in object with soname \"%s\"",
                      str, inv->Inv.Global.nd->descr->name,
                           inv->Inv.Global.nd->descr->szB,
                           inv->Inv.Global.nd->descr->soname );
         break;
      case Inv_Unset:
         VG_(sprintf)(buf, "%s", "Unset!");
         break;
      default:
         tl_assert(0);
   }
}


//////////////////////////////////////////////////////////////
//                                                          //
// our globals                                              //
//                                                          //
//////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////
///

#define N_QCACHE 16

/* Powers of two only, else the result will be chaos */
#define QCACHE_ADVANCE_EVERY 16

/* Per-thread query cache.  Note that the invar can only be Inv_StackN
   (but not Inv_Stack0), Inv_Global or Inv_Unknown. */
typedef
   struct {
      Addr  addr;
      SizeT szB;
      Invar inv;
   }
   QCElem;

typedef
   struct {
      Word   nInUse;
      QCElem elems[N_QCACHE];
   }
   QCache;

static void QCache__invalidate ( QCache* qc ) {
   tl_assert(qc->nInUse >= 0);
   qc->nInUse = 0;
}

static void QCache__pp ( QCache* qc, const HChar* who )
{
   Word i;
   VG_(printf)("<<< QCache with %ld elements (%s)\n", qc->nInUse, who);
   for (i = 0; i < qc->nInUse; i++) {
      VG_(printf)("  [%#lx,+%#lx) ", qc->elems[i].addr, qc->elems[i].szB);
      pp_Invar(&qc->elems[i].inv);
      VG_(printf)("\n");
   }
   VG_(printf)(">>>\n");
}
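
/* Illustrative sketch (not part of the tool): the replacement policy
   used by classify_address below.  A hit normally stays put; on every
   QCACHE_ADVANCE_EVERY-th hit the matching element is swapped one
   slot towards the front, so hot entries migrate forwards slowly
   rather than ping-ponging.  Since QCACHE_ADVANCE_EVERY is 16 (a
   power of two), "(ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0" holds on
   exactly one call in 16.  Here 'i' is assumed to index the element
   that just hit. */
__attribute__((unused))
static void example_QCache_promotion ( QCache* cache, Word i )
{
   static UWord ctr = 0;
   if (i > 0 && (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0) {
      QCElem tmp = cache->elems[i-1];
      cache->elems[i-1] = cache->elems[i];
      cache->elems[i]   = tmp;
   }
}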

static ULong stats__qcache_queries = 0;
static ULong stats__qcache_misses  = 0;
static ULong stats__qcache_probes  = 0;

///
//////////////////////////////////////////////////////////////

/* Each thread has:
   * a shadow stack of StackFrames, which is a doubly-linked list
   * a stack block interval tree
*/
static  struct _StackFrame*          shadowStacks[VG_N_THREADS];

static  WordFM* /* StackTreeNode */  siTrees[VG_N_THREADS];

static  QCache                       qcaches[VG_N_THREADS];


/* Additionally, there is one global variable interval tree
   for the entire process.
*/
static WordFM* /* GlobalTreeNode */ giTree;


static void invalidate_all_QCaches ( void )
{
   Word i;
   for (i = 0; i < VG_N_THREADS; i++) {
      QCache__invalidate( &qcaches[i] );
   }
}

static void ourGlobals_init ( void )
{
   Word i;
   for (i = 0; i < VG_N_THREADS; i++) {
      shadowStacks[i] = NULL;
      siTrees[i] = NULL;
   }
   invalidate_all_QCaches();
   giTree = VG_(newFM)( sg_malloc, "di.sg_main.oGi.1", sg_free,
                        (Word(*)(UWord,UWord))cmp_intervals_GlobalTreeNode );
}


//////////////////////////////////////////////////////////////
//                                                          //
// Handle global variable load/unload events                //
//                                                          //
//////////////////////////////////////////////////////////////

static void acquire_globals ( ULong di_handle )
{
   Word n, i;
   XArray* /* of GlobalBlock */ gbs;
   if (0) VG_(printf)("ACQUIRE GLOBALS %llu\n", di_handle );
   gbs = VG_(di_get_global_blocks_from_dihandle)
            (di_handle, True/*arrays only*/);
   if (0) VG_(printf)("   GOT %ld globals\n", VG_(sizeXA)( gbs ));

   n = VG_(sizeXA)( gbs );
   for (i = 0; i < n; i++) {
      GlobalBlock* gbp;
      GlobalBlock* gb = VG_(indexXA)( gbs, i );
      if (0) VG_(printf)("   new Global size %2lu at %#lx:  %s %s\n",
                         gb->szB, gb->addr, gb->soname, gb->name );
      tl_assert(gb->szB > 0);
      /* Make a persistent copy of each GlobalBlock, and add it
         to the tree. */
      gbp = get_persistent_GlobalBlock( gb );
      add_block_to_GlobalTree( giTree, gbp );
   }

   VG_(deleteXA)( gbs );
}


/* We only intercept these two because we need to see any di_handles
   that might arise from the mappings/allocations. */
void sg_new_mem_mmap( Addr a, SizeT len,
                      Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   if (di_handle > 0)
      acquire_globals(di_handle);
}
void sg_new_mem_startup( Addr a, SizeT len,
                         Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   if (di_handle > 0)
      acquire_globals(di_handle);
}
void sg_die_mem_munmap ( Addr a, SizeT len )
{
   Bool debug = (Bool)0;
   Bool overlap = False;

   if (debug) VG_(printf)("MUNMAP %#lx %lu\n", a, len );

   if (len == 0)
      return;

   overlap = del_GlobalTree_range(giTree, a, len);

   { /* redundant sanity check */
     UWord keyW, valW;
     VG_(initIterFM)( giTree );
     while (VG_(nextIterFM)( giTree, &keyW, &valW )) {
        GlobalTreeNode* nd = (GlobalTreeNode*)keyW;
        tl_assert(valW == 0);
        tl_assert(nd->szB > 0);
        tl_assert(nd->addr + nd->szB <= a
                  || a + len <= nd->addr);
     }
     VG_(doneIterFM)( giTree );
   }

   if (!overlap)
      return;

   /* Ok, the range contained some blocks.  Therefore we'll need to
      visit all the Invars in all the thread shadow stacks, and
      convert all Inv_Global entries that intersect [a,a+len) to
      Inv_Unknown. */
   tl_assert(len > 0);
   preen_global_Invars( a, len );
   invalidate_all_QCaches();
}


//////////////////////////////////////////////////////////////
//                                                          //
// StackFrame                                               //
//                                                          //
//////////////////////////////////////////////////////////////

static ULong stats__total_accesses   = 0;
static ULong stats__classify_Stack0  = 0;
static ULong stats__classify_StackN  = 0;
static ULong stats__classify_Global  = 0;
static ULong stats__classify_Unknown = 0;
static ULong stats__Invars_preened   = 0;
static ULong stats__Invars_changed   = 0;
static ULong stats__t_i_b_empty      = 0;
static ULong stats__htab_fast        = 0;
static ULong stats__htab_searches    = 0;
static ULong stats__htab_probes      = 0;
static ULong stats__htab_resizes     = 0;


/* A dynamic instance of an instruction */
typedef
   struct {
      /* IMMUTABLE */
      Addr    insn_addr; /* NB! zero means 'not in use' */
      XArray* blocks; /* XArray* of StackBlock, or NULL if none */
      /* MUTABLE */
      Invar invar;
   }
   IInstance;


#define N_HTAB_FIXED 64

typedef
   struct _StackFrame {
      /* The sp when the frame was created, so we know when to get rid
         of it. */
      Addr creation_sp;
      /* The stack frames for a thread are arranged as a doubly linked
         list.  Obviously the outermost frame in the stack has .outer
         as NULL and the innermost in theory has .inner as NULL.
         However, when a function returns, we don't delete the
         just-vacated StackFrame.  Instead, it is retained in the list
         and will be re-used when the next call happens.  This is so
         as to avoid constantly having to dynamically allocate and
         deallocate frames. */
      struct _StackFrame* inner;
      struct _StackFrame* outer;
      Word depth; /* 0 for outermost; increases inwards */
      /* Information for each memory referencing instruction, for this
         instantiation of the function.  The iinstances array is
         operated as a simple linear-probe hash table, which is
         dynamically expanded as necessary.  One critical thing is
         that an IInstance with a .insn_addr of zero is interpreted to
         mean that hash table slot is unused.  This means we can't
         store an IInstance for address zero. */
      /* Note that htab initially points to htab_fixed.  If htab_fixed
         turns out not to be big enough then htab is made to point to
         dynamically allocated memory.  But it's often the case that
         htab_fixed is big enough, so this optimisation saves a huge
         number of sg_malloc/sg_free call pairs. */
      IInstance* htab;
      UWord      htab_size; /* size of hash table, MAY ONLY BE A POWER OF 2 */
      UWord      htab_used; /* number of hash table slots currently in use */
      /* If this frame is currently making a call, then the following
         are relevant. */
      Addr sp_at_call;
      Addr fp_at_call;
      XArray* /* of Addr */ blocks_added_by_call;
      /* See comment just above */
      IInstance htab_fixed[N_HTAB_FIXED];
   }
   StackFrame;




/* Move this somewhere else? */
/* Visit all Invars in the entire system, and change every Inv_Global
   Invar that intersects [a,a+len) to Inv_Unknown. */

__attribute__((noinline))
static void preen_global_Invar ( Invar* inv, Addr a, SizeT len )
{
   stats__Invars_preened++;
   tl_assert(len > 0);
   tl_assert(inv);
   switch (inv->tag) {
      case Inv_Global:
         tl_assert(inv->Inv.Global.nd);
         tl_assert(inv->Inv.Global.nd->szB > 0);
         if (0) VG_(printf)("preen_Invar Global %#lx %lu\n",
                            inv->Inv.Global.nd->addr,
                            inv->Inv.Global.nd->szB);
         if (0 == cmp_nonempty_intervals(a, len, inv->Inv.Global.nd->addr,
                                                 inv->Inv.Global.nd->szB)) {
            inv->tag = Inv_Unknown;
            stats__Invars_changed++;
         }
         break;
      case Inv_Stack0:
      case Inv_StackN:
      case Inv_Unknown:
         break;
      default:
         tl_assert(0);
   }
}

__attribute__((noinline))
static void preen_global_Invars ( Addr a, SizeT len )
{
   Int         i;
   UWord       u;
   StackFrame* frame;
   tl_assert(len > 0);
   for (i = 0; i < VG_N_THREADS; i++) {
      frame = shadowStacks[i];
      if (!frame)
         continue; /* no frames for this thread */
      /* start from the innermost frame */
      while (frame->inner)
         frame = frame->inner;
      tl_assert(frame->outer);
      /* work through the frames from innermost to outermost.  The
         order isn't important; we just need to ensure we visit each
         frame once (including those which are not actually active,
         more 'inner' than the 'innermost active frame', viz, just
         hanging around waiting to be used when the current innermost
         active frame makes more calls).  See comments on definition
         of struct _StackFrame. */
      for (; frame; frame = frame->outer) {
         UWord xx = 0; /* sanity check only; count of used htab entries */
         if (!frame->htab)
            continue; /* frame not in use.  See shadowStack_unwind(). */
         for (u = 0; u < frame->htab_size; u++) {
            IInstance* ii = &frame->htab[u];
            if (ii->insn_addr == 0)
               continue; /* not in use */
            if (0) { pp_Invar(&ii->invar); VG_(printf)(" x\n"); }
            preen_global_Invar( &ii->invar, a, len );
            xx++;
         }
         tl_assert(xx == frame->htab_used);
      }
   }
}


/* XXX this should be >> 2 on ppc32/64 since the bottom two bits
   of the ip are guaranteed to be zero */
inline static UWord compute_II_hash ( Addr ip, UWord htab_size ) {
   return (ip >> 0) & (htab_size - 1);
}
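
/* Illustrative sketch (not part of the tool): what the hash does.
   htab_size is always a power of two, so "& (htab_size - 1)" keeps
   just the low-order bits of the instruction address, i.e. it is
   "ip mod htab_size" computed without a division.  The address is
   invented. */
__attribute__((unused))
static void example_compute_II_hash ( void )
{
   /* With htab_size == 64, the mask is 0x3F, so only the low 6 bits
      of the address survive. */
   tl_assert(compute_II_hash(0x400123, 64) == (0x400123 & 0x3F));
   tl_assert(compute_II_hash(0x400123, 64) == 0x23);
}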
1318 
1319 __attribute__((noinline))
initialise_II_hash_table(StackFrame * sf)1320 static void initialise_II_hash_table ( StackFrame* sf )
1321 {
1322    UWord i;
1323    sf->htab_size = N_HTAB_FIXED; /* initial hash table size */
1324    sf->htab = &sf->htab_fixed[0];
1325    tl_assert(sf->htab);
1326    sf->htab_used = 0;
1327    for (i = 0; i < sf->htab_size; i++)
1328       sf->htab[i].insn_addr = 0; /* NOT IN USE */
1329 }
1330 
1331 
1332 __attribute__((noinline))
resize_II_hash_table(StackFrame * sf)1333 static void resize_II_hash_table ( StackFrame* sf )
1334 {
1335    UWord     i, j, ix, old_size, new_size;
1336    IInstance *old_htab, *new_htab, *old;
1337 
1338    tl_assert(sf && sf->htab);
1339    old_size = sf->htab_size;
1340    new_size = 2 * old_size;
1341    old_htab = sf->htab;
1342    new_htab = sg_malloc( "di.sg_main.rIht.1",
1343                          new_size * sizeof(IInstance) );
1344    for (i = 0; i < new_size; i++) {
1345       new_htab[i].insn_addr = 0; /* NOT IN USE */
1346    }
1347    for (i = 0; i < old_size; i++) {
1348       old = &old_htab[i];
1349       if (old->insn_addr == 0 /* NOT IN USE */)
1350          continue;
1351       ix = compute_II_hash(old->insn_addr, new_size);
1352       /* find out where to put this, in the new table */
1353       j = new_size;
1354       while (1) {
1355          if (new_htab[ix].insn_addr == 0)
1356             break;
1357          /* This can't ever happen, because it would mean the new
1358             table is full; that isn't allowed -- even the old table is
1359             only allowed to become half full. */
1360          tl_assert(j > 0);
1361          j--;
1362          ix++; if (ix == new_size) ix = 0;
1363       }
1364       /* copy the old entry to this location */
1365       tl_assert(ix < new_size);
1366       tl_assert(new_htab[ix].insn_addr == 0);
1367       new_htab[ix] = *old;
1368       tl_assert(new_htab[ix].insn_addr != 0);
1369    }
1370    /* all entries copied; free old table. */
1371    if (old_htab != &sf->htab_fixed[0])
1372       sg_free(old_htab);
1373    sf->htab = new_htab;
1374    sf->htab_size = new_size;
1375    /* check sf->htab_used is correct.  Optional and a bit expensive
1376       but anyway: */
1377    j = 0;
1378    for (i = 0; i < new_size; i++) {
1379       if (new_htab[i].insn_addr != 0) {
1380          j++;
1381       }
1382    }
1383    tl_assert(j == sf->htab_used);
1384    if (0) VG_(printf)("resized tab for SF %p to %lu\n", sf, new_size);
1385 }
1386 
1387 
1388 __attribute__((noinline))
find_or_create_IInstance_SLOW(StackFrame * sf,Addr ip,XArray * ip_frameblocks)1389 static IInstance* find_or_create_IInstance_SLOW (
1390                      StackFrame* sf,
1391                      Addr ip,
1392                      XArray* /* StackBlock */ ip_frameblocks
1393                   )
1394 {
1395    UWord i, ix;
1396 
1397    stats__htab_searches++;
1398 
1399    tl_assert(sf);
1400    tl_assert(sf->htab);
1401 
1402    /* Make sure the table loading doesn't get too high. */
1403    if (UNLIKELY(2 * sf->htab_used >= 1 * sf->htab_size)) {
1404       stats__htab_resizes++;
1405       resize_II_hash_table(sf);
1406    }
1407    tl_assert(2 * sf->htab_used <= sf->htab_size);
1408 
1409    ix = compute_II_hash(ip, sf->htab_size);
1410    i = sf->htab_size;
1411    while (1) {
1412       stats__htab_probes++;
1413       /* Note that because of the way the fast-case handler works,
1414          these two tests are actually redundant in the first iteration
1415          of this loop.  (Except they aren't redundant if the code just
1416          above resized the table first. :-) */
1417       if (sf->htab[ix].insn_addr == ip)
1418          return &sf->htab[ix];
1419       if (sf->htab[ix].insn_addr == 0)
1420          break;
1421       /* If i ever gets to zero and we have found neither what we're
1422          looking for nor an empty slot, the table must be full.  Which
1423          isn't possible -- we monitor the load factor to ensure it
1424          doesn't get above say 50%; if that ever does happen the table
1425          is resized. */
1426       tl_assert(i > 0);
1427       i--;
1428       ix++;
1429       if (ix == sf->htab_size) ix = 0;
1430    }
1431 
1432    /* So now we've found a free slot at ix, and we can use that. */
1433    tl_assert(sf->htab[ix].insn_addr == 0);
1434 
1435    /* Add a new record in this slot. */
1436    tl_assert(ip != 0); /* CAN'T REPRESENT THIS */
1437    sf->htab[ix].insn_addr = ip;
1438    sf->htab[ix].blocks    = ip_frameblocks;
1439    sf->htab[ix].invar.tag = Inv_Unset;
1440    sf->htab_used++;
1441    return &sf->htab[ix];
1442 }
1443 
1444 
1445 inline
find_or_create_IInstance(StackFrame * sf,Addr ip,XArray * ip_frameblocks)1446 static IInstance* find_or_create_IInstance (
1447                      StackFrame* sf,
1448                      Addr ip,
1449                      XArray* /* StackBlock */ ip_frameblocks
1450                   )
1451 {
1452    UWord ix = compute_II_hash(ip, sf->htab_size);
1453    /* Is it in the first slot we come to? */
1454    if (LIKELY(sf->htab[ix].insn_addr == ip)) {
1455       stats__htab_fast++;
1456       return &sf->htab[ix];
1457    }
1458    /* If the first slot we come to is empty, bag it. */
1459    if (LIKELY(sf->htab[ix].insn_addr == 0)) {
1460       stats__htab_fast++;
1461       tl_assert(ip != 0);
1462       sf->htab[ix].insn_addr = ip;
1463       sf->htab[ix].blocks    = ip_frameblocks;
1464       sf->htab[ix].invar.tag = Inv_Unset;
1465       sf->htab_used++;
1466       return &sf->htab[ix];
1467    }
1468    /* Otherwise we hand off to the slow case, which searches other
1469       slots, and optionally resizes the table if necessary. */
1470    return find_or_create_IInstance_SLOW( sf, ip, ip_frameblocks );
1471 }
1472 
1473 
1474 __attribute__((noinline))
calculate_StackBlock_EA(StackBlock * descr,Addr sp,Addr fp)1475 static Addr calculate_StackBlock_EA ( StackBlock* descr,
1476                                       Addr sp, Addr fp ) {
1477    UWord w1 = (UWord)descr->base;
1478    UWord w2 = (UWord)(descr->spRel ? sp : fp);
1479    UWord ea = w1 + w2;
1480    return ea;
1481 }
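     /* (For example, a descriptor with spRel == False and base == -16
        denotes the block at fp-16: the addition is done on UWords, so a
        negative base wraps round in two's complement and still yields
        the intended address.) */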
1482 
1483 /* Given an array of StackBlocks, return an array of Addrs, holding
1484    their effective addresses.  Caller deallocates result array. */
1485 __attribute__((noinline))
1486 static XArray* /* Addr */ calculate_StackBlock_EAs (
1487                              XArray* /* StackBlock */ blocks,
1488                              Addr sp, Addr fp
1489                           )
1490 {
1491    XArray* res;
1492    Word i, n = VG_(sizeXA)( blocks );
1493    tl_assert(n > 0);
1494    res = VG_(newXA)( sg_malloc, "di.sg_main.cSBE.1", sg_free, sizeof(Addr) );
1495    for (i = 0; i < n; i++) {
1496       StackBlock* blk = VG_(indexXA)( blocks, i );
1497       Addr ea = calculate_StackBlock_EA( blk, sp, fp );
1498       VG_(addToXA)( res, &ea );
1499    }
1500    return res;
1501 }
1502 
1503 
1504 /* Try to classify the block into which a memory access falls, and
1505    write the result in 'inv'.  This writes all relevant fields of
1506    'inv'. */
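     /* The cascade below is ordered by expected hit rate: (1) stack
        blocks known to this instruction's own frame, (2) the per-thread
        query cache, (3) the thread's stack-block interval tree (blocks
        belonging to calling frames), (4) the global-block interval
        tree, and (5) Inv_Unknown if all of those fail. */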
1507 __attribute__((noinline))
1508 static void classify_address ( /*OUT*/Invar* inv,
1509                                ThreadId tid,
1510                                Addr ea, Addr sp, Addr fp,
1511                                UWord szB,
1512                                XArray* /* of StackBlock */ thisInstrBlocks )
1513 {
1514    tl_assert(szB > 0);
1515    /* First, look in the stack blocks accessible in this instruction's
1516       frame. */
1517    {
1518      Word i, nBlocks = VG_(sizeXA)( thisInstrBlocks );
1519      if (nBlocks == 0) stats__t_i_b_empty++;
1520      for (i = 0; i < nBlocks; i++) {
1521         StackBlock* descr = VG_(indexXA)( thisInstrBlocks, i );
1522         Addr bea = calculate_StackBlock_EA( descr, sp, fp );
1523         if (bea <= ea && ea + szB <= bea + descr->szB) {
1524            /* found it */
1525            inv->tag = Inv_Stack0;
1526            inv->Inv.Stack0.addr  = bea;
1527            inv->Inv.Stack0.szB   = descr->szB;
1528            inv->Inv.Stack0.descr = descr;
1529            stats__classify_Stack0++;
1530            return;
1531         }
1532      }
1533    }
1534    /* Look in this thread's query cache */
1535    { Word i;
1536      QCache* cache = &qcaches[tid];
1537      static UWord ctr = 0;
1538      stats__qcache_queries++;
1539      for (i = 0; i < cache->nInUse; i++) {
1540         if (0) /* expensive in a loop like this */
1541                tl_assert(cache->elems[i].addr + cache->elems[i].szB != 0);
1542         stats__qcache_probes++;
1543         if (is_subinterval_of(cache->elems[i].addr,
1544                               cache->elems[i].szB, ea, szB)) {
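                /* Cache hit.  Once every QCACHE_ADVANCE_EVERY hits,
                   swap the matching entry one place towards the front,
                   so popular entries migrate forwards gradually rather
                   than via a thrash-prone full move-to-front. */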
1545            if (i > 0
1546                && (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0) {
1547               QCElem tmp;
1548               tmp = cache->elems[i-1];
1549               cache->elems[i-1] = cache->elems[i];
1550               cache->elems[i] = tmp;
1551               i--;
1552            }
1553            *inv = cache->elems[i].inv;
1554            return;
1555         }
1556      }
1557      stats__qcache_misses++;
1558    }
1559    /* Ok, so it's not a block in the top frame.  Perhaps it's a block
1560       in some calling frame?  Consult this thread's stack-block
1561       interval tree to find out. */
1562    { StackTreeNode* nd = find_StackTreeNode( siTrees[tid], ea );
1563      /* We know that [ea,ea+1) is in the block, but we need to
1564         restrict to the case where the whole access falls within
1565         it. */
1566      if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
1567         nd = NULL;
1568      }
1569      if (nd) {
1570         /* found it */
1571         inv->tag = Inv_StackN;
1572         inv->Inv.StackN.nd = nd;
1573         stats__classify_StackN++;
1574         goto out;
1575      }
1576    }
1577    /* Not in a stack block.  Try the global pool. */
1578    { GlobalTreeNode* nd = find_GlobalTreeNode(giTree, ea);
1579      /* We know that [ea,ea+1) is in the block, but we need to
1580         restrict to the case where the whole access falls within
1581         it. */
1582      if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
1583         nd = NULL;
1584      }
1585      if (nd) {
1586         /* found it */
1587         inv->tag = Inv_Global;
1588         inv->Inv.Global.nd = nd;
1589         stats__classify_Global++;
1590         goto out;
1591      }
1592    }
1593    /* No idea - give up. */
1594    inv->tag = Inv_Unknown;
1595    stats__classify_Unknown++;
1596 
1597    /* Update the cache */
1598   out:
1599    { Addr    toadd_addr = 0;
1600      SizeT   toadd_szB  = 0;
1601      QCache* cache      = &qcaches[tid];
1602 
1603      static UWord ctr = 0;
1604      Bool show = False;
1605      if (0 && 0 == (ctr++ & 0x1FFFFF)) show = True;
1606 
1607      if (show) QCache__pp(cache, "before upd");
1608 
1609      switch (inv->tag) {
1610         case Inv_Global:
1611            toadd_addr = inv->Inv.Global.nd->addr;
1612            toadd_szB  = inv->Inv.Global.nd->szB;
1613            break;
1614         case Inv_StackN:
1615            toadd_addr = inv->Inv.StackN.nd->addr;
1616            toadd_szB  = inv->Inv.StackN.nd->szB;
1617            break;
1618         case Inv_Unknown: {
1619            /* This is more complex.  We need to figure out the
1620               intersection of the "holes" in the global and stack
1621               interval trees into which [ea,ea+szB) falls.  This is
1622               further complicated by the fact that [ea,ea+szB) might
1623               not fall cleanly into a hole; it may instead fall across
1624               the boundary of a stack or global block.  In that case
1625               we just ignore it and don't update the cache, since we
1626               have no way to represent this situation precisely. */
1627            StackTreeNode  sNegInf, sPosInf, sKey, *sLB, *sUB;
1628            GlobalTreeNode gNegInf, gPosInf, gKey, *gLB, *gUB;
1629            Addr gMin, gMax, sMin, sMax, uMin, uMax;
1630            Bool sOK, gOK;
1631            sNegInf.addr = 0;
1632            sNegInf.szB  = 1;
1633            sPosInf.addr = ~(UWord)0;
1634            sPosInf.szB  = 1;
1635            gNegInf.addr = 0;
1636            gNegInf.szB  = 1;
1637            gPosInf.addr = ~(UWord)0;
1638            gPosInf.szB  = 1;
1639            sKey.addr = ea;
1640            sKey.szB  = szB;
1641            gKey.addr = ea;
1642            gKey.szB  = szB;
1643            if (0) VG_(printf)("Tree sizes %ld %ld\n",
1644                               VG_(sizeFM)(siTrees[tid]), VG_(sizeFM)(giTree));
1645            sOK = VG_(findBoundsFM)( siTrees[tid],
1646                                     (UWord*)&sLB,    NULL/*unused*/,
1647                                     (UWord*)&sUB,    NULL/*unused*/,
1648                                     (UWord)&sNegInf, 0/*unused*/,
1649                                     (UWord)&sPosInf, 0/*unused*/,
1650                                     (UWord)&sKey );
1651            gOK = VG_(findBoundsFM)( giTree,
1652                                     (UWord*)&gLB,    NULL/*unused*/,
1653                                     (UWord*)&gUB,    NULL/*unused*/,
1654                                     (UWord)&gNegInf, 0/*unused*/,
1655                                     (UWord)&gPosInf, 0/*unused*/,
1656                                     (UWord)&gKey );
1657            if (!(sOK && gOK)) {
1658               /* If this happens, then [ea,ea+szB) partially overlaps
1659                  a stack or global block.  We can't represent that, so
1660                  just forget it (should be very rare).  However, do
1661                  maximum sanity checks first.  In such a
1662                  partial overlap case, it can't be the case that both
1663                  [ea] and [ea+szB-1] overlap the same block, since if
1664                  that were indeed the case then it wouldn't be a
1665                  partial overlap; rather it would simply fall inside
1666                  that block entirely and we shouldn't be inside this
1667                  conditional at all. */
1668               if (!sOK) {
1669                  StackTreeNode *ndFirst, *ndLast;
1670                  ndFirst = find_StackTreeNode( siTrees[tid], ea );
1671                  ndLast  = find_StackTreeNode( siTrees[tid], ea+szB-1 );
1672                  /* if both ends of the range fall inside a block,
1673                     they can't be in the same block. */
1674                  if (ndFirst && ndLast)
1675                     tl_assert(ndFirst != ndLast);
1676                  /* for each end of the range, if it is in a block,
1677                     the range as a whole can't be entirely within the
1678                     block. */
1679                  if (ndFirst)
1680                     tl_assert(!is_subinterval_of(ndFirst->addr,
1681                                                  ndFirst->szB, ea, szB));
1682                  if (ndLast)
1683                     tl_assert(!is_subinterval_of(ndLast->addr,
1684                                                  ndLast->szB, ea, szB));
1685               }
1686               if (!gOK) {
1687                  GlobalTreeNode *ndFirst, *ndLast;
1688                  ndFirst = find_GlobalTreeNode( giTree, ea );
1689                  ndLast  = find_GlobalTreeNode( giTree, ea+szB-1 );
1690                  /* if both ends of the range fall inside a block,
1691                     they can't be in the same block. */
1692                  if (ndFirst && ndLast)
1693                     tl_assert(ndFirst != ndLast);
1694                  /* for each end of the range, if it is in a block,
1695                     the range as a whole can't be entirely within the
1696                     block. */
1697                  if (ndFirst)
1698                     tl_assert(!is_subinterval_of(ndFirst->addr,
1699                                                  ndFirst->szB, ea, szB));
1700                  if (ndLast)
1701                     tl_assert(!is_subinterval_of(ndLast->addr,
1702                                                  ndLast->szB, ea, szB));
1703               }
1704               if (0) VG_(printf)("overlapping blocks in cache\n");
1705               return;
1706            }
1707            sMin = sLB == &sNegInf  ? 0         : (sLB->addr + sLB->szB);
1708            sMax = sUB == &sPosInf  ? ~(UWord)0 : (sUB->addr - 1);
1709            gMin = gLB == &gNegInf  ? 0         : (gLB->addr + gLB->szB);
1710            gMax = gUB == &gPosInf  ? ~(UWord)0 : (gUB->addr - 1);
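                /* sLB/sUB (resp. gLB/gUB) are the nearest blocks below
                   and above the access in the stack (resp. global)
                   tree, or the sentinels if there are none; hence
                   [sMin,sMax] and [gMin,gMax] are the maximal
                   block-free holes around the access in each tree. */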
1711            if (0) VG_(printf)("sMin %lx sMax %lx gMin %lx gMax %lx\n",
1712                               sMin, sMax, gMin, gMax);
1713            /* [sMin,sMax] and [gMin,gMax] must both contain
1714               [ea,ea+szB) (right?)  That implies they must overlap
1715               at least over [ea,ea+szB). */
1716            tl_assert(sMin <= ea && ea+szB-1 <= sMax);
1717            tl_assert(gMin <= ea && ea+szB-1 <= gMax);
1718            /* So now compute their intersection. */
1719            uMin = Addr__max( sMin, gMin );
1720            uMax = Addr__min( sMax, gMax );
1721            if (0) VG_(printf)("uMin %lx uMax %lx\n", uMin, uMax);
1722            tl_assert(uMin <= uMax);
1723            tl_assert(uMin <= ea && ea+szB-1 <= uMax);
1724            /* Finally, we can park [uMin,uMax] in the cache.  However,
1725               if uMax is ~0, we can't represent the difference; hence
1726               fudge uMax. */
1727            if (uMin < uMax && uMax == ~(UWord)0)
1728               uMax--;
1729            toadd_addr = uMin;
1730            toadd_szB  = uMax - uMin + 1;
1731            break;
1732         }
1733         default:
1734            /* We should only be caching info for the above 3 cases */
1735            tl_assert(0);
1736      } /* switch (inv->tag) */
1737 
1738      { /* and actually add this to the cache, finally */
1739        Word i;
1740        Word ip = cache->nInUse / 2; /* doesn't seem critical */
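            /* New entries are parked mid-cache: the lookup loop above
               then promotes entries that get re-used towards the front,
               while unused ones drift backwards and eventually drop off
               the end. */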
1741 
1742        if (cache->nInUse < N_QCACHE)
1743           cache->nInUse++;
1744        for (i = cache->nInUse-1; i > ip; i--) {
1745           cache->elems[i] = cache->elems[i-1];
1746        }
1747 
1748        tl_assert(toadd_szB > 0);
1749        cache->elems[ip].addr = toadd_addr;
1750        cache->elems[ip].szB  = toadd_szB;
1751        cache->elems[ip].inv  = *inv;
1752      }
1753 
1754      if (show) QCache__pp(cache, "after upd");
1755 
1756    }
1757 }
1758 
1759 
1760 /* CALLED FROM GENERATED CODE */
1761 static
1762 VG_REGPARM(3)
1763 void helperc__mem_access ( /* Known only at run time: */
1764                            Addr ea, Addr sp, Addr fp,
1765                            /* Known at translation time: */
1766                            Word sszB, Addr ip, XArray* ip_frameBlocks )
1767 {
1768    UWord szB;
1769    IInstance* iinstance;
1770    Invar* inv;
1771    Invar new_inv;
1772    ThreadId tid = VG_(get_running_tid)();
1773    StackFrame* frame;
1774    HChar bufE[160], bufA[160], bufD[32];
1775 
1776    stats__total_accesses++;
1777 
1778    tl_assert(is_sane_TId(tid));
1779    frame = shadowStacks[tid];
1780    tl_assert(frame);
1781 
1782    /* Find the instance info for this instruction. */
1783    tl_assert(ip_frameBlocks);
1784    iinstance = find_or_create_IInstance( frame, ip, ip_frameBlocks );
1785    tl_assert(iinstance);
1786    tl_assert(iinstance->blocks == ip_frameBlocks);
1787 
1788    szB = (sszB < 0) ? (-sszB) : sszB;
1789    tl_assert(szB > 0);
1790 
1791    inv = &iinstance->invar;
1792 
1793    /* Deal with first uses of instruction instances. */
1794    if (inv->tag == Inv_Unset) {
1795       /* This is the first use of this instance of the instruction, so
1796          we can't make any check; we merely record what we saw, so we
1797          can compare it against what happens for 2nd and subsequent
1798          accesses. */
1799       classify_address( inv,
1800                         tid, ea, sp, fp, szB, iinstance->blocks );
1801       tl_assert(inv->tag != Inv_Unset);
1802       return;
1803    }
1804 
1805    /* So generate an Invar and see if it's different from what
1806       we had before. */
1807    classify_address( &new_inv,
1808                      tid, ea, sp, fp, szB, iinstance->blocks );
1809    tl_assert(new_inv.tag != Inv_Unset);
1810 
1811    /* Did we see something different from before?  If no, then there's
1812       no error. */
1813    if (LIKELY(eq_Invar(&new_inv, inv)))
1814       return;
1815 
1816    tl_assert(inv->tag != Inv_Unset);
1817 
1818    VG_(memset)(bufE, 0, sizeof(bufE));
1819    show_Invar( bufE, sizeof(bufE)-1, inv, frame->depth );
1820 
1821    VG_(memset)(bufA, 0, sizeof(bufA));
1822    show_Invar( bufA, sizeof(bufA)-1, &new_inv, frame->depth );
1823 
1824    VG_(memset)(bufD, 0, sizeof(bufD));
1825    UWord absDelta;
1826    gen_delta_str( bufD, &absDelta, inv, ea );
1827 
1828    if (absDelta < 1024)
1829       sg_record_error_SorG( tid, ea, sszB, bufE, bufA, bufD );
1830 
1831    /* And now install the new observation as "standard", so as to
1832       make future error messages make more sense. */
1833    *inv = new_inv;
1834 }
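     /* (Scenario for the above: the first execution of a load at some
        IP classifies as Inv_Stack0 on a local array, but a later
        execution of the same load lands in a global block.  eq_Invar
        then fails, the old and new invariants are rendered by
        show_Invar as the "expected" and "actual" strings, and --
        provided the access lies within 1024 bytes of the expected
        block -- the error is reported via sg_record_error_SorG.) */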
1835 
1836 
1837 ////////////////////////////////////////
1838 /* Primary push-a-new-frame routine.  Called indirectly from
1839    generated code. */
1840 
1841 static UWord stats__max_sitree_size = 0;
1842 static UWord stats__max_gitree_size = 0;
1843 
1844 static
1845 void shadowStack_new_frame ( ThreadId tid,
1846                              Addr     sp_at_call_insn,
1847                              Addr     sp_post_call_insn,
1848                              Addr     fp_at_call_insn,
1849                              Addr     ip_post_call_insn,
1850                              XArray*  descrs_at_call_insn )
1851 {
1852    StackFrame *callee, *caller;
1853    tl_assert(is_sane_TId(tid));
1854 
1855    caller = shadowStacks[tid];
1856    tl_assert(caller);
1857 
1858    if (caller->outer) { /* "this is not the outermost frame" */
1859       tl_assert(caller->outer->inner == caller);
1860       tl_assert(caller->outer->depth >= 0);
1861       tl_assert(1 + caller->outer->depth == caller->depth);
1862    } else {
1863       tl_assert(caller->depth == 0);
1864    }
1865 
1866    caller->sp_at_call = sp_at_call_insn;
1867    caller->fp_at_call = fp_at_call_insn;
1868 
1869    if (descrs_at_call_insn) {
1870       tl_assert( VG_(sizeXA)(descrs_at_call_insn) > 0 );
1871       caller->blocks_added_by_call
1872          = calculate_StackBlock_EAs( descrs_at_call_insn,
1873                                      sp_at_call_insn, fp_at_call_insn );
1874       if (caller->blocks_added_by_call)
1875          add_blocks_to_StackTree( siTrees[tid],
1876                                   descrs_at_call_insn,
1877                                   caller->blocks_added_by_call,
1878                                   caller->depth /* stack depth at which
1879                                                    these blocks are
1880                                                    considered to exist*/ );
1881       if (1) {
1882          UWord s  = VG_(sizeFM)( siTrees[tid] );
1883          UWord g  = VG_(sizeFM)( giTree );
1884          Bool  sb = s > stats__max_sitree_size;
1885          Bool  gb = g > stats__max_gitree_size;
1886          if (sb) stats__max_sitree_size = s;
1887          if (gb) stats__max_gitree_size = g;
1888          if (0 && (sb || gb))
1889             VG_(message)(Vg_DebugMsg,
1890                          "exp-sgcheck: new max tree sizes: "
1891                          "StackTree %ld, GlobalTree %ld\n",
1892                          stats__max_sitree_size, stats__max_gitree_size );
1893       }
1894    } else {
1895       caller->blocks_added_by_call = NULL;
1896    }
1897 
1898    /* caller->blocks_added_by_call is used again (and then freed) when
1899       this frame is removed from the stack. */
1900 
1901    if (caller->inner) {
1902       callee = caller->inner;
1903    } else {
1904       callee = sg_malloc("di.sg_main.sSnf.1", sizeof(StackFrame));
1905       VG_(memset)(callee, 0, sizeof(StackFrame));
1906       callee->outer = caller;
1907       caller->inner = callee;
1908       callee->depth = 1 + caller->depth;
1909       tl_assert(callee->inner == NULL);
1910    }
1911 
1912    /* This sets up .htab, .htab_size and .htab_used */
1913    initialise_II_hash_table( callee );
1914 
1915    callee->creation_sp    = sp_post_call_insn;
1916    callee->sp_at_call     = 0; // not actually required ..
1917    callee->fp_at_call     = 0; // .. these 3 initialisations are ..
1918    callee->blocks_added_by_call = NULL; // .. just for cleanness
1919 
1920    /* record the new running stack frame */
1921    shadowStacks[tid] = callee;
1922 
1923    /* and this thread's query cache is now invalid */
1924    QCache__invalidate( &qcaches[tid] );
1925 
1926    if (0)
1927    { Word d = callee->depth;
1928      HChar fnname[80];
1929      Bool ok;
1930      Addr ip = ip_post_call_insn;
1931      ok = VG_(get_fnname_w_offset)( ip, fnname, sizeof(fnname) );
1932      while (d > 0) {
1933         VG_(printf)(" ");
1934         d--;
1935      }
1936      VG_(printf)("> %s %#lx\n", ok ? fnname : "???", ip);
1937    }
1938 }
1939 
1940 /* CALLED FROM GENERATED CODE */
1941 static
1942 VG_REGPARM(3)
1943 void helperc__new_frame ( Addr sp_post_call_insn,
1944                           Addr fp_at_call_insn,
1945                           Addr ip_post_call_insn,
1946                           XArray* blocks_at_call_insn,
1947                           Word sp_adjust )
1948 {
1949    ThreadId tid = VG_(get_running_tid)();
1950    Addr     sp_at_call_insn = sp_post_call_insn + sp_adjust;
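        /* sp_adjust is the guest word size (see the call site in
           sg_instrument_final_jump), i.e. the size of the return
           address pushed by the call insn, so the addition
           reconstructs SP as it was at the call instruction itself. */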
1951    shadowStack_new_frame( tid,
1952                           sp_at_call_insn,
1953                           sp_post_call_insn,
1954                           fp_at_call_insn,
1955                           ip_post_call_insn,
1956                           blocks_at_call_insn );
1957 }
1958 
1959 
1960 ////////////////////////////////////////
1961 /* Primary remove-frame(s) routine.  Called indirectly from
1962    generated code. */
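     /* Note that the loop below pops every frame whose creation_sp
        lies strictly below the new SP, so a single call here can
        remove several shadow frames at once -- as needed for
        multi-level returns and longjmp. */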
1963 
1964 __attribute__((noinline))
1965 static void shadowStack_unwind ( ThreadId tid, Addr sp_now )
1966 {
1967    StackFrame *innermost, *innermostOrig;
1968    tl_assert(is_sane_TId(tid));
1969    innermost = shadowStacks[tid];
1970    tl_assert(innermost);
1971    innermostOrig = innermost;
1972    //VG_(printf)("UNWIND sp_new = %p\n", sp_now);
1973    while (1) {
1974       if (!innermost->outer)
1975          break;
1976       if (innermost->inner)
1977          tl_assert(innermost->inner->outer == innermost);
1978       tl_assert(innermost->outer->inner == innermost);
1979       tl_assert(innermost->blocks_added_by_call == NULL);
1980       if (sp_now <= innermost->creation_sp) break;
1981       //VG_(printf)("UNWIND     dump %p\n", innermost->creation_sp);
1982       tl_assert(innermost->htab);
1983       if (innermost->htab != &innermost->htab_fixed[0])
1984          sg_free(innermost->htab);
1985       /* be on the safe side */
1986       innermost->creation_sp = 0;
1987       innermost->htab = NULL;
1988       innermost->htab_size = 0;
1989       innermost->htab_used = 0;
1990       innermost->sp_at_call = 0;
1991       innermost->fp_at_call = 0;
1992       innermost->blocks_added_by_call = NULL;
1993       innermost = innermost->outer;
1994 
1995       /* So now we're "back" in the calling frame.  Remove from this
1996          thread's stack-interval-tree, the blocks added at the time of
1997          the call. */
1998 
1999       if (innermost->outer) { /* not at the outermost frame */
2000          if (innermost->blocks_added_by_call != NULL) {
2001             /* remove the blocks that were added at the call */
2002             del_blocks_from_StackTree( siTrees[tid],
2003                                        innermost->blocks_added_by_call );
2004             VG_(deleteXA)( innermost->blocks_added_by_call );
2005             innermost->blocks_added_by_call = NULL;
2006          }
2007       }
2008       /* That completes the required tidying of the interval tree
2009          associated with the frame we just removed. */
2010 
2011       if (0) {
2012          Word d = innermost->depth;
2013          while (d > 0) {
2014             VG_(printf)(" ");
2015             d--;
2016          }
2017          VG_(printf)("X\n");
2018       }
2019 
2020    }
2021 
2022    tl_assert(innermost);
2023 
2024    if (innermost != innermostOrig) {
2025       shadowStacks[tid] = innermost;
2026       /* this thread's query cache is now invalid */
2027       QCache__invalidate( &qcaches[tid] );
2028    }
2029 }
2030 
2031 
2032 
2033 //////////////////////////////////////////////////////////////
2034 //                                                          //
2035 // Instrumentation                                          //
2036 //                                                          //
2037 //////////////////////////////////////////////////////////////
2038 
2039 /* What does instrumentation need to do?
2040 
2041    - at each Call transfer, generate a call to shadowStack_new_frame;
2042      do this by manually inspecting the IR
2043 
2044    - at each sp change that shrinks the stack, call
2045      shadowStack_unwind; do this by asking the core for
2046      SP-change analysis
2047 
2048    - for each memory referencing instruction,
2049      call helperc__mem_access
2050 */
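
     /* In this file those three duties are discharged by,
        respectively, sg_instrument_final_jump (Call transfers),
        sg_die_mem_stack -- reached via the core's SP-change analysis,
        and itself calling shadowStack_unwind -- and
        sg_instrument_IRStmt (memory refs). */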
2051 
2052 /* A complication: sg_ instrumentation and h_ instrumentation need to
2053    be interleaved.  Since the latter is a lot more complex than the
2054    former, we split the sg_ instrumentation here into four functions
2055    and let the h_ instrumenter call the four functions as it goes.
2056    Hence the h_ instrumenter drives the sg_ instrumenter.
2057 
2058    To make this viable, the sg_ instrumenter carries what running
2059    state it needs in 'struct _SGEnv'.  This is exported only
2060    abstractly from this file.
2061 */
2062 
2063 struct _SGEnv {
2064    /* the current insn's IP */
2065    Addr64 curr_IP;
2066    /* whether the above is actually known */
2067    Bool curr_IP_known;
2068    /* if we find a mem ref, is it the first for this insn?  Used for
2069       detecting insns which make more than one memory ref, a situation
2070       we basically can't really handle properly; and so we ignore all
2071       we can't really handle properly, and so we ignore all
2072    Bool firstRef;
2073    /* READONLY */
2074    IRTemp (*newIRTemp_cb)(IRType,void*);
2075    void* newIRTemp_opaque;
2076 };
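
     /* A minimal sketch of how a driving instrumenter might use the
        four entry points below; the real driver is the h_
        instrumenter, and 'sbIn', 'my_newIRTemp_cb' and 'opaque' are
        hypothetical names.  Note each statement is added to sbOut
        only after it has been instrumented, as sg_instrument_IRStmt
        requires:

           struct _SGEnv* env = sg_instrument_init( my_newIRTemp_cb,
                                                    opaque );
           for (i = 0; i < sbIn->stmts_used; i++) {
              sg_instrument_IRStmt( env, sbOut, sbIn->stmts[i],
                                    layout, gWordTy, hWordTy );
              addStmtToIRSB( sbOut, sbIn->stmts[i] );
           }
           sg_instrument_final_jump( env, sbOut, sbIn->next,
                                     sbIn->jumpkind, layout,
                                     gWordTy, hWordTy );
           sg_instrument_fini( env );
     */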
2077 
2078 
2079 /* --- Helper fns for instrumentation --- */
2080 
2081 static IRTemp gen_Get_SP ( struct _SGEnv*  sge,
2082                            IRSB*           bbOut,
2083                            VexGuestLayout* layout,
2084                            Int             hWordTy_szB )
2085 {
2086    IRExpr* sp_expr;
2087    IRTemp  sp_temp;
2088    IRType  sp_type;
2089    /* This in effect forces the host and guest word sizes to be the
2090       same. */
2091    tl_assert(hWordTy_szB == layout->sizeof_SP);
2092    sp_type = layout->sizeof_SP == 8 ? Ity_I64 : Ity_I32;
2093    sp_expr = IRExpr_Get( layout->offset_SP, sp_type );
2094    sp_temp = sge->newIRTemp_cb( sp_type, sge->newIRTemp_opaque );
2095    addStmtToIRSB( bbOut, IRStmt_WrTmp( sp_temp, sp_expr ) );
2096    return sp_temp;
2097 }
2098 
2099 static IRTemp gen_Get_FP ( struct _SGEnv*  sge,
2100                            IRSB*           bbOut,
2101                            VexGuestLayout* layout,
2102                            Int             hWordTy_szB )
2103 {
2104    IRExpr* fp_expr;
2105    IRTemp  fp_temp;
2106    IRType  fp_type;
2107    /* This in effect forces the host and guest word sizes to be the
2108       same. */
2109    tl_assert(hWordTy_szB == layout->sizeof_SP);
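        /* (The assert checks against sizeof_SP even in this FP
           routine; only the type of FP itself, just below, comes from
           sizeof_FP.) */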
2110    fp_type = layout->sizeof_FP == 8 ? Ity_I64 : Ity_I32;
2111    fp_expr = IRExpr_Get( layout->offset_FP, fp_type );
2112    fp_temp = sge->newIRTemp_cb( fp_type, sge->newIRTemp_opaque );
2113    addStmtToIRSB( bbOut, IRStmt_WrTmp( fp_temp, fp_expr ) );
2114    return fp_temp;
2115 }
2116 
2117 static void instrument_mem_access ( struct _SGEnv* sge,
2118                                     IRSB*   bbOut,
2119                                     IRExpr* addr,
2120                                     Int     szB,
2121                                     Bool    isStore,
2122                                     Int     hWordTy_szB,
2123                                     Addr    curr_IP,
2124                                     VexGuestLayout* layout )
2125 {
2126    IRType  tyAddr      = Ity_INVALID;
2127    XArray* frameBlocks = NULL;
2128 
2129    tl_assert(isIRAtom(addr));
2130    tl_assert(hWordTy_szB == 4 || hWordTy_szB == 8);
2131 
2132    tyAddr = typeOfIRExpr( bbOut->tyenv, addr );
2133    tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
2134 
2135 #if defined(VGA_x86)
2136    { UChar* p = (UChar*)curr_IP;
2137      // pop %ebp; RET
2138      if (p[0] == 0xc3 && p[-1] == 0x5d) return;
2139      // pop %ebp; RET $imm16
2140      if (p[0] == 0xc2 && p[-1] == 0x5d) return;
2141      // PUSH %EBP; mov %esp,%ebp
2142      if (p[0] == 0x55 && p[1] == 0x89 && p[2] == 0xe5) return;
2143    }
2144 #endif
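        /* (The byte patterns above match the usual x86 epilogue and
           prologue sequences; presumably the accesses made by those
           exact insns straddle the instant at which the frame layout
           changes, so checking them would mostly produce noise.) */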
2145 
2146    /* First off, find or create the StackBlocks for this instruction. */
2147    frameBlocks = get_StackBlocks_for_IP( curr_IP );
2148    tl_assert(frameBlocks);
2149    //if (VG_(sizeXA)( frameBlocks ) == 0)
2150    //   frameBlocks = NULL;
2151 
2152    /* Generate a call to "helperc__mem_access", passing:
2153          addr current_SP current_FP szB curr_IP frameBlocks
2154    */
2155    { IRTemp t_SP = gen_Get_SP( sge, bbOut, layout, hWordTy_szB );
2156      IRTemp t_FP = gen_Get_FP( sge, bbOut, layout, hWordTy_szB );
2157      IRExpr** args
2158         = mkIRExprVec_6( addr,
2159                          IRExpr_RdTmp(t_SP),
2160                          IRExpr_RdTmp(t_FP),
2161                          mkIRExpr_HWord( isStore ? (-szB) : szB ),
2162                          mkIRExpr_HWord( curr_IP ),
2163                          mkIRExpr_HWord( (HWord)frameBlocks ) );
2164      IRDirty* di
2165         = unsafeIRDirty_0_N( 3/*regparms*/,
2166                              "helperc__mem_access",
2167                              VG_(fnptr_to_fnentry)( &helperc__mem_access ),
2168                              args );
2169 
2170      addStmtToIRSB( bbOut, IRStmt_Dirty(di) );
2171    }
2172 }
2173 
2174 
2175 /* --- Instrumentation main (4 fns) --- */
2176 
2177 struct _SGEnv * sg_instrument_init ( IRTemp (*newIRTemp_cb)(IRType,void*),
2178                                      void* newIRTemp_opaque )
2179 {
2180    struct _SGEnv * env = sg_malloc("di.sg_main.sii.1",
2181                                    sizeof(struct _SGEnv));
2182    tl_assert(env);
2183    env->curr_IP          = 0;
2184    env->curr_IP_known    = False;
2185    env->firstRef         = True;
2186    env->newIRTemp_cb     = newIRTemp_cb;
2187    env->newIRTemp_opaque = newIRTemp_opaque;
2188    return env;
2189 }
2190 
2191 void sg_instrument_fini ( struct _SGEnv * env )
2192 {
2193    sg_free(env);
2194 }
2195 
2196 /* Add instrumentation for 'st' to 'sbOut', and possibly modify 'env'
2197    as required.  This must be called before 'st' itself is added to
2198    'sbOut'. */
2199 void sg_instrument_IRStmt ( /*MOD*/struct _SGEnv * env,
2200                             /*MOD*/IRSB* sbOut,
2201                             IRStmt* st,
2202                             VexGuestLayout* layout,
2203                             IRType gWordTy, IRType hWordTy )
2204 {
2205    if (!sg_clo_enable_sg_checks)
2206       return;
2207 
2208    tl_assert(st);
2209    tl_assert(isFlatIRStmt(st));
2210    switch (st->tag) {
2211       case Ist_NoOp:
2212       case Ist_AbiHint:
2213       case Ist_Put:
2214       case Ist_PutI:
2215       case Ist_MBE:
2216          /* None of these can contain any memory references. */
2217          break;
2218 
2219       case Ist_Exit:
2220          tl_assert(st->Ist.Exit.jk != Ijk_Call);
2221          /* else we must deal with a conditional call */
2222          break;
2223 
2224       case Ist_IMark:
2225          env->curr_IP_known = True;
2226          env->curr_IP       = (Addr)st->Ist.IMark.addr;
2227          env->firstRef      = True;
2228          break;
2229 
2230       case Ist_Store:
2231          tl_assert(env->curr_IP_known);
2232          if (env->firstRef) {
2233             instrument_mem_access(
2234                env, sbOut,
2235                st->Ist.Store.addr,
2236                sizeofIRType(typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data)),
2237                True/*isStore*/,
2238                sizeofIRType(hWordTy),
2239                env->curr_IP, layout
2240             );
2241             env->firstRef = False;
2242          }
2243          break;
2244 
2245       case Ist_WrTmp: {
2246          IRExpr* data = st->Ist.WrTmp.data;
2247          if (data->tag == Iex_Load) {
2248             tl_assert(env->curr_IP_known);
2249             if (env->firstRef) {
2250                instrument_mem_access(
2251                   env, sbOut,
2252                   data->Iex.Load.addr,
2253                   sizeofIRType(data->Iex.Load.ty),
2254                   False/*!isStore*/,
2255                   sizeofIRType(hWordTy),
2256                   env->curr_IP, layout
2257                );
2258                env->firstRef = False;
2259             }
2260          }
2261          break;
2262       }
2263 
2264       case Ist_Dirty: {
2265          Int      dataSize;
2266          IRDirty* d = st->Ist.Dirty.details;
2267          if (d->mFx != Ifx_None) {
2268             /* This dirty helper accesses memory.  Collect the
2269                details. */
2270             tl_assert(env->curr_IP_known);
2271             if (env->firstRef) {
2272                tl_assert(d->mAddr != NULL);
2273                tl_assert(d->mSize != 0);
2274                dataSize = d->mSize;
2275                if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
2276                   instrument_mem_access(
2277                      env, sbOut, d->mAddr, dataSize, False/*!isStore*/,
2278                      sizeofIRType(hWordTy), env->curr_IP, layout
2279                   );
2280                }
2281                if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
2282                   instrument_mem_access(
2283                      env, sbOut, d->mAddr, dataSize, True/*isStore*/,
2284                      sizeofIRType(hWordTy), env->curr_IP, layout
2285                   );
2286                }
2287                env->firstRef = False;
2288             }
2289          } else {
2290             tl_assert(d->mAddr == NULL);
2291             tl_assert(d->mSize == 0);
2292          }
2293          break;
2294       }
2295 
2296       case Ist_CAS: {
2297          /* We treat it as a read and a write of the location.  I
2298             think that is the same behaviour as it was before IRCAS
2299             was introduced, since prior to that point, the Vex front
2300             ends would translate a lock-prefixed instruction into a
2301             (normal) read followed by a (normal) write. */
2302          if (env->firstRef) {
2303             Int    dataSize;
2304             IRCAS* cas = st->Ist.CAS.details;
2305             tl_assert(cas->addr != NULL);
2306             tl_assert(cas->dataLo != NULL);
2307             dataSize = sizeofIRType(typeOfIRExpr(sbOut->tyenv, cas->dataLo));
2308             if (cas->dataHi != NULL)
2309                dataSize *= 2; /* since it's a doubleword-CAS */
2310             instrument_mem_access(
2311                env, sbOut, cas->addr, dataSize, False/*!isStore*/,
2312                sizeofIRType(hWordTy), env->curr_IP, layout
2313             );
2314             instrument_mem_access(
2315                env, sbOut, cas->addr, dataSize, True/*isStore*/,
2316                sizeofIRType(hWordTy), env->curr_IP, layout
2317             );
2318             env->firstRef = False;
2319          }
2320          break;
2321       }
2322 
2323       default:
2324          tl_assert(0);
2325 
2326    } /* switch (st->tag) */
2327 }
2328 
2329 
2330 /* Add instrumentation for the final jump of an IRSB 'sbOut', and
2331    possibly modify 'env' as required.  This must be the last
2332    instrumentation statement in the block. */
2333 void sg_instrument_final_jump ( /*MOD*/struct _SGEnv * env,
2334                                 /*MOD*/IRSB* sbOut,
2335                                 IRExpr* next,
2336                                 IRJumpKind jumpkind,
2337                                 VexGuestLayout* layout,
2338                                 IRType gWordTy, IRType hWordTy )
2339 {
2340    if (!sg_clo_enable_sg_checks)
2341       return;
2342 
2343    if (jumpkind == Ijk_Call) {
2344       // Assumes x86 or amd64
2345       IRTemp   sp_post_call_insn, fp_post_call_insn;
2346       XArray*  frameBlocks;
2347       IRExpr** args;
2348       IRDirty* di;
2349       sp_post_call_insn
2350          = gen_Get_SP( env, sbOut, layout, sizeofIRType(hWordTy) );
2351       fp_post_call_insn
2352          = gen_Get_FP( env, sbOut, layout, sizeofIRType(hWordTy) );
2353       tl_assert(env->curr_IP_known);
2354       frameBlocks = get_StackBlocks_for_IP( env->curr_IP );
2355       tl_assert(frameBlocks);
2356       if (VG_(sizeXA)(frameBlocks) == 0)
2357          frameBlocks = NULL;
2358       args
2359          = mkIRExprVec_5(
2360               IRExpr_RdTmp(sp_post_call_insn),
2361               IRExpr_RdTmp(fp_post_call_insn),
2362                          /* assume the call doesn't change FP */
2363               next,
2364               mkIRExpr_HWord( (HWord)frameBlocks ),
2365               mkIRExpr_HWord( sizeofIRType(gWordTy) )
2366            );
2367       di = unsafeIRDirty_0_N(
2368               3/*regparms*/,
2369               "helperc__new_frame",
2370               VG_(fnptr_to_fnentry)( &helperc__new_frame ),
2371               args );
2372       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
2373    }
2374 }
2375 
2376 
2377 //////////////////////////////////////////////////////////////
2378 //                                                          //
2379 // end Instrumentation                                      //
2380 //                                                          //
2381 //////////////////////////////////////////////////////////////
2382 
2383 
2384 //////////////////////////////////////////////////////////////
2385 //                                                          //
2386 // misc                                                     //
2387 //                                                          //
2388 //////////////////////////////////////////////////////////////
2389 
2390 /* Make a new empty stack frame that is suitable for being the
2391    outermost frame in a stack.  It has a creation_sp of effectively
2392    infinity, so it can never be removed. */
2393 static StackFrame* new_root_StackFrame ( void )
2394 {
2395    StackFrame* sframe = sg_malloc("di.sg_main.nrS.1", sizeof(StackFrame));
2396    VG_(memset)( sframe, 0, sizeof(*sframe) );
2397    sframe->creation_sp = ~0UL;
2398 
2399    /* This sets up .htab, .htab_size and .htab_used */
2400    initialise_II_hash_table( sframe );
2401 
2402    /* ->depth, ->outer, ->inner are 0, NULL, NULL */
2403 
2404    return sframe;
2405 }
2406 
2407 /* Primary routine for setting up the shadow stack for a new thread.
2408    Note that this is used to create not only child thread stacks, but
2409    the root thread's stack too.  We create a new stack with
2410    .creation_sp set to infinity, so that the outermost frame can never
2411    be removed (by shadowStack_unwind).  The core calls this function
2412    as soon as a thread is created.  We cannot yet get its SP value,
2413    since that may not yet be set. */
2414 static void shadowStack_thread_create ( ThreadId parent, ThreadId child )
2415 {
2416    tl_assert(is_sane_TId(child));
2417    if (parent == VG_INVALID_THREADID) {
2418       /* creating the main thread's stack */
2419    } else {
2420       tl_assert(is_sane_TId(parent));
2421       tl_assert(parent != child);
2422       tl_assert(shadowStacks[parent] != NULL);
2423       tl_assert(siTrees[parent] != NULL);
2424    }
2425 
2426    /* Create the child's stack.  Bear in mind we may be re-using
2427       it. */
2428    if (shadowStacks[child] == NULL) {
2429       /* First use of this stack.  Just allocate an initial frame. */
2430       tl_assert(siTrees[child] == NULL);
2431    } else {
2432       StackFrame *frame, *frame2;
2433       /* re-using a stack. */
2434       /* get rid of the interval tree */
2435       tl_assert(siTrees[child] != NULL);
2436       delete_StackTree( siTrees[child] );
2437       siTrees[child] = NULL;
2438       /* Throw away all existing frames. */
2439       frame = shadowStacks[child];
2440       while (frame->outer)
2441          frame = frame->outer;
2442       tl_assert(frame->depth == 0);
2443       while (frame) {
2444          frame2 = frame->inner;
2445          if (frame2) tl_assert(1 + frame->depth == frame2->depth);
2446          sg_free(frame);
2447          frame = frame2;
2448       }
2449       shadowStacks[child] = NULL;
2450    }
2451 
2452    tl_assert(shadowStacks[child] == NULL);
2453    tl_assert(siTrees[child] == NULL);
2454 
2455    /* Set up the initial stack frame. */
2456    shadowStacks[child] = new_root_StackFrame();
2457 
2458    /* and set up the child's stack block interval tree. */
2459    siTrees[child] = new_StackTree();
2460 }
2461 
2462 /* Once a thread is ready to go, the core calls here.  We take the
2463    opportunity to push a second frame on its stack, with the
2464    presumably valid SP value that is going to be used for the thread's
2465    startup.  Hence we should always wind up with a valid outermost
2466    frame for the thread. */
2467 static void shadowStack_set_initial_SP ( ThreadId tid )
2468 {
2469    StackFrame* sf;
2470    tl_assert(is_sane_TId(tid));
2471    sf = shadowStacks[tid];
2472    tl_assert(sf != NULL);
2473    tl_assert(sf->outer == NULL);
2474    tl_assert(sf->inner == NULL);
2475    tl_assert(sf->creation_sp == ~0UL);
2476    shadowStack_new_frame( tid, 0, VG_(get_SP)(tid),
2477                                0, VG_(get_IP)(tid), NULL );
2478 }
2479 
2480 
2481 //////////////////////////////////////////////////////////////
2482 //                                                          //
2483 // main-ish                                                 //
2484 //                                                          //
2485 //////////////////////////////////////////////////////////////
2486 
2487 /* CALLED indirectly FROM GENERATED CODE.  Calls here are created by
2488    sp-change analysis, as requested in pc_pre_clo_int(). */
2489 void sg_die_mem_stack ( Addr old_SP, SizeT len ) {
2490    ThreadId  tid = VG_(get_running_tid)();
2491    shadowStack_unwind( tid, old_SP+len );
2492 }
2493 
2494 void sg_pre_clo_init ( void ) {
2495    ourGlobals_init();
2496    init_StackBlocks_set();
2497    init_GlobalBlock_set();
2498 }
2499 
2500 void sg_post_clo_init ( void ) {
2501 }
2502 
2503 void sg_pre_thread_ll_create ( ThreadId parent, ThreadId child ) {
2504    shadowStack_thread_create(parent, child);
2505 }
2506 
2507 void sg_pre_thread_first_insn ( ThreadId tid ) {
2508    shadowStack_set_initial_SP(tid);
2509 }
2510 
2511 void sg_fini(Int exitcode)
2512 {
2513    if (VG_(clo_stats)) {
2514       VG_(message)(Vg_DebugMsg,
2515          " sg_:  %'llu total accesses, of which:\n", stats__total_accesses);
2516       VG_(message)(Vg_DebugMsg,
2517          " sg_:     stack0: %'12llu classify\n",
2518          stats__classify_Stack0);
2519       VG_(message)(Vg_DebugMsg,
2520          " sg_:     stackN: %'12llu classify\n",
2521          stats__classify_StackN);
2522       VG_(message)(Vg_DebugMsg,
2523          " sg_:     global: %'12llu classify\n",
2524          stats__classify_Global);
2525       VG_(message)(Vg_DebugMsg,
2526          " sg_:    unknown: %'12llu classify\n",
2527          stats__classify_Unknown);
2528       VG_(message)(Vg_DebugMsg,
2529          " sg_:  %'llu Invars preened, of which %'llu changed\n",
2530          stats__Invars_preened, stats__Invars_changed);
2531       VG_(message)(Vg_DebugMsg,
2532          " sg_:   t_i_b_MT: %'12llu\n", stats__t_i_b_empty);
2533       VG_(message)(Vg_DebugMsg,
2534          " sg_:     qcache: %'llu searches, %'llu probes, %'llu misses\n",
2535          stats__qcache_queries, stats__qcache_probes, stats__qcache_misses);
2536       VG_(message)(Vg_DebugMsg,
2537          " sg_:  htab-fast: %'llu hits\n",
2538          stats__htab_fast);
2539       VG_(message)(Vg_DebugMsg,
2540          " sg_:  htab-slow: %'llu searches, %'llu probes, %'llu resizes\n",
2541          stats__htab_searches, stats__htab_probes, stats__htab_resizes);
2542    }
2543 }
2544 
2545 
2546 /*--------------------------------------------------------------------*/
2547 /*--- end                                                sg_main.c ---*/
2548 /*--------------------------------------------------------------------*/
2549