• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- Ptrcheck: a pointer-use checker.                             ---*/
4 /*--- This file checks stack and global array accesses.            ---*/
5 /*---                                                    sg_main.c ---*/
6 /*--------------------------------------------------------------------*/
7 
8 /*
9    This file is part of Ptrcheck, a Valgrind tool for checking pointer
10    use in programs.
11 
12    Copyright (C) 2008-2015 OpenWorks Ltd
13       info@open-works.co.uk
14 
15    This program is free software; you can redistribute it and/or
16    modify it under the terms of the GNU General Public License as
17    published by the Free Software Foundation; either version 2 of the
18    License, or (at your option) any later version.
19 
20    This program is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received a copy of the GNU General Public License
26    along with this program; if not, write to the Free Software
27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28    02111-1307, USA.
29 
30    The GNU General Public License is contained in the file COPYING.
31 
32    Neither the names of the U.S. Department of Energy nor the
33    University of California nor the names of its contributors may be
34    used to endorse or promote products derived from this software
35    without prior written permission.
36 */
37 
38 #include "pub_tool_basics.h"
39 #include "pub_tool_libcbase.h"
40 #include "pub_tool_libcassert.h"
41 #include "pub_tool_libcprint.h"
42 #include "pub_tool_tooliface.h"
43 #include "pub_tool_wordfm.h"
44 #include "pub_tool_xarray.h"
45 #include "pub_tool_threadstate.h"
46 #include "pub_tool_mallocfree.h"
47 #include "pub_tool_machine.h"
48 #include "pub_tool_debuginfo.h"
49 #include "pub_tool_options.h"
50 
51 #include "pc_common.h"
52 
53 #include "sg_main.h"      // self
54 
55 
56 static
57 void preen_global_Invars ( Addr a, SizeT len ); /*fwds*/
58 
59 
60 //////////////////////////////////////////////////////////////
61 //                                                          //
62 // Basic Stuff                                              //
63 //                                                          //
64 //////////////////////////////////////////////////////////////
65 
is_sane_TId(ThreadId tid)66 static inline Bool is_sane_TId ( ThreadId tid )
67 {
68    return tid >= 0 && tid < VG_N_THREADS
69           && tid != VG_INVALID_THREADID;
70 }
71 
sg_malloc(const HChar * cc,SizeT n)72 static void* sg_malloc ( const HChar* cc, SizeT n ) {
73    void* p;
74    tl_assert(n > 0);
75    p = VG_(malloc)( cc, n );
76    return p;
77 }
78 
sg_free(void * p)79 static void sg_free ( void* p ) {
80    tl_assert(p);
81    VG_(free)(p);
82 }
83 
84 
85 /* Compare the intervals [a1,a1+n1) and [a2,a2+n2).  Return -1 if the
86    first interval is lower, 1 if the first interval is higher, and 0
87    if there is any overlap.  Redundant paranoia with casting is there
88    following what looked distinctly like a bug in gcc-4.1.2, in which
89    some of the comparisons were done signedly instead of
90    unsignedly. */
91 inline
cmp_nonempty_intervals(Addr a1,SizeT n1,Addr a2,SizeT n2)92 static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
93                                      Addr a2, SizeT n2 ) {
94    UWord a1w = (UWord)a1;
95    UWord n1w = (UWord)n1;
96    UWord a2w = (UWord)a2;
97    UWord n2w = (UWord)n2;
98    tl_assert(n1w > 0 && n2w > 0);
99    if (a1w + n1w <= a2w) return -1L;
100    if (a2w + n2w <= a1w) return 1L;
101    return 0;
102 }
103 
104 /* Return true iff [aSmall,aSmall+nSmall) is entirely contained
105    within [aBig,aBig+nBig). */
106 inline
is_subinterval_of(Addr aBig,SizeT nBig,Addr aSmall,SizeT nSmall)107 static Bool is_subinterval_of ( Addr aBig, SizeT nBig,
108                                 Addr aSmall, SizeT nSmall ) {
109    tl_assert(nBig > 0 && nSmall > 0);
110    return aBig <= aSmall && aSmall + nSmall <= aBig + nBig;
111 }
112 
113 inline
Addr__min(Addr a1,Addr a2)114 static Addr Addr__min ( Addr a1, Addr a2 ) {
115    return a1 < a2 ? a1 : a2;
116 }
117 
118 inline
Addr__max(Addr a1,Addr a2)119 static Addr Addr__max ( Addr a1, Addr a2 ) {
120    return a1 < a2 ? a2 : a1;
121 }
122 
123 
124 //////////////////////////////////////////////////////////////
125 //                                                          //
126 // StackBlocks Persistent Cache                             //
127 //                                                          //
128 //////////////////////////////////////////////////////////////
129 
130 /* We maintain a set of XArray* of StackBlocks.  These are never
131    freed.  When a new StackBlock vector is acquired from
132    VG_(di_get_local_blocks_at_ip), we compare it to the existing set.
133    If not present, it is added.  If present, the just-acquired one is
134    freed and the copy used.
135 
136    This simplifies storage management elsewhere.  It allows us to
137    assume that a pointer to an XArray* of StackBlock is valid forever.
138    It also means there are no duplicates anywhere, which could be
139    important from a space point of view for programs that generate a
140    lot of translations, or where translations are frequently discarded
141    and re-made.
142 
143    Note that we normalise the arrays by sorting the elements according
144    to an arbitrary total order, so as to avoid the situation that two
145    vectors describe the same set of variables but are not structurally
146    identical. */
147 
StackBlock__sane(const StackBlock * fb)148 static inline Bool StackBlock__sane ( const StackBlock* fb )
149 {
150    if (fb->name[ sizeof(fb->name)-1 ] != 0)
151       return False;
152    if (fb->spRel != False && fb->spRel != True)
153       return False;
154    if (fb->isVec != False && fb->isVec != True)
155       return False;
156    return True;
157 }
158 
159 /* Generate an arbitrary total ordering on StackBlocks. */
StackBlock__cmp(const StackBlock * fb1,const StackBlock * fb2)160 static Word StackBlock__cmp ( const StackBlock* fb1, const StackBlock* fb2 )
161 {
162    Word r;
163    tl_assert(StackBlock__sane(fb1));
164    tl_assert(StackBlock__sane(fb2));
165    /* Hopefully the .base test hits most of the time.  For the blocks
166       associated with any particular instruction, if the .base values
167       are the same then probably it doesn't make sense for the other
168       fields to be different.  But this is supposed to be a completely
169       general structural total order, so we have to compare everything
170       anyway. */
171    if (fb1->base < fb2->base) return -1;
172    if (fb1->base > fb2->base) return 1;
173    /* compare sizes */
174    if (fb1->szB < fb2->szB) return -1;
175    if (fb1->szB > fb2->szB) return 1;
176    /* compare sp/fp flag */
177    if (fb1->spRel < fb2->spRel) return -1;
178    if (fb1->spRel > fb2->spRel) return 1;
179    /* compare is/is-not array-typed flag */
180    if (fb1->isVec < fb2->isVec) return -1;
181    if (fb1->isVec > fb2->isVec) return 1;
182    /* compare the name */
183    r = (Word)VG_(strcmp)(fb1->name, fb2->name);
184    return r;
185 }
186 
187 /* Returns True if all fields except .szB are the same.  szBs may or
188    may not be the same; they are simply not consulted. */
StackBlock__all_fields_except_szB_are_equal(StackBlock * fb1,StackBlock * fb2)189 static Bool StackBlock__all_fields_except_szB_are_equal (
190                StackBlock* fb1,
191                StackBlock* fb2
192             )
193 {
194    tl_assert(StackBlock__sane(fb1));
195    tl_assert(StackBlock__sane(fb2));
196    return fb1->base == fb2->base
197           && fb1->spRel == fb2->spRel
198           && fb1->isVec == fb2->isVec
199           && 0 == VG_(strcmp)(fb1->name, fb2->name);
200 }
201 
202 
203 /* Generate an arbitrary total ordering on vectors of StackBlocks. */
StackBlocks__cmp(XArray * fb1s,XArray * fb2s)204 static Word StackBlocks__cmp ( XArray* fb1s, XArray* fb2s )
205 {
206    Word i, r, n1, n2;
207    n1 = VG_(sizeXA)( fb1s );
208    n2 = VG_(sizeXA)( fb2s );
209    if (n1 < n2) return -1;
210    if (n1 > n2) return 1;
211    for (i = 0; i < n1; i++) {
212       StackBlock *fb1, *fb2;
213       fb1 = VG_(indexXA)( fb1s, i );
214       fb2 = VG_(indexXA)( fb2s, i );
215       r = StackBlock__cmp( fb1, fb2 );
216       if (r != 0) return r;
217    }
218    tl_assert(i == n1 && i == n2);
219    return 0;
220 }
221 
pp_StackBlocks(XArray * sbs)222 static void pp_StackBlocks ( XArray* sbs )
223 {
224    Word i, n = VG_(sizeXA)( sbs );
225    VG_(message)(Vg_DebugMsg, "<<< STACKBLOCKS\n" );
226    for (i = 0; i < n; i++) {
227       StackBlock* sb = (StackBlock*)VG_(indexXA)( sbs, i );
228       VG_(message)(Vg_DebugMsg,
229          "   StackBlock{ off %ld szB %lu spRel:%c isVec:%c \"%s\" }\n",
230          sb->base, sb->szB, sb->spRel ? 'Y' : 'N',
231          sb->isVec ? 'Y' : 'N', &sb->name[0]
232       );
233    }
234    VG_(message)(Vg_DebugMsg, ">>> STACKBLOCKS\n" );
235 }
236 
237 
238 /* ---------- The StackBlock vector cache ---------- */
239 
240 static WordFM* /* XArray* of StackBlock -> nothing */
241        frameBlocks_set = NULL;
242 
init_StackBlocks_set(void)243 static void init_StackBlocks_set ( void )
244 {
245    tl_assert(!frameBlocks_set);
246    frameBlocks_set
247       = VG_(newFM)( sg_malloc, "di.sg_main.iSBs.1", sg_free,
248                     (Word(*)(UWord,UWord))StackBlocks__cmp );
249    tl_assert(frameBlocks_set);
250 }
251 
252 /* Find the given StackBlock-vector in our collection thereof.  If
253    found, deallocate the supplied one, and return the address of the
254    copy.  If not found, add the supplied one to our collection and
255    return its address. */
256 static XArray* /* of StackBlock */
StackBlocks__find_and_dealloc__or_add(XArray * orig)257        StackBlocks__find_and_dealloc__or_add
258           ( XArray* /* of StackBlock */ orig )
259 {
260    UWord key, val;
261 
262    /* First, normalise, as per comments above. */
263    VG_(setCmpFnXA)( orig, (XACmpFn_t)StackBlock__cmp );
264    VG_(sortXA)( orig );
265 
266    /* Now get rid of any exact duplicates. */
267   nuke_dups:
268    { Word r, w, nEQ, n = VG_(sizeXA)( orig );
269      if (n >= 2) {
270         w = 0;
271         nEQ = 0;
272         for (r = 0; r < n; r++) {
273            if (r+1 < n) {
274               StackBlock* pR0 = VG_(indexXA)( orig, r+0 );
275               StackBlock* pR1 = VG_(indexXA)( orig, r+1 );
276               Word c = StackBlock__cmp(pR0,pR1);
277               tl_assert(c == -1 || c == 0);
278               if (c == 0) { nEQ++; continue; }
279            }
280            if (w != r) {
281               StackBlock* pW = VG_(indexXA)( orig, w );
282               StackBlock* pR = VG_(indexXA)( orig, r );
283               *pW = *pR;
284            }
285            w++;
286         }
287         tl_assert(r == n);
288         tl_assert(w + nEQ == n);
289         if (w < n) {
290            VG_(dropTailXA)( orig, n-w );
291         }
292         if (0) VG_(printf)("delta %ld\n", n-w);
293      }
294    }
295 
296    /* Deal with the following strangeness, where two otherwise
297       identical blocks are claimed to have different sizes.  In which
298       case we use the larger size. */
299    /* StackBlock{ off 16 szB 66 spRel:Y isVec:Y "sz" }
300       StackBlock{ off 16 szB 130 spRel:Y isVec:Y "sz" }
301       StackBlock{ off 208 szB 16 spRel:Y isVec:Y "ar" }
302    */
303    { Word i, n = VG_(sizeXA)( orig );
304      if (n >= 2) {
305         for (i = 0; i < n-1; i++) {
306            StackBlock* sb0 = VG_(indexXA)( orig, i+0 );
307            StackBlock* sb1 = VG_(indexXA)( orig, i+1 );
308            if (StackBlock__all_fields_except_szB_are_equal(sb0, sb1)) {
309               /* They can't be identical because the previous tidying
310                  pass would have removed the duplicates.  And they
311                  can't be > because the earlier sorting pass would
312                  have ordered otherwise-identical descriptors
313                  according to < on .szB fields.  Hence: */
314               tl_assert(sb0->szB < sb1->szB);
315               sb0->szB = sb1->szB;
316               /* This makes the blocks identical, at the size of the
317                  larger one.  Rather than go to all the hassle of
318                  sliding the rest down, simply go back to the
319                  remove-duplicates stage.  The assertion guarantees
320                  that we eventually make progress, since the rm-dups
321                  stage will get rid of one of the blocks.  This is
322                  expected to happen only exceedingly rarely. */
323               tl_assert(StackBlock__cmp(sb0,sb1) == 0);
324               goto nuke_dups;
325            }
326         }
327      }
328    }
329 
330    /* If there are any blocks which overlap and have the same
331       fpRel-ness, junk the whole descriptor; it's obviously bogus.
332       Icc11 certainly generates bogus info from time to time.
333 
334       This check is pretty weak; really we ought to have a stronger
335       sanity check. */
336    { Word i, n = VG_(sizeXA)( orig );
337      static Int moans = 3;
338      for (i = 0; i < n-1; i++) {
339        StackBlock* sb1 = (StackBlock*)VG_(indexXA)( orig, i );
340        StackBlock* sb2 = (StackBlock*)VG_(indexXA)( orig, i+1 );
341        if (sb1->spRel == sb2->spRel
342            && (sb1->base >= sb2->base
343                || sb1->base + sb1->szB > sb2->base)) {
344           if (moans > 0 && !VG_(clo_xml)) {
345              moans--;
346              VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
347                                       "overlapping stack blocks\n");
348              if (VG_(clo_verbosity) >= 2)
349                 pp_StackBlocks(orig);
350              if (moans == 0)
351                 VG_(message)(Vg_UserMsg, "Further instances of this "
352                                          "message will not be shown\n" );
353           }
354           VG_(dropTailXA)( orig, VG_(sizeXA)( orig ));
355           break;
356        }
357      }
358    }
359 
360    /* Now, do we have it already? */
361    if (VG_(lookupFM)( frameBlocks_set, &key, &val, (UWord)orig )) {
362       /* yes */
363       XArray* res;
364       tl_assert(val == 0);
365       tl_assert(key != (UWord)orig);
366       VG_(deleteXA)(orig);
367       res = (XArray*)key;
368       return res;
369    } else {
370       /* no */
371       VG_(addToFM)( frameBlocks_set, (UWord)orig, 0 );
372       return orig;
373    }
374 }
375 
376 /* Top level function for getting the StackBlock vector for a given
377    instruction.  It is guaranteed that the returned pointer will be
378    valid for the entire rest of the run, and also that the addresses
379    of the individual elements of the array will not change. */
380 
get_StackBlocks_for_IP(Addr ip)381 static XArray* /* of StackBlock */ get_StackBlocks_for_IP ( Addr ip )
382 {
383    XArray* blocks = VG_(di_get_stack_blocks_at_ip)( ip, True/*arrays only*/ );
384    tl_assert(blocks);
385    return StackBlocks__find_and_dealloc__or_add( blocks );
386 }
387 
388 
389 //////////////////////////////////////////////////////////////
390 //                                                          //
391 // GlobalBlocks Persistent Cache                            //
392 //                                                          //
393 //////////////////////////////////////////////////////////////
394 
395 /* Generate an arbitrary total ordering on GlobalBlocks. */
GlobalBlock__cmp(GlobalBlock * gb1,GlobalBlock * gb2)396 static Word GlobalBlock__cmp ( GlobalBlock* gb1, GlobalBlock* gb2 )
397 {
398    Word r;
399    /* compare addrs */
400    if (gb1->addr < gb2->addr) return -1;
401    if (gb1->addr > gb2->addr) return 1;
402    /* compare sizes */
403    if (gb1->szB < gb2->szB) return -1;
404    if (gb1->szB > gb2->szB) return 1;
405    /* compare is/is-not array-typed flag */
406    if (gb1->isVec < gb2->isVec) return -1;
407    if (gb1->isVec > gb2->isVec) return 1;
408    /* compare the name */
409    r = (Word)VG_(strcmp)(gb1->name, gb2->name);
410    if (r != 0) return r;
411    /* compare the soname */
412    r = (Word)VG_(strcmp)(gb1->soname, gb2->soname);
413    return r;
414 }
415 
416 static WordFM* /* GlobalBlock* -> nothing */
417        globalBlock_set = NULL;
418 
init_GlobalBlock_set(void)419 static void init_GlobalBlock_set ( void )
420 {
421    tl_assert(!globalBlock_set);
422     globalBlock_set
423        = VG_(newFM)( sg_malloc, "di.sg_main.iGBs.1", sg_free,
424                      (Word(*)(UWord,UWord))GlobalBlock__cmp );
425    tl_assert(globalBlock_set);
426 }
427 
428 
429 /* Top level function for making GlobalBlocks persistent.  Call here
430    with a non-persistent version, and the returned one is guaranteed
431    to be valid for the entire rest of the run.  The supplied one is
432    copied, not stored, so can be freed after the call. */
433 
get_persistent_GlobalBlock(GlobalBlock * orig)434 static GlobalBlock* get_persistent_GlobalBlock ( GlobalBlock* orig )
435 {
436    UWord key, val;
437    /* Now, do we have it already? */
438    if (VG_(lookupFM)( globalBlock_set, &key, &val, (UWord)orig )) {
439       /* yes, return the copy */
440       GlobalBlock* res;
441       tl_assert(val == 0);
442       res = (GlobalBlock*)key;
443       tl_assert(res != orig);
444       return res;
445    } else {
446       /* no.  clone it, store the clone and return the clone's
447          address. */
448       GlobalBlock* clone = sg_malloc( "di.sg_main.gpGB.1",
449                                       sizeof(GlobalBlock) );
450       tl_assert(clone);
451       *clone = *orig;
452       VG_(addToFM)( globalBlock_set, (UWord)clone, 0 );
453       return clone;
454    }
455 }
456 
457 
458 //////////////////////////////////////////////////////////////
459 //                                                          //
460 // Interval tree of StackTreeBlock                          //
461 //                                                          //
462 //////////////////////////////////////////////////////////////
463 
464 /* A node in a stack interval tree.  Zero length intervals (.szB == 0)
465    are not allowed.
466 
467    A stack interval tree is a (WordFM StackTreeNode* void).  There is
468    one stack interval tree for each thread.
469 */
470 typedef
471    struct {
472       Addr        addr;
473       SizeT       szB;   /* copied from .descr->szB */
474       StackBlock* descr; /* it's an instance of this block */
475       UWord       depth; /* depth of stack at time block was pushed */
476    }
477    StackTreeNode;
478 
pp_StackTree(WordFM * sitree,const HChar * who)479 static void pp_StackTree ( WordFM* sitree, const HChar* who )
480 {
481    UWord keyW, valW;
482    VG_(printf)("<<< BEGIN pp_StackTree %s\n", who );
483    VG_(initIterFM)( sitree );
484    while (VG_(nextIterFM)( sitree, &keyW, &valW )) {
485       StackTreeNode* nd = (StackTreeNode*)keyW;
486       VG_(printf)("  [%#lx,+%lu) descr=%p %s %lu\n", nd->addr, nd->szB,
487                   nd->descr, nd->descr->name, nd->descr->szB);
488    }
489    VG_(printf)(">>> END   pp_StackTree %s\n", who );
490 }
491 
492 /* Interval comparison function for StackTreeNode */
cmp_intervals_StackTreeNode(StackTreeNode * sn1,StackTreeNode * sn2)493 static Word cmp_intervals_StackTreeNode ( StackTreeNode* sn1,
494                                           StackTreeNode* sn2 )
495 {
496    return cmp_nonempty_intervals(sn1->addr, sn1->szB,
497                                  sn2->addr, sn2->szB);
498 }
499 
500 /* Find the node holding 'a', if any. */
find_StackTreeNode(WordFM * sitree,Addr a)501 static StackTreeNode* find_StackTreeNode ( WordFM* sitree, Addr a )
502 {
503    UWord keyW, valW;
504    StackTreeNode key;
505    tl_assert(sitree);
506    key.addr = a;
507    key.szB  = 1;
508    if (VG_(lookupFM)( sitree, &keyW, &valW, (UWord)&key )) {
509       StackTreeNode* res = (StackTreeNode*)keyW;
510       tl_assert(valW == 0);
511       tl_assert(res != &key);
512       return res;
513    } else {
514       return NULL;
515    }
516 }
517 
518 /* Note that the supplied XArray of FrameBlock must have been
519    made persistent already. */
520 __attribute__((noinline))
add_blocks_to_StackTree(WordFM * sitree,XArray * descrs,XArray * bases,UWord depth)521 static void add_blocks_to_StackTree (
522                /*MOD*/WordFM* sitree,
523                XArray* /* FrameBlock */ descrs,
524                XArray* /* Addr */ bases,
525                UWord depth
526             )
527 {
528    Bool debug = (Bool)0;
529    Word i, nDescrs, nBases;
530 
531    nDescrs = VG_(sizeXA)( descrs ),
532    nBases = VG_(sizeXA)( bases );
533    tl_assert(nDescrs == nBases);
534 
535    if (nDescrs == 0) return;
536 
537    tl_assert(sitree);
538    if (debug) {
539       VG_(printf)("\ndepth = %lu\n", depth);
540       pp_StackTree( sitree, "add_blocks_to_StackTree-pre" );
541       pp_StackBlocks(descrs);
542    }
543 
544    for (i = 0; i < nDescrs; i++) {
545       Bool already_present;
546       StackTreeNode* nyu;
547       Addr        addr  = *(Addr*)VG_(indexXA)( bases, i );
548       StackBlock* descr = (StackBlock*)VG_(indexXA)( descrs, i );
549       tl_assert(descr->szB > 0);
550       nyu = sg_malloc( "di.sg_main.abtST.1", sizeof(StackTreeNode) );
551       nyu->addr  = addr;
552       nyu->szB   = descr->szB;
553       nyu->descr = descr;
554       nyu->depth = depth;
555       if (debug) VG_(printf)("ADD %#lx %lu\n", addr, descr->szB);
556       already_present = VG_(addToFM)( sitree, (UWord)nyu, 0 );
557       /* The interval can't already be there; else we have
558          overlapping stack blocks. */
559       tl_assert(!already_present);
560       if (debug) {
561          pp_StackTree( sitree, "add_blocks_to_StackTree-step" );
562       }
563    }
564    if (debug) {
565       pp_StackTree( sitree, "add_blocks_to_StackTree-post" );
566       VG_(printf)("\n");
567    }
568 }
569 
del_blocks_from_StackTree(WordFM * sitree,XArray * bases)570 static void del_blocks_from_StackTree ( /*MOD*/WordFM* sitree,
571                                         XArray* /* Addr */ bases )
572 {
573    UWord oldK, oldV;
574    Word i, nBases = VG_(sizeXA)( bases );
575    for (i = 0; i < nBases; i++) {
576       Bool b;
577       Addr addr = *(Addr*)VG_(indexXA)( bases, i );
578       StackTreeNode* nd = find_StackTreeNode(sitree, addr);
579       /* The interval must be there; we added it earlier when
580          the associated frame was created. */
581       tl_assert(nd);
582       b = VG_(delFromFM)( sitree, &oldK, &oldV, (UWord)nd );
583       /* we just found the block! */
584       tl_assert(b);
585       tl_assert(oldV == 0);
586       tl_assert(nd == (StackTreeNode*)oldK);
587       sg_free(nd);
588    }
589 }
590 
591 
delete_StackTree__kFin(UWord keyW)592 static void delete_StackTree__kFin ( UWord keyW ) {
593    StackTreeNode* nd = (StackTreeNode*)keyW;
594    tl_assert(nd);
595    sg_free(nd);
596 }
delete_StackTree__vFin(UWord valW)597 static void delete_StackTree__vFin ( UWord valW ) {
598    tl_assert(valW == 0);
599 }
delete_StackTree(WordFM * sitree)600 static void delete_StackTree ( WordFM* sitree )
601 {
602    VG_(deleteFM)( sitree,
603                  delete_StackTree__kFin, delete_StackTree__vFin );
604 }
605 
new_StackTree(void)606 static WordFM* new_StackTree ( void ) {
607    return VG_(newFM)( sg_malloc, "di.sg_main.nST.1", sg_free,
608                       (Word(*)(UWord,UWord))cmp_intervals_StackTreeNode );
609 }
610 
611 
612 //////////////////////////////////////////////////////////////
613 //                                                          //
614 // Interval tree of GlobalTreeBlock                         //
615 //                                                          //
616 //////////////////////////////////////////////////////////////
617 
618 /* A node in a global interval tree.  Zero length intervals
619    (.szB == 0) are not allowed.
620 
621    A global interval tree is a (WordFM GlobalTreeNode* void).  There
622    is one global interval tree for the entire process.
623 */
624 typedef
625    struct {
626       Addr         addr; /* copied from .descr->addr */
627       SizeT        szB; /* copied from .descr->szB */
628       GlobalBlock* descr; /* it's this block */
629    }
630    GlobalTreeNode;
631 
GlobalTreeNode__pp(GlobalTreeNode * nd)632 static void GlobalTreeNode__pp ( GlobalTreeNode* nd ) {
633    tl_assert(nd->descr);
634    VG_(printf)("GTNode [%#lx,+%lu) %s",
635                nd->addr, nd->szB, nd->descr->name);
636 }
637 
GlobalTree__pp(WordFM * gitree,const HChar * who)638 static void GlobalTree__pp ( WordFM* /* of (GlobalTreeNode,void) */ gitree,
639                              const HChar* who )
640 {
641    UWord keyW, valW;
642    GlobalTreeNode* nd;
643    VG_(printf)("<<< GlobalBlockTree (%s)\n", who);
644    VG_(initIterFM)( gitree );
645    while (VG_(nextIterFM)( gitree, &keyW, &valW )) {
646       tl_assert(valW == 0);
647       nd = (GlobalTreeNode*)keyW;
648       VG_(printf)("  ");
649       GlobalTreeNode__pp(nd);
650       VG_(printf)("\n");
651    }
652    VG_(doneIterFM)( gitree );
653    VG_(printf)(">>>\n");
654 }
655 
656 /* Interval comparison function for GlobalTreeNode */
cmp_intervals_GlobalTreeNode(GlobalTreeNode * gn1,GlobalTreeNode * gn2)657 static Word cmp_intervals_GlobalTreeNode ( GlobalTreeNode* gn1,
658                                            GlobalTreeNode* gn2 )
659 {
660    return cmp_nonempty_intervals( gn1->addr, gn1->szB,
661                                   gn2->addr, gn2->szB );
662 }
663 
664 /* Find the node holding 'a', if any. */
find_GlobalTreeNode(WordFM * gitree,Addr a)665 static GlobalTreeNode* find_GlobalTreeNode ( WordFM* gitree, Addr a )
666 {
667    UWord keyW, valW;
668    GlobalTreeNode key;
669    key.addr = a;
670    key.szB  = 1;
671    if (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
672       GlobalTreeNode* res = (GlobalTreeNode*)keyW;
673       tl_assert(valW == 0);
674       tl_assert(res != &key);
675       return res;
676    } else {
677       return NULL;
678    }
679 }
680 
681 /* Note that the supplied GlobalBlock must have been made persistent
682    already. */
add_block_to_GlobalTree(WordFM * gitree,GlobalBlock * descr)683 static void add_block_to_GlobalTree (
684                /*MOD*/WordFM* gitree,
685                GlobalBlock* descr
686             )
687 {
688    Bool already_present;
689    GlobalTreeNode *nyu, *nd;
690    UWord keyW, valW;
691    static Int moans = 3;
692 
693    tl_assert(descr->szB > 0);
694    nyu = sg_malloc( "di.sg_main.abtG.1", sizeof(GlobalTreeNode) );
695    nyu->addr  = descr->addr;
696    nyu->szB   = descr->szB;
697    nyu->descr = descr;
698 
699    /* Basically it's an error to add a global block to the tree that
700       is already in the tree.  However, detect and ignore attempts to
701       insert exact duplicates; they do appear for some reason
702       (possible a bug in m_debuginfo?) */
703    already_present = VG_(lookupFM)( gitree, &keyW, &valW, (UWord)nyu );
704    if (already_present) {
705       tl_assert(valW == 0);
706       nd = (GlobalTreeNode*)keyW;
707       tl_assert(nd);
708       tl_assert(nd != nyu);
709       tl_assert(nd->descr);
710       tl_assert(nyu->descr);
711       if (nd->addr == nyu->addr && nd->szB == nyu->szB
712           /* && 0 == VG_(strcmp)(nd->descr->name, nyu->descr->name) */
713           /* Although it seems reasonable to demand that duplicate
714              blocks have identical names, that is too strict.  For
715              example, reading debuginfo from glibc produces two
716              otherwise identical blocks with names "tzname" and
717              "__tzname".  A constraint of the form "must be identical,
718              or one must be a substring of the other" would fix that.
719              However, such trickery is scuppered by the fact that we
720              truncate all variable names to 15 characters to make
721              storage management simpler, hence giving pairs like
722              "__EI___pthread_" (truncated) vs "__pthread_keys".  So
723              it's simplest just to skip the name comparison
724              completely. */
725           && 0 == VG_(strcmp)(nd->descr->soname, nyu->descr->soname)) {
726          /* exact duplicate; ignore it */
727          sg_free(nyu);
728          return;
729       }
730       /* else fall through; the assertion below will catch it */
731    }
732 
733    already_present = VG_(addToFM)( gitree, (UWord)nyu, 0 );
734    /* The interval can't already be there; else we have
735       overlapping global blocks. */
736    /* Unfortunately (25 Jan 09) at least icc11 has been seen to
737       generate overlapping block descriptions in the Dwarf3; clearly
738       bogus. */
739    if (already_present && moans > 0 && !VG_(clo_xml)) {
740       moans--;
741       VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
742                                "overlapping global blocks\n");
743       if (VG_(clo_verbosity) >= 2) {
744          GlobalTree__pp( gitree,
745                          "add_block_to_GlobalTree: non-exact duplicate" );
746          VG_(printf)("Overlapping block: ");
747          GlobalTreeNode__pp(nyu);
748          VG_(printf)("\n");
749       }
750       if (moans == 0)
751          VG_(message)(Vg_UserMsg, "Further instances of this "
752                                   "message will not be shown\n" );
753    }
754    /* tl_assert(!already_present); */
755 }
756 
del_GlobalTree_range(WordFM * gitree,Addr a,SizeT szB)757 static Bool del_GlobalTree_range ( /*MOD*/WordFM* gitree,
758                                    Addr a, SizeT szB )
759 {
760    /* One easy way to do this: look up [a,a+szB) in the tree.  That
761       will either succeed, producing a block which intersects that
762       range, in which case we delete it and repeat; or it will fail,
763       in which case there are no blocks intersecting the range, and we
764       can bring the process to a halt. */
765    UWord keyW, valW, oldK, oldV;
766    GlobalTreeNode key, *nd;
767    Bool b, anyFound;
768 
769    tl_assert(szB > 0);
770 
771    anyFound = False;
772 
773    key.addr = a;
774    key.szB  = szB;
775 
776    while (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
777       anyFound = True;
778       nd = (GlobalTreeNode*)keyW;
779       tl_assert(valW == 0);
780       tl_assert(nd != &key);
781       tl_assert(cmp_nonempty_intervals(a, szB, nd->addr, nd->szB) == 0);
782 
783       b = VG_(delFromFM)( gitree, &oldK, &oldV, (UWord)&key );
784       tl_assert(b);
785       tl_assert(oldV == 0);
786       tl_assert(oldK == keyW); /* check we deleted the node we just found */
787    }
788 
789    return anyFound;
790 }
791 
792 
793 //////////////////////////////////////////////////////////////
794 //                                                          //
795 // Invar                                                    //
796 //                                                          //
797 //////////////////////////////////////////////////////////////
798 
799 /* An invariant, as resulting from watching the destination of a
800    memory referencing instruction.  Initially is Inv_Unset until the
801    instruction makes a first access. */
802 
803 typedef
804    enum {
805       Inv_Unset=1,  /* not established yet */
806       Inv_Unknown,  /* unknown location */
807       Inv_Stack0,   /* array-typed stack block in innermost frame */
808       Inv_StackN,   /* array-typed stack block in non-innermost frame */
809       Inv_Global,   /* array-typed global block */
810    }
811    InvarTag;
812 
813 typedef
814    struct {
815       InvarTag tag;
816       union {
817          struct {
818          } Unset;
819          struct {
820          } Unknown;
821          struct {
822             Addr  addr;
823             SizeT szB;
824             StackBlock* descr;
825          } Stack0; /* innermost stack frame */
826          struct {
827             /* Pointer to a node in the interval tree for
828               this thread. */
829             StackTreeNode* nd;
830          } StackN; /* non-innermost stack frame */
831          struct {
832            /* Pointer to a GlobalBlock in the interval tree of
833               global blocks. */
834            GlobalTreeNode* nd;
835          } Global;
836       }
837       Inv;
838    }
839    Invar;
840 
841 /* Partial debugging printing for an Invar. */
pp_Invar(Invar * i)842 static void pp_Invar ( Invar* i )
843 {
844    switch (i->tag) {
845       case Inv_Unset:
846          VG_(printf)("Unset");
847          break;
848       case Inv_Unknown:
849          VG_(printf)("Unknown");
850          break;
851       case Inv_Stack0:
852          VG_(printf)("Stack0 [%#lx,+%lu)",
853                      i->Inv.Stack0.addr, i->Inv.Stack0.szB);
854          break;
855       case Inv_StackN:
856          VG_(printf)("StackN [%#lx,+%lu)",
857                      i->Inv.StackN.nd->addr, i->Inv.StackN.nd->szB);
858          break;
859       case Inv_Global:
860          VG_(printf)("Global [%#lx,+%lu)",
861                      i->Inv.Global.nd->addr, i->Inv.Global.nd->szB);
862          break;
863       default:
864          tl_assert(0);
865    }
866 }
867 
868 /* Compare two Invars for equality. */
eq_Invar(Invar * i1,Invar * i2)869 static Bool eq_Invar ( Invar* i1, Invar* i2 )
870 {
871    if (i1->tag != i2->tag)
872       return False;
873    switch (i1->tag) {
874       case Inv_Unset:
875          return True;
876       case Inv_Unknown:
877          return True;
878       case Inv_Stack0:
879          return i1->Inv.Stack0.addr == i2->Inv.Stack0.addr
880                 && i1->Inv.Stack0.szB == i2->Inv.Stack0.szB;
881       case Inv_StackN:
882          return i1->Inv.StackN.nd == i2->Inv.StackN.nd;
883       case Inv_Global:
884          return i1->Inv.Global.nd == i2->Inv.Global.nd;
885       default:
886          tl_assert(0);
887    }
888    /*NOTREACHED*/
889    tl_assert(0);
890 }
891 
892 /* Generate a piece of text showing 'ea' is relative to 'invar', if
893    known.  If unknown, generate an empty string.  'buf' must be at
894    least 32 bytes in size.  Also return the absolute value of the
895    delta, if known, or zero if not known.
896 */
gen_delta_str(HChar * buf,UWord * absDelta,Invar * inv,Addr ea)897 static void gen_delta_str ( /*OUT*/HChar* buf,
898                             /*OUT*/UWord* absDelta,
899                             Invar* inv, Addr ea )
900 {
901    Addr  block = 0;
902    SizeT szB   = 0;
903 
904    buf[0] = 0;
905    *absDelta = 0;
906 
907    switch (inv->tag) {
908       case Inv_Unknown:
909       case Inv_Unset:
910          return; /* unknown */
911       case Inv_Stack0:
912          block = inv->Inv.Stack0.addr;
913          szB   = inv->Inv.Stack0.szB;
914          break;
915       case Inv_StackN:
916          block = inv->Inv.StackN.nd->addr;
917          szB   = inv->Inv.StackN.nd->szB;
918          break;
919       case Inv_Global:
920          block = inv->Inv.Global.nd->addr;
921          szB = inv->Inv.Global.nd->szB;
922          break;
923       default:
924          tl_assert(0);
925    }
926    tl_assert(szB > 0);
927    if (ea < block) {
928       *absDelta = block - ea;
929       VG_(sprintf)(buf, "%'lu before", *absDelta);
930    }
931    else if (ea >= block + szB) {
932       *absDelta = ea - (block + szB);
933       VG_(sprintf)(buf, "%'lu after", *absDelta);
934    }
935    else {
936      // Leave *absDelta at zero.
937      VG_(sprintf)(buf, "%'lu inside", ea - block);
938    }
939 }
940 
941 
942 /* Print selected parts of an Invar, suitable for use in error
943    messages. */
show_Invar(HChar * buf,Word nBuf,Invar * inv,Word depth)944 static void show_Invar( HChar* buf, Word nBuf, Invar* inv, Word depth )
945 {
946    const HChar* str;
947    tl_assert(nBuf >= 128);
948    buf[0] = 0;
949    switch (inv->tag) {
950       case Inv_Unknown:
951          VG_(sprintf)(buf, "%s", "unknown");
952          break;
953       case Inv_Stack0:
954          str = "array";
955          VG_(sprintf)(buf, "stack %s \"%s\" of size %'lu in this frame",
956                       str, inv->Inv.Stack0.descr->name,
957                       inv->Inv.Stack0.szB );
958          break;
959       case Inv_StackN:
960          str = "array";
961          VG_(sprintf)(buf, "stack %s \"%s\" of size %'lu in frame %lu back from here",
962                       str, inv->Inv.StackN.nd->descr->name,
963                            inv->Inv.StackN.nd->descr->szB,
964                            depth - inv->Inv.StackN.nd->depth );
965          break;
966       case Inv_Global:
967          str = "array";
968          VG_(sprintf)(buf, "global %s \"%s\" of size %'lu in object with soname \"%s\"",
969                       str, inv->Inv.Global.nd->descr->name,
970                            inv->Inv.Global.nd->descr->szB,
971                            inv->Inv.Global.nd->descr->soname );
972          break;
973       case Inv_Unset:
974          VG_(sprintf)(buf, "%s", "Unset!");
975          break;
976       default:
977          tl_assert(0);
978    }
979 }
980 
981 
982 //////////////////////////////////////////////////////////////
983 //                                                          //
984 // our globals                                              //
985 //                                                          //
986 //////////////////////////////////////////////////////////////
987 
988 //////////////////////////////////////////////////////////////
989 ///
990 
991 #define N_QCACHE 16
992 
993 /* Powers of two only, else the result will be chaos */
994 #define QCACHE_ADVANCE_EVERY 16
995 
996 /* Per-thread query cache.  Note that the invar can only be Inv_StackN
997    (but not Inv_Stack0), Inv_Global or Inv_Unknown. */
998 typedef
999    struct {
1000       Addr  addr;
1001       SizeT szB;
1002       Invar inv;
1003    }
1004    QCElem;
1005 
1006 typedef
1007    struct {
1008       Word   nInUse;
1009       QCElem elems[N_QCACHE];
1010    }
1011    QCache;
1012 
QCache__invalidate(QCache * qc)1013 static void QCache__invalidate ( QCache* qc ) {
1014    tl_assert(qc->nInUse >= 0);
1015    qc->nInUse = 0;
1016 }
1017 
QCache__pp(QCache * qc,const HChar * who)1018 static void QCache__pp ( QCache* qc, const HChar* who )
1019 {
1020    Word i;
1021    VG_(printf)("<<< QCache with %ld elements (%s)\n", qc->nInUse, who);
1022    for (i = 0; i < qc->nInUse; i++) {
1023       VG_(printf)("  [%#lx,+%#lx) ", qc->elems[i].addr, qc->elems[i].szB);
1024       pp_Invar(&qc->elems[i].inv);
1025       VG_(printf)("\n");
1026    }
1027    VG_(printf)(">>>\n");
1028 }
1029 
1030 static ULong stats__qcache_queries = 0;
1031 static ULong stats__qcache_misses  = 0;
1032 static ULong stats__qcache_probes  = 0;
1033 
1034 ///
1035 //////////////////////////////////////////////////////////////
1036 
1037 /* Each thread has:
1038    * a shadow stack of StackFrames, which is a double-linked list
1039    * an stack block interval tree
1040 */
1041 static  struct _StackFrame**         shadowStacks;
1042 
1043 static  WordFM** /* StackTreeNode */ siTrees;
1044 
1045 static  QCache*                      qcaches;
1046 
1047 
1048 /* Additionally, there is one global variable interval tree
1049    for the entire process.
1050 */
1051 static WordFM* /* GlobalTreeNode */ giTree;
1052 
1053 
invalidate_all_QCaches(void)1054 static void invalidate_all_QCaches ( void )
1055 {
1056    Word i;
1057    for (i = 0; i < VG_N_THREADS; i++) {
1058       QCache__invalidate( &qcaches[i] );
1059    }
1060 }
1061 
ourGlobals_init(void)1062 static void ourGlobals_init ( void )
1063 {
1064    Word i;
1065 
1066    shadowStacks = sg_malloc( "di.sg_main.oGi.2",
1067                              VG_N_THREADS * sizeof shadowStacks[0] );
1068    siTrees = sg_malloc( "di.sg_main.oGi.3", VG_N_THREADS * sizeof siTrees[0] );
1069    qcaches = sg_malloc( "di.sg_main.oGi.4", VG_N_THREADS * sizeof qcaches[0] );
1070 
1071    for (i = 0; i < VG_N_THREADS; i++) {
1072       shadowStacks[i] = NULL;
1073       siTrees[i] = NULL;
1074       qcaches[i] = (QCache){};
1075    }
1076    invalidate_all_QCaches();
1077    giTree = VG_(newFM)( sg_malloc, "di.sg_main.oGi.1", sg_free,
1078                         (Word(*)(UWord,UWord))cmp_intervals_GlobalTreeNode );
1079 }
1080 
1081 
1082 //////////////////////////////////////////////////////////////
1083 //                                                          //
1084 // Handle global variable load/unload events                //
1085 //                                                          //
1086 //////////////////////////////////////////////////////////////
1087 
acquire_globals(ULong di_handle)1088 static void acquire_globals ( ULong di_handle )
1089 {
1090    Word n, i;
1091    XArray* /* of GlobalBlock */ gbs;
1092    if (0) VG_(printf)("ACQUIRE GLOBALS %llu\n", di_handle );
1093    gbs = VG_(di_get_global_blocks_from_dihandle)
1094             (di_handle, True/*arrays only*/);
1095    if (0) VG_(printf)("   GOT %ld globals\n", VG_(sizeXA)( gbs ));
1096 
1097    n = VG_(sizeXA)( gbs );
1098    for (i = 0; i < n; i++) {
1099       GlobalBlock* gbp;
1100       GlobalBlock* gb = VG_(indexXA)( gbs, i );
1101       if (0) VG_(printf)("   new Global size %2lu at %#lx:  %s %s\n",
1102                          gb->szB, gb->addr, gb->soname, gb->name );
1103       tl_assert(gb->szB > 0);
1104       /* Make a persistent copy of each GlobalBlock, and add it
1105          to the tree. */
1106       gbp = get_persistent_GlobalBlock( gb );
1107       add_block_to_GlobalTree( giTree, gbp );
1108    }
1109 
1110    VG_(deleteXA)( gbs );
1111 }
1112 
1113 
1114 /* We only intercept these two because we need to see any di_handles
1115    that might arise from the mappings/allocations. */
sg_new_mem_mmap(Addr a,SizeT len,Bool rr,Bool ww,Bool xx,ULong di_handle)1116 void sg_new_mem_mmap( Addr a, SizeT len,
1117                       Bool rr, Bool ww, Bool xx, ULong di_handle )
1118 {
1119    if (di_handle > 0)
1120       acquire_globals(di_handle);
1121 }
sg_new_mem_startup(Addr a,SizeT len,Bool rr,Bool ww,Bool xx,ULong di_handle)1122 void sg_new_mem_startup( Addr a, SizeT len,
1123                          Bool rr, Bool ww, Bool xx, ULong di_handle )
1124 {
1125    if (di_handle > 0)
1126       acquire_globals(di_handle);
1127 }
sg_die_mem_munmap(Addr a,SizeT len)1128 void sg_die_mem_munmap ( Addr a, SizeT len )
1129 {
1130    Bool debug = (Bool)0;
1131    Bool overlap = False;
1132 
1133    if (debug) VG_(printf)("MUNMAP %#lx %lu\n", a, len );
1134 
1135    if (len == 0)
1136       return;
1137 
1138    overlap = del_GlobalTree_range(giTree, a, len);
1139 
1140    { /* redundant sanity check */
1141      UWord keyW, valW;
1142      VG_(initIterFM)( giTree );
1143      while (VG_(nextIterFM)( giTree, &keyW, &valW )) {
1144        GlobalTreeNode* nd = (GlobalTreeNode*)keyW;
1145         tl_assert(valW == 0);
1146         tl_assert(nd->szB > 0);
1147         tl_assert(nd->addr + nd->szB <= a
1148                   || a + len <= nd->addr);
1149      }
1150      VG_(doneIterFM)( giTree );
1151    }
1152 
1153    if (!overlap)
1154       return;
1155 
1156    /* Ok, the range contained some blocks.  Therefore we'll need to
1157       visit all the Invars in all the thread shadow stacks, and
1158       convert all Inv_Global entries that intersect [a,a+len) to
1159       Inv_Unknown. */
1160    tl_assert(len > 0);
1161    preen_global_Invars( a, len );
1162    invalidate_all_QCaches();
1163 }
1164 
1165 
1166 //////////////////////////////////////////////////////////////
1167 //                                                          //
1168 // StackFrame                                               //
1169 //                                                          //
1170 //////////////////////////////////////////////////////////////
1171 
1172 static ULong stats__total_accesses   = 0;
1173 static ULong stats__classify_Stack0  = 0;
1174 static ULong stats__classify_StackN  = 0;
1175 static ULong stats__classify_Global  = 0;
1176 static ULong stats__classify_Unknown = 0;
1177 static ULong stats__Invars_preened   = 0;
1178 static ULong stats__Invars_changed   = 0;
1179 static ULong stats__t_i_b_empty      = 0;
1180 static ULong stats__htab_fast        = 0;
1181 static ULong stats__htab_searches    = 0;
1182 static ULong stats__htab_probes      = 0;
1183 static ULong stats__htab_resizes     = 0;
1184 
1185 
1186 /* A dynamic instance of an instruction */
1187 typedef
1188    struct {
1189       /* IMMUTABLE */
1190       Addr    insn_addr; /* NB! zero means 'not in use' */
1191       XArray* blocks; /* XArray* of StackBlock, or NULL if none */
1192       /* MUTABLE */
1193       Invar invar;
1194    }
1195    IInstance;
1196 
1197 
1198 #define N_HTAB_FIXED 64
1199 
1200 typedef
1201    struct _StackFrame {
1202       /* The sp when the frame was created, so we know when to get rid
1203          of it. */
1204       Addr creation_sp;
1205       /* The stack frames for a thread are arranged as a doubly linked
1206          list.  Obviously the outermost frame in the stack has .outer
1207          as NULL and the innermost in theory has .inner as NULL.
1208          However, when a function returns, we don't delete the
1209          just-vacated StackFrame.  Instead, it is retained in the list
1210          and will be re-used when the next call happens.  This is so
1211          as to avoid constantly having to dynamically allocate and
1212          deallocate frames. */
1213       struct _StackFrame* inner;
1214       struct _StackFrame* outer;
1215       Word depth; /* 0 for outermost; increases inwards */
1216       /* Information for each memory referencing instruction, for this
1217          instantiation of the function.  The iinstances array is
1218          operated as a simple linear-probe hash table, which is
1219          dynamically expanded as necessary.  Once critical thing is
1220          that an IInstance with a .insn_addr of zero is interpreted to
1221          mean that hash table slot is unused.  This means we can't
1222          store an IInstance for address zero. */
1223       /* Note that htab initially points to htab_fixed.  If htab_fixed
1224          turns out not to be big enough then htab is made to point to
1225          dynamically allocated memory.  But it's often the case that
1226          htab_fixed is big enough, so this optimisation saves a huge
1227          number of sg_malloc/sg_free call pairs. */
1228       IInstance* htab;
1229       UWord      htab_size; /* size of hash table, MAY ONLY BE A POWER OF 2 */
1230       UWord      htab_used; /* number of hash table slots currently in use */
1231       /* If this frame is currently making a call, then the following
1232          are relevant. */
1233       Addr sp_at_call;
1234       Addr fp_at_call;
1235       XArray* /* of Addr */ blocks_added_by_call;
1236       /* See comment just above */
1237       IInstance htab_fixed[N_HTAB_FIXED];
1238    }
1239    StackFrame;
1240 
1241 
1242 
1243 
1244 
1245 /* Move this somewhere else? */
1246 /* Visit all Invars in the entire system.  If 'isHeap' is True, change
1247    all Inv_Heap Invars that intersect [a,a+len) to Inv_Unknown.  If
1248    'isHeap' is False, do the same but to the Inv_Global{S,V} Invars
1249    instead. */
1250 
1251 __attribute__((noinline))
preen_global_Invar(Invar * inv,Addr a,SizeT len)1252 static void preen_global_Invar ( Invar* inv, Addr a, SizeT len )
1253 {
1254    stats__Invars_preened++;
1255    tl_assert(len > 0);
1256    tl_assert(inv);
1257    switch (inv->tag) {
1258       case Inv_Global:
1259          tl_assert(inv->Inv.Global.nd);
1260          tl_assert(inv->Inv.Global.nd->szB > 0);
1261          if (0) VG_(printf)("preen_Invar Global %#lx %lu\n",
1262                             inv->Inv.Global.nd->addr,
1263                             inv->Inv.Global.nd->szB);
1264          if (0 == cmp_nonempty_intervals(a, len, inv->Inv.Global.nd->addr,
1265                                                  inv->Inv.Global.nd->szB)) {
1266             inv->tag = Inv_Unknown;
1267             stats__Invars_changed++;
1268          }
1269          break;
1270       case Inv_Stack0:
1271       case Inv_StackN:
1272       case Inv_Unknown:
1273          break;
1274       case Inv_Unset: /* this should never happen */
1275          /* fallthrough */
1276       default:
1277          tl_assert(0);
1278    }
1279 }
1280 
1281 __attribute__((noinline))
preen_global_Invars(Addr a,SizeT len)1282 static void preen_global_Invars ( Addr a, SizeT len )
1283 {
1284    Int         i;
1285    UWord       u;
1286    StackFrame* frame;
1287    tl_assert(len > 0);
1288    for (i = 0; i < VG_N_THREADS; i++) {
1289       frame = shadowStacks[i];
1290       if (!frame)
1291          continue; /* no frames for this thread */
1292       /* start from the innermost frame */
1293       while (frame->inner)
1294          frame = frame->inner;
1295       tl_assert(frame->outer);
1296       /* work through the frames from innermost to outermost.  The
1297          order isn't important; we just need to ensure we visit each
1298          frame once (including those which are not actually active,
1299          more 'inner' than the 'innermost active frame', viz, just
1300          hanging around waiting to be used, when the current innermost
1301          active frame makes more calls.  See comments on definition of
1302          struct _StackFrame. */
1303       for (; frame; frame = frame->outer) {
1304          UWord xx = 0; /* sanity check only; count of used htab entries */
1305          if (!frame->htab)
1306             continue; /* frame not in use.  See shadowStack_unwind(). */
1307          for (u = 0; u < frame->htab_size; u++) {
1308             IInstance* ii = &frame->htab[u];
1309             if (ii->insn_addr == 0)
1310                continue; /* not in use */
1311             if (0) { pp_Invar(&ii->invar); VG_(printf)(" x\n"); }
1312             preen_global_Invar( &ii->invar, a, len );
1313             xx++;
1314          }
1315          tl_assert(xx == frame->htab_used);
1316       }
1317    }
1318 }
1319 
1320 
1321 /* XXX this should be >> 2 on ppc32/64 since the bottom two bits
1322    of the ip are guaranteed to be zero */
compute_II_hash(Addr ip,UWord htab_size)1323 inline static UWord compute_II_hash ( Addr ip, UWord htab_size ) {
1324    return (ip >> 0) & (htab_size - 1);
1325 }
1326 
1327 __attribute__((noinline))
initialise_II_hash_table(StackFrame * sf)1328 static void initialise_II_hash_table ( StackFrame* sf )
1329 {
1330    UWord i;
1331    sf->htab_size = N_HTAB_FIXED; /* initial hash table size */
1332    sf->htab = &sf->htab_fixed[0];
1333    tl_assert(sf->htab);
1334    sf->htab_used = 0;
1335    for (i = 0; i < sf->htab_size; i++)
1336       sf->htab[i].insn_addr = 0; /* NOT IN USE */
1337 }
1338 
1339 
1340 __attribute__((noinline))
resize_II_hash_table(StackFrame * sf)1341 static void resize_II_hash_table ( StackFrame* sf )
1342 {
1343    UWord     i, j, ix, old_size, new_size;
1344    IInstance *old_htab, *new_htab, *old;
1345 
1346    tl_assert(sf && sf->htab);
1347    old_size = sf->htab_size;
1348    new_size = 2 * old_size;
1349    old_htab = sf->htab;
1350    new_htab = sg_malloc( "di.sg_main.rIht.1",
1351                          new_size * sizeof(IInstance) );
1352    for (i = 0; i < new_size; i++) {
1353       new_htab[i].insn_addr = 0; /* NOT IN USE */
1354    }
1355    for (i = 0; i < old_size; i++) {
1356       old = &old_htab[i];
1357       if (old->insn_addr == 0 /* NOT IN USE */)
1358          continue;
1359       ix = compute_II_hash(old->insn_addr, new_size);
1360       /* find out where to put this, in the new table */
1361       j = new_size;
1362       while (1) {
1363          if (new_htab[ix].insn_addr == 0)
1364             break;
1365          /* This can't ever happen, because it would mean the new
1366             table is full; that isn't allowed -- even the old table is
1367             only allowed to become half full. */
1368          tl_assert(j > 0);
1369          j--;
1370          ix++; if (ix == new_size) ix = 0;
1371       }
1372       /* copy the old entry to this location */
1373       tl_assert(ix < new_size);
1374       tl_assert(new_htab[ix].insn_addr == 0);
1375       new_htab[ix] = *old;
1376       tl_assert(new_htab[ix].insn_addr != 0);
1377    }
1378    /* all entries copied; free old table. */
1379    if (old_htab != &sf->htab_fixed[0])
1380       sg_free(old_htab);
1381    sf->htab = new_htab;
1382    sf->htab_size = new_size;
1383    /* check sf->htab_used is correct.  Optional and a bit expensive
1384       but anyway: */
1385    j = 0;
1386    for (i = 0; i < new_size; i++) {
1387       if (new_htab[i].insn_addr != 0) {
1388          j++;
1389       }
1390    }
1391    tl_assert(j == sf->htab_used);
1392    if (0) VG_(printf)("resized tab for SF %p to %lu\n", sf, new_size);
1393 }
1394 
1395 
1396 __attribute__((noinline))
find_or_create_IInstance_SLOW(StackFrame * sf,Addr ip,XArray * ip_frameblocks)1397 static IInstance* find_or_create_IInstance_SLOW (
1398                      StackFrame* sf,
1399                      Addr ip,
1400                      XArray* /* StackBlock */ ip_frameblocks
1401                   )
1402 {
1403    UWord i, ix;
1404 
1405    stats__htab_searches++;
1406 
1407    tl_assert(sf);
1408    tl_assert(sf->htab);
1409 
1410    /* Make sure the table loading doesn't get too high. */
1411    if (UNLIKELY(2 * sf->htab_used >= 1 * sf->htab_size)) {
1412       stats__htab_resizes++;
1413       resize_II_hash_table(sf);
1414    }
1415    tl_assert(2 * sf->htab_used <= sf->htab_size);
1416 
1417    ix = compute_II_hash(ip, sf->htab_size);
1418    i = sf->htab_size;
1419    while (1) {
1420       stats__htab_probes++;
1421       /* Note that because of the way the fast-case handler works,
1422          these two tests are actually redundant in the first iteration
1423          of this loop.  (Except they aren't redundant if the code just
1424          above resized the table first. :-) */
1425       if (sf->htab[ix].insn_addr == ip)
1426          return &sf->htab[ix];
1427       if (sf->htab[ix].insn_addr == 0)
1428          break;
1429       /* If i ever gets to zero and we have found neither what we're
1430          looking for nor an empty slot, the table must be full.  Which
1431          isn't possible -- we monitor the load factor to ensure it
1432          doesn't get above say 50%; if that ever does happen the table
1433          is resized. */
1434       tl_assert(i > 0);
1435       i--;
1436       ix++;
1437       if (ix == sf->htab_size) ix = 0;
1438    }
1439 
1440    /* So now we've found a free slot at ix, and we can use that. */
1441    tl_assert(sf->htab[ix].insn_addr == 0);
1442 
1443    /* Add a new record in this slot. */
1444    tl_assert(ip != 0); /* CAN'T REPRESENT THIS */
1445    sf->htab[ix].insn_addr = ip;
1446    sf->htab[ix].blocks    = ip_frameblocks;
1447    sf->htab[ix].invar.tag = Inv_Unset;
1448    sf->htab_used++;
1449    return &sf->htab[ix];
1450 }
1451 
1452 
1453 inline
find_or_create_IInstance(StackFrame * sf,Addr ip,XArray * ip_frameblocks)1454 static IInstance* find_or_create_IInstance (
1455                      StackFrame* sf,
1456                      Addr ip,
1457                      XArray* /* StackBlock */ ip_frameblocks
1458                   )
1459 {
1460    UWord ix = compute_II_hash(ip, sf->htab_size);
1461    /* Is it in the first slot we come to? */
1462    if (LIKELY(sf->htab[ix].insn_addr == ip)) {
1463       stats__htab_fast++;
1464       return &sf->htab[ix];
1465    }
1466    /* If the first slot we come to is empty, bag it. */
1467    if (LIKELY(sf->htab[ix].insn_addr == 0)) {
1468       stats__htab_fast++;
1469       tl_assert(ip != 0);
1470       sf->htab[ix].insn_addr = ip;
1471       sf->htab[ix].blocks    = ip_frameblocks;
1472       sf->htab[ix].invar.tag = Inv_Unset;
1473       sf->htab_used++;
1474       return &sf->htab[ix];
1475    }
1476    /* Otherwise we hand off to the slow case, which searches other
1477       slots, and optionally resizes the table if necessary. */
1478    return find_or_create_IInstance_SLOW( sf, ip, ip_frameblocks );
1479 }
1480 
1481 
1482 __attribute__((noinline))
calculate_StackBlock_EA(StackBlock * descr,Addr sp,Addr fp)1483 static Addr calculate_StackBlock_EA ( StackBlock* descr,
1484                                       Addr sp, Addr fp ) {
1485    UWord w1 = (UWord)descr->base;
1486    UWord w2 = (UWord)(descr->spRel ? sp : fp);
1487    UWord ea = w1 + w2;
1488    return ea;
1489 }
1490 
1491 /* Given an array of StackBlocks, return an array of Addrs, holding
1492    their effective addresses.  Caller deallocates result array. */
1493 __attribute__((noinline))
calculate_StackBlock_EAs(XArray * blocks,Addr sp,Addr fp)1494 static XArray* /* Addr */ calculate_StackBlock_EAs (
1495                              XArray* /* StackBlock */ blocks,
1496                              Addr sp, Addr fp
1497                           )
1498 {
1499    XArray* res;
1500    Word i, n = VG_(sizeXA)( blocks );
1501    tl_assert(n > 0);
1502    res = VG_(newXA)( sg_malloc, "di.sg_main.cSBE.1", sg_free, sizeof(Addr) );
1503    for (i = 0; i < n; i++) {
1504       StackBlock* blk = VG_(indexXA)( blocks, i );
1505       Addr ea = calculate_StackBlock_EA( blk, sp, fp );
1506       VG_(addToXA)( res, &ea );
1507    }
1508    return res;
1509 }
1510 
1511 
1512 /* Try to classify the block into which a memory access falls, and
1513    write the result in 'inv'.  This writes all relevant fields of
1514    'inv'. */
1515 __attribute__((noinline))
classify_address(Invar * inv,ThreadId tid,Addr ea,Addr sp,Addr fp,UWord szB,XArray * thisInstrBlocks)1516 static void classify_address ( /*OUT*/Invar* inv,
1517                                ThreadId tid,
1518                                Addr ea, Addr sp, Addr fp,
1519                                UWord szB,
1520                                XArray* /* of StackBlock */ thisInstrBlocks )
1521 {
1522    tl_assert(szB > 0);
1523    /* First, look in the stack blocks accessible in this instruction's
1524       frame. */
1525    {
1526      Word i, nBlocks = VG_(sizeXA)( thisInstrBlocks );
1527      if (nBlocks == 0) stats__t_i_b_empty++;
1528      for (i = 0; i < nBlocks; i++) {
1529         StackBlock* descr = VG_(indexXA)( thisInstrBlocks, i );
1530         Addr bea = calculate_StackBlock_EA( descr, sp, fp );
1531         if (bea <= ea && ea + szB <= bea + descr->szB) {
1532            /* found it */
1533            inv->tag = Inv_Stack0;
1534            inv->Inv.Stack0.addr  = bea;
1535            inv->Inv.Stack0.szB   = descr->szB;
1536            inv->Inv.Stack0.descr = descr;
1537            stats__classify_Stack0++;
1538            return;
1539         }
1540      }
1541    }
1542    /* Look in this thread's query cache */
1543    { Word i;
1544      QCache* cache = &qcaches[tid];
1545      static UWord ctr = 0;
1546      stats__qcache_queries++;
1547      for (i = 0; i < cache->nInUse; i++) {
1548         if (0) /* expensive in a loop like this */
1549                tl_assert(cache->elems[i].addr + cache->elems[i].szB != 0);
1550         stats__qcache_probes++;
1551         if (is_subinterval_of(cache->elems[i].addr,
1552                               cache->elems[i].szB, ea, szB)) {
1553            if (i > 0
1554                && (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0) {
1555               QCElem tmp;
1556               tmp = cache->elems[i-1];
1557               cache->elems[i-1] = cache->elems[i];
1558               cache->elems[i] = tmp;
1559               i--;
1560            }
1561            *inv = cache->elems[i].inv;
1562            return;
1563         }
1564      }
1565      stats__qcache_misses++;
1566    }
1567    /* Ok, so it's not a block in the top frame.  Perhaps it's a block
1568       in some calling frame?  Consult this thread's stack-block
1569       interval tree to find out. */
1570    { StackTreeNode* nd = find_StackTreeNode( siTrees[tid], ea );
1571      /* We know that [ea,ea+1) is in the block, but we need to
1572         restrict to the case where the whole access falls within
1573         it. */
1574      if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
1575         nd = NULL;
1576      }
1577      if (nd) {
1578         /* found it */
1579         inv->tag = Inv_StackN;
1580         inv->Inv.StackN.nd = nd;
1581         stats__classify_StackN++;
1582         goto out;
1583      }
1584    }
1585    /* Not in a stack block.  Try the global pool. */
1586    { GlobalTreeNode* nd = find_GlobalTreeNode(giTree, ea);
1587      /* We know that [ea,ea+1) is in the block, but we need to
1588         restrict to the case where the whole access falls within
1589         it. */
1590      if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
1591         nd = NULL;
1592      }
1593      if (nd) {
1594         /* found it */
1595         inv->tag = Inv_Global;
1596         inv->Inv.Global.nd = nd;
1597         stats__classify_Global++;
1598         goto out;
1599      }
1600    }
1601    /* No idea - give up. */
1602    inv->tag = Inv_Unknown;
1603    stats__classify_Unknown++;
1604 
1605    /* Update the cache */
1606   out:
1607    { Addr    toadd_addr = 0;
1608      SizeT   toadd_szB  = 0;
1609      QCache* cache      = &qcaches[tid];
1610 
1611      static UWord ctr = 0;
1612      Bool show = False;
1613      if (0 && 0 == (ctr++ & 0x1FFFFF)) show = True;
1614 
1615      if (show) QCache__pp(cache, "before upd");
1616 
1617      switch (inv->tag) {
1618         case Inv_Global:
1619            toadd_addr = inv->Inv.Global.nd->addr;
1620            toadd_szB  = inv->Inv.Global.nd->szB;
1621            break;
1622         case Inv_StackN:
1623            toadd_addr = inv->Inv.StackN.nd->addr;
1624            toadd_szB  = inv->Inv.StackN.nd->szB;
1625            break;
1626         case Inv_Unknown: {
1627            /* This is more complex.  We need to figure out the
1628               intersection of the "holes" in the global and stack
1629               interval trees into which [ea,ea+szB) falls.  This is
1630               further complicated by the fact that [ea,ea+szB) might
1631               not fall cleanly into a hole; it may instead fall across
1632               the boundary of a stack or global block.  In that case
1633               we just ignore it and don't update the cache, since we
1634               have no way to represent this situation precisely. */
1635            StackTreeNode  sNegInf, sPosInf, sKey, *sLB, *sUB;
1636            GlobalTreeNode gNegInf, gPosInf, gKey, *gLB, *gUB;
1637            Addr gMin, gMax, sMin, sMax, uMin, uMax;
1638            Bool sOK, gOK;
1639            sNegInf.addr = 0;
1640            sNegInf.szB  = 1;
1641            sPosInf.addr = ~(UWord)0;
1642            sPosInf.szB  = 1;
1643            gNegInf.addr = 0;
1644            gNegInf.szB  = 1;
1645            gPosInf.addr = ~(UWord)0;
1646            gPosInf.szB  = 1;
1647            sKey.addr = ea;
1648            sKey.szB  = szB;
1649            gKey.addr = ea;
1650            gKey.szB  = szB;
1651            if (0) VG_(printf)("Tree sizes %lu %lu\n",
1652                               VG_(sizeFM)(siTrees[tid]), VG_(sizeFM)(giTree));
1653            sOK = VG_(findBoundsFM)( siTrees[tid],
1654                                     (UWord*)&sLB,    NULL/*unused*/,
1655                                     (UWord*)&sUB,    NULL/*unused*/,
1656                                     (UWord)&sNegInf, 0/*unused*/,
1657                                     (UWord)&sPosInf, 0/*unused*/,
1658                                     (UWord)&sKey );
1659            gOK = VG_(findBoundsFM)( giTree,
1660                                     (UWord*)&gLB,    NULL/*unused*/,
1661                                     (UWord*)&gUB,    NULL/*unused*/,
1662                                     (UWord)&gNegInf, 0/*unused*/,
1663                                     (UWord)&gPosInf, 0/*unused*/,
1664                                     (UWord)&gKey );
1665            if (!(sOK && gOK)) {
1666               /* If this happens, then [ea,ea+szB) partially overlaps
1667                  a heap or stack block.  We can't represent that, so
1668                  just forget it (should be very rare).  However, do
1669                  maximum sanity checks first.  In such a
1670                  partial overlap case, it can't be the case that both
1671                  [ea] and [ea+szB-1] overlap the same block, since if
1672                  that were indeed the case then it wouldn't be a
1673                  partial overlap; rather it would simply fall inside
1674                  that block entirely and we shouldn't be inside this
1675                  conditional at all. */
1676               if (!sOK) {
1677                  StackTreeNode *ndFirst, *ndLast;
1678                  ndFirst = find_StackTreeNode( siTrees[tid], ea );
1679                  ndLast  = find_StackTreeNode( siTrees[tid], ea+szB-1 );
1680                  /* if both ends of the range fall inside a block,
1681                     they can't be in the same block. */
1682                  if (ndFirst && ndLast)
1683                     tl_assert(ndFirst != ndLast);
1684                  /* for each end of the range, if it is in a block,
1685                     the range as a whole can't be entirely within the
1686                     block. */
1687                  if (ndFirst)
1688                     tl_assert(!is_subinterval_of(ndFirst->addr,
1689                                                  ndFirst->szB, ea, szB));
1690                  if (ndLast)
1691                     tl_assert(!is_subinterval_of(ndLast->addr,
1692                                                  ndLast->szB, ea, szB));
1693               }
1694               if (!gOK) {
1695                  GlobalTreeNode *ndFirst, *ndLast;
1696                  ndFirst = find_GlobalTreeNode( giTree, ea );
1697                  ndLast  = find_GlobalTreeNode( giTree, ea+szB-1 );
1698                  /* if both ends of the range fall inside a block,
1699                     they can't be in the same block. */
1700                  if (ndFirst && ndLast)
1701                     tl_assert(ndFirst != ndLast);
1702                  /* for each end of the range, if it is in a block,
1703                     the range as a whole can't be entirely within the
1704                     block. */
1705                  if (ndFirst)
1706                     tl_assert(!is_subinterval_of(ndFirst->addr,
1707                                                  ndFirst->szB, ea, szB));
1708                  if (ndLast)
1709                     tl_assert(!is_subinterval_of(ndLast->addr,
1710                                                  ndLast->szB, ea, szB));
1711               }
1712               if (0) VG_(printf)("overlapping blocks in cache\n");
1713               return;
1714            }
1715            sMin = sLB == &sNegInf  ? 0         : (sLB->addr + sLB->szB);
1716            sMax = sUB == &sPosInf  ? ~(UWord)0 : (sUB->addr - 1);
1717            gMin = gLB == &gNegInf  ? 0         : (gLB->addr + gLB->szB);
1718            gMax = gUB == &gPosInf  ? ~(UWord)0 : (gUB->addr - 1);
1719            if (0) VG_(printf)("sMin %lx sMax %lx gMin %lx gMax %lx\n",
1720                               sMin, sMax, gMin, gMax);
1721            /* [sMin,sMax] and [gMin,gMax] must both contain
1722               [ea,ea+szB) (right?)  That implies they must overlap at
1723               at least over [ea,ea+szB). */
1724            tl_assert(sMin <= ea && ea+szB-1 <= sMax);
1725            tl_assert(gMin <= ea && ea+szB-1 <= gMax);
1726            /* So now compute their intersection. */
1727            uMin = Addr__max( sMin, gMin );
1728            uMax = Addr__min( sMax, gMax );
1729            if (0) VG_(printf)("uMin %lx uMax %lx\n", uMin, uMax);
1730            tl_assert(uMin <= uMax);
1731            tl_assert(uMin <= ea && ea+szB-1 <= uMax);
1732            /* Finally, we can park [uMin,uMax] in the cache.  However,
1733               if uMax is ~0, we can't represent the difference; hence
1734               fudge uMax. */
1735            if (uMin < uMax && uMax == ~(UWord)0)
1736               uMax--;
1737            toadd_addr = uMin;
1738            toadd_szB  = uMax - uMin + 1;
1739            break;
1740         }
1741         default:
1742            /* We should only be caching info for the above 3 cases */
1743           tl_assert(0);
1744      } /* switch (inv->tag) */
1745 
1746      { /* and actually add this to the cache, finally */
1747        Word i;
1748        Word ip = cache->nInUse / 2; /* doesn't seem critical */
1749 
1750        if (cache->nInUse < N_QCACHE)
1751           cache->nInUse++;
1752        for (i = cache->nInUse-1; i > ip; i--) {
1753           cache->elems[i] = cache->elems[i-1];
1754        }
1755 
1756        tl_assert(toadd_szB > 0);
1757        cache->elems[ip].addr = toadd_addr;
1758        cache->elems[ip].szB  = toadd_szB;
1759        cache->elems[ip].inv  = *inv;
1760      }
1761 
1762      if (show) QCache__pp(cache, "after upd");
1763 
1764    }
1765 }
1766 
1767 
1768 /* CALLED FROM GENERATED CODE */
1769 static
1770 VG_REGPARM(3)
helperc__mem_access(Addr ea,Addr sp,Addr fp,Word sszB,Addr ip,XArray * ip_frameBlocks)1771 void helperc__mem_access ( /* Known only at run time: */
1772                            Addr ea, Addr sp, Addr fp,
1773                            /* Known at translation time: */
1774                            Word sszB, Addr ip, XArray* ip_frameBlocks )
1775 {
1776    UWord szB;
1777    IInstance* iinstance;
1778    Invar* inv;
1779    Invar new_inv;
1780    ThreadId tid = VG_(get_running_tid)();
1781    StackFrame* frame;
1782    HChar bufE[160], bufA[160], bufD[32];
1783 
1784    stats__total_accesses++;
1785 
1786    tl_assert(is_sane_TId(tid));
1787    frame = shadowStacks[tid];
1788    tl_assert(frame);
1789 
1790    /* Find the instance info for this instruction. */
1791    tl_assert(ip_frameBlocks);
1792    iinstance = find_or_create_IInstance( frame, ip, ip_frameBlocks );
1793    tl_assert(iinstance);
1794    tl_assert(iinstance->blocks == ip_frameBlocks);
1795 
1796    szB = (sszB < 0) ? (-sszB) : sszB;
1797    tl_assert(szB > 0);
1798 
1799    inv = &iinstance->invar;
1800 
1801    /* Deal with first uses of instruction instances. */
1802    if (inv->tag == Inv_Unset) {
1803       /* This is the first use of this instance of the instruction, so
1804          we can't make any check; we merely record what we saw, so we
1805          can compare it against what happens for 2nd and subsequent
1806          accesses. */
1807       classify_address( inv,
1808                         tid, ea, sp, fp, szB, iinstance->blocks );
1809       tl_assert(inv->tag != Inv_Unset);
1810       return;
1811    }
1812 
1813    /* So generate an Invar and see if it's different from what
1814       we had before. */
1815    classify_address( &new_inv,
1816                      tid, ea, sp, fp, szB, iinstance->blocks );
1817    tl_assert(new_inv.tag != Inv_Unset);
1818 
1819    /* Did we see something different from before?  If no, then there's
1820       no error. */
1821    tl_assert(inv->tag != Inv_Unset);
1822 
1823    if (LIKELY(eq_Invar(&new_inv, inv)))
1824       return;
1825 
1826    VG_(memset)(bufE, 0, sizeof(bufE));
1827    show_Invar( bufE, sizeof(bufE)-1, inv, frame->depth );
1828 
1829    VG_(memset)(bufA, 0, sizeof(bufA));
1830    show_Invar( bufA, sizeof(bufA)-1, &new_inv, frame->depth );
1831 
1832    VG_(memset)(bufD, 0, sizeof(bufD));
1833    UWord absDelta;
1834    gen_delta_str( bufD, &absDelta, inv, ea );
1835 
1836    if (absDelta < 1024)
1837       sg_record_error_SorG( tid, ea, sszB, bufE, bufA, bufD );
1838 
1839    /* And now install the new observation as "standard", so as to
1840       make future error messages make more sense. */
1841    *inv = new_inv;
1842 }
1843 
1844 
1845 ////////////////////////////////////////
1846 /* Primary push-a-new-frame routine.  Called indirectly from
1847    generated code. */
1848 
1849 static UWord stats__max_sitree_size = 0;
1850 static UWord stats__max_gitree_size = 0;
1851 
1852 static
shadowStack_new_frame(ThreadId tid,Addr sp_at_call_insn,Addr sp_post_call_insn,Addr fp_at_call_insn,Addr ip_post_call_insn,XArray * descrs_at_call_insn)1853 void shadowStack_new_frame ( ThreadId tid,
1854                              Addr     sp_at_call_insn,
1855                              Addr     sp_post_call_insn,
1856                              Addr     fp_at_call_insn,
1857                              Addr     ip_post_call_insn,
1858                              XArray*  descrs_at_call_insn )
1859 {
1860    StackFrame *callee, *caller;
1861    tl_assert(is_sane_TId(tid));
1862 
1863    caller = shadowStacks[tid];
1864    tl_assert(caller);
1865 
1866    if (caller->outer) { /* "this is not the outermost frame" */
1867       tl_assert(caller->outer->inner == caller);
1868       tl_assert(caller->outer->depth >= 0);
1869       tl_assert(1 + caller->outer->depth == caller->depth);
1870    } else {
1871       tl_assert(caller->depth == 0);
1872    }
1873 
1874    caller->sp_at_call = sp_at_call_insn;
1875    caller->fp_at_call = fp_at_call_insn;
1876 
1877    if (descrs_at_call_insn) {
1878       tl_assert( VG_(sizeXA)(descrs_at_call_insn) > 0 );
1879       caller->blocks_added_by_call
1880          = calculate_StackBlock_EAs( descrs_at_call_insn,
1881                                      sp_at_call_insn, fp_at_call_insn );
1882       if (caller->blocks_added_by_call)
1883          add_blocks_to_StackTree( siTrees[tid],
1884                                   descrs_at_call_insn,
1885                                   caller->blocks_added_by_call,
1886                                   caller->depth /* stack depth at which
1887                                                    these blocks are
1888                                                    considered to exist*/ );
1889       if (1) {
1890          UWord s  = VG_(sizeFM)( siTrees[tid] );
1891          UWord g  = VG_(sizeFM)( giTree );
1892          Bool  sb = s > stats__max_sitree_size;
1893          Bool  gb = g > stats__max_gitree_size;
1894          if (sb) stats__max_sitree_size = s;
1895          if (gb) stats__max_gitree_size = g;
1896          if (0 && (sb || gb))
1897             VG_(message)(Vg_DebugMsg,
1898                          "exp-sgcheck: new max tree sizes: "
1899                          "StackTree %lu, GlobalTree %lu\n",
1900                          stats__max_sitree_size, stats__max_gitree_size );
1901       }
1902    } else {
1903       caller->blocks_added_by_call = NULL;
1904    }
1905 
1906    /* caller->blocks_added_by_call is used again (and then freed) when
1907       this frame is removed from the stack. */
1908 
1909    if (caller->inner) {
1910       callee = caller->inner;
1911    } else {
1912       callee = sg_malloc("di.sg_main.sSnf.1", sizeof(StackFrame));
1913       VG_(memset)(callee, 0, sizeof(StackFrame));
1914       callee->outer = caller;
1915       caller->inner = callee;
1916       callee->depth = 1 + caller->depth;
1917       tl_assert(callee->inner == NULL);
1918    }
1919 
1920    /* This sets up .htab, .htab_size and .htab_used */
1921    initialise_II_hash_table( callee );
1922 
1923    callee->creation_sp    = sp_post_call_insn;
1924    callee->sp_at_call     = 0; // not actually required ..
1925    callee->fp_at_call     = 0; // .. these 3 initialisations are ..
1926    callee->blocks_added_by_call = NULL; // .. just for cleanness
1927 
1928    /* record the new running stack frame */
1929    shadowStacks[tid] = callee;
1930 
1931    /* and this thread's query cache is now invalid */
1932    QCache__invalidate( &qcaches[tid] );
1933 
1934    if (0)
1935    { Word d = callee->depth;
1936      const HChar *fnname;
1937      Bool ok;
1938      Addr ip = ip_post_call_insn;
1939      ok = VG_(get_fnname_w_offset)( ip, &fnname );
1940      while (d > 0) {
1941         VG_(printf)(" ");
1942         d--;
1943      }
1944      VG_(printf)("> %s %#lx\n", ok ? fnname : "???", ip);
1945    }
1946 }
1947 
1948 /* CALLED FROM GENERATED CODE */
1949 static
1950 VG_REGPARM(3)
helperc__new_frame(Addr sp_post_call_insn,Addr fp_at_call_insn,Addr ip_post_call_insn,XArray * blocks_at_call_insn,Word sp_adjust)1951 void helperc__new_frame ( Addr sp_post_call_insn,
1952                           Addr fp_at_call_insn,
1953                           Addr ip_post_call_insn,
1954                           XArray* blocks_at_call_insn,
1955                           Word sp_adjust )
1956 {
1957    ThreadId tid = VG_(get_running_tid)();
1958    Addr     sp_at_call_insn = sp_post_call_insn + sp_adjust;
1959    shadowStack_new_frame( tid,
1960                           sp_at_call_insn,
1961                           sp_post_call_insn,
1962                           fp_at_call_insn,
1963                           ip_post_call_insn,
1964                           blocks_at_call_insn );
1965 }
1966 
1967 
1968 ////////////////////////////////////////
1969 /* Primary remove-frame(s) routine.  Called indirectly from
1970    generated code. */
1971 
1972 __attribute__((noinline))
shadowStack_unwind(ThreadId tid,Addr sp_now)1973 static void shadowStack_unwind ( ThreadId tid, Addr sp_now )
1974 {
1975    StackFrame *innermost, *innermostOrig;
1976    tl_assert(is_sane_TId(tid));
1977    innermost = shadowStacks[tid];
1978    tl_assert(innermost);
1979    innermostOrig = innermost;
1980    //VG_(printf)("UNWIND sp_new = %p\n", sp_now);
1981    while (1) {
1982       if (!innermost->outer)
1983          break;
1984       if (innermost->inner)
1985          tl_assert(innermost->inner->outer == innermost);
1986       tl_assert(innermost->outer->inner == innermost);
1987       tl_assert(innermost->blocks_added_by_call == NULL);
1988       if (sp_now <= innermost->creation_sp) break;
1989       //VG_(printf)("UNWIND     dump %p\n", innermost->creation_sp);
1990       tl_assert(innermost->htab);
1991       if (innermost->htab != &innermost->htab_fixed[0])
1992          sg_free(innermost->htab);
1993       /* be on the safe side */
1994       innermost->creation_sp = 0;
1995       innermost->htab = NULL;
1996       innermost->htab_size = 0;
1997       innermost->htab_used = 0;
1998       innermost->sp_at_call = 0;
1999       innermost->fp_at_call = 0;
2000       innermost->blocks_added_by_call = NULL;
2001       innermost = innermost->outer;
2002 
2003       /* So now we're "back" in the calling frame.  Remove from this
2004          thread's stack-interval-tree, the blocks added at the time of
2005          the call. */
2006 
2007       if (innermost->outer) { /* not at the outermost frame */
2008          if (innermost->blocks_added_by_call == NULL) {
2009          } else {
2010             del_blocks_from_StackTree( siTrees[tid],
2011                                        innermost->blocks_added_by_call );
2012             VG_(deleteXA)( innermost->blocks_added_by_call );
2013             innermost->blocks_added_by_call = NULL;
2014          }
2015       }
2016       /* That completes the required tidying of the interval tree
2017          associated with the frame we just removed. */
2018 
2019       if (0) {
2020          Word d = innermost->depth;
2021          while (d > 0) {
2022             VG_(printf)(" ");
2023             d--;
2024          }
2025          VG_(printf)("X\n");
2026       }
2027 
2028    }
2029 
2030    tl_assert(innermost);
2031 
2032    if (innermost != innermostOrig) {
2033       shadowStacks[tid] = innermost;
2034       /* this thread's query cache is now invalid */
2035       QCache__invalidate( &qcaches[tid] );
2036    }
2037 }
2038 
2039 
2040 
2041 //////////////////////////////////////////////////////////////
2042 //                                                          //
2043 // Instrumentation                                          //
2044 //                                                          //
2045 //////////////////////////////////////////////////////////////
2046 
2047 /* What does instrumentation need to do?
2048 
2049    - at each Call transfer, generate a call to shadowStack_new_frame
2050      do this by manually inspecting the IR
2051 
2052    - at each sp change, if the sp change is negative,
2053      call shadowStack_unwind
2054      do this by asking for SP-change analysis
2055 
2056    - for each memory referencing instruction,
2057      call helperc__mem_access
2058 */
2059 
2060 /* A complication: sg_ instrumentation and h_ instrumentation need to
2061    be interleaved.  Since the latter is a lot more complex than the
2062    former, we split the sg_ instrumentation here into four functions
2063    and let the h_ instrumenter call the four functions as it goes.
2064    Hence the h_ instrumenter drives the sg_ instrumenter.
2065 
2066    To make this viable, the sg_ instrumenter carries what running
2067    state it needs in 'struct _SGEnv'.  This is exported only
2068    abstractly from this file.
2069 */
2070 
2071 struct _SGEnv {
2072    /* the current insn's IP */
2073    Addr curr_IP;
2074    /* whether the above is actually known */
2075    Bool curr_IP_known;
2076    /* if we find a mem ref, is it the first for this insn?  Used for
2077       detecting insns which make more than one memory ref, a situation
2078       we basically can't really handle properly; and so we ignore all
2079       but the first ref. */
2080    Bool firstRef;
2081    /* READONLY */
2082    IRTemp (*newIRTemp_cb)(IRType,void*);
2083    void* newIRTemp_opaque;
2084 };
2085 
2086 
2087 /* --- Helper fns for instrumentation --- */
2088 
gen_Get_SP(struct _SGEnv * sge,IRSB * bbOut,const VexGuestLayout * layout,Int hWordTy_szB)2089 static IRTemp gen_Get_SP ( struct _SGEnv*  sge,
2090                            IRSB*           bbOut,
2091                            const VexGuestLayout* layout,
2092                            Int             hWordTy_szB )
2093 {
2094    IRExpr* sp_expr;
2095    IRTemp  sp_temp;
2096    IRType  sp_type;
2097    /* This in effect forces the host and guest word sizes to be the
2098       same. */
2099    tl_assert(hWordTy_szB == layout->sizeof_SP);
2100    sp_type = layout->sizeof_SP == 8 ? Ity_I64 : Ity_I32;
2101    sp_expr = IRExpr_Get( layout->offset_SP, sp_type );
2102    sp_temp = sge->newIRTemp_cb( sp_type, sge->newIRTemp_opaque );
2103    addStmtToIRSB( bbOut, IRStmt_WrTmp( sp_temp, sp_expr ) );
2104    return sp_temp;
2105 }
2106 
gen_Get_FP(struct _SGEnv * sge,IRSB * bbOut,const VexGuestLayout * layout,Int hWordTy_szB)2107 static IRTemp gen_Get_FP ( struct _SGEnv*  sge,
2108                            IRSB*           bbOut,
2109                            const VexGuestLayout* layout,
2110                            Int             hWordTy_szB )
2111 {
2112    IRExpr* fp_expr;
2113    IRTemp  fp_temp;
2114    IRType  fp_type;
2115    /* This in effect forces the host and guest word sizes to be the
2116       same. */
2117    tl_assert(hWordTy_szB == layout->sizeof_SP);
2118    fp_type = layout->sizeof_FP == 8 ? Ity_I64 : Ity_I32;
2119    fp_expr = IRExpr_Get( layout->offset_FP, fp_type );
2120    fp_temp = sge->newIRTemp_cb( fp_type, sge->newIRTemp_opaque );
2121    addStmtToIRSB( bbOut, IRStmt_WrTmp( fp_temp, fp_expr ) );
2122    return fp_temp;
2123 }
2124 
instrument_mem_access(struct _SGEnv * sge,IRSB * bbOut,IRExpr * addr,Int szB,Bool isStore,Int hWordTy_szB,Addr curr_IP,const VexGuestLayout * layout)2125 static void instrument_mem_access ( struct _SGEnv* sge,
2126                                     IRSB*   bbOut,
2127                                     IRExpr* addr,
2128                                     Int     szB,
2129                                     Bool    isStore,
2130                                     Int     hWordTy_szB,
2131                                     Addr    curr_IP,
2132                                     const VexGuestLayout* layout )
2133 {
2134    IRType  tyAddr      = Ity_INVALID;
2135    XArray* frameBlocks = NULL;
2136 
2137    tl_assert(isIRAtom(addr));
2138    tl_assert(hWordTy_szB == 4 || hWordTy_szB == 8);
2139 
2140    tyAddr = typeOfIRExpr( bbOut->tyenv, addr );
2141    tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
2142 
2143 #if defined(VGA_x86)
2144    { UChar* p = (UChar*)curr_IP;
2145      // pop %ebp; RET
2146      if (p[0] == 0xc3 && p[-1] == 0x5d) return;
2147      // pop %ebp; RET $imm16
2148      if (p[0] == 0xc2 && p[-1] == 0x5d) return;
2149      // PUSH %EBP; mov %esp,%ebp
2150      if (p[0] == 0x55 && p[1] == 0x89 && p[2] == 0xe5) return;
2151    }
2152 #endif
2153 
2154    /* First off, find or create the StackBlocks for this instruction. */
2155    frameBlocks = get_StackBlocks_for_IP( curr_IP );
2156    tl_assert(frameBlocks);
2157    //if (VG_(sizeXA)( frameBlocks ) == 0)
2158    //   frameBlocks = NULL;
2159 
2160    /* Generate a call to "helperc__mem_access", passing:
2161          addr current_SP current_FP szB curr_IP frameBlocks
2162    */
2163    { IRTemp t_SP = gen_Get_SP( sge, bbOut, layout, hWordTy_szB );
2164      IRTemp t_FP = gen_Get_FP( sge, bbOut, layout, hWordTy_szB );
2165      IRExpr** args
2166         = mkIRExprVec_6( addr,
2167                          IRExpr_RdTmp(t_SP),
2168                          IRExpr_RdTmp(t_FP),
2169                          mkIRExpr_HWord( isStore ? (-szB) : szB ),
2170                          mkIRExpr_HWord( curr_IP ),
2171                          mkIRExpr_HWord( (HWord)frameBlocks ) );
2172      IRDirty* di
2173         = unsafeIRDirty_0_N( 3/*regparms*/,
2174                              "helperc__mem_access",
2175                             VG_(fnptr_to_fnentry)( &helperc__mem_access ),
2176                              args );
2177 
2178      addStmtToIRSB( bbOut, IRStmt_Dirty(di) );
2179    }
2180 }
2181 
2182 
2183 /* --- Instrumentation main (4 fns) --- */
2184 
sg_instrument_init(IRTemp (* newIRTemp_cb)(IRType,void *),void * newIRTemp_opaque)2185 struct _SGEnv * sg_instrument_init ( IRTemp (*newIRTemp_cb)(IRType,void*),
2186                                      void* newIRTemp_opaque )
2187 {
2188    struct _SGEnv * env = sg_malloc("di.sg_main.sii.1",
2189                                    sizeof(struct _SGEnv));
2190    tl_assert(env);
2191    env->curr_IP          = 0;
2192    env->curr_IP_known    = False;
2193    env->firstRef         = True;
2194    env->newIRTemp_cb     = newIRTemp_cb;
2195    env->newIRTemp_opaque = newIRTemp_opaque;
2196    return env;
2197 }
2198 
sg_instrument_fini(struct _SGEnv * env)2199 void sg_instrument_fini ( struct _SGEnv * env )
2200 {
2201    sg_free(env);
2202 }
2203 
2204 /* Add instrumentation for 'st' to 'sbOut', and possibly modify 'env'
2205    as required.  This must be called before 'st' itself is added to
2206    'sbOut'. */
sg_instrument_IRStmt(struct _SGEnv * env,IRSB * sbOut,IRStmt * st,const VexGuestLayout * layout,IRType gWordTy,IRType hWordTy)2207 void sg_instrument_IRStmt ( /*MOD*/struct _SGEnv * env,
2208                             /*MOD*/IRSB* sbOut,
2209                             IRStmt* st,
2210                             const VexGuestLayout* layout,
2211                             IRType gWordTy, IRType hWordTy )
2212 {
2213    if (!sg_clo_enable_sg_checks)
2214       return;
2215 
2216    tl_assert(st);
2217    tl_assert(isFlatIRStmt(st));
2218    switch (st->tag) {
2219       case Ist_NoOp:
2220       case Ist_AbiHint:
2221       case Ist_Put:
2222       case Ist_PutI:
2223       case Ist_MBE:
2224          /* None of these can contain any memory references. */
2225          break;
2226 
2227       case Ist_Exit:
2228          tl_assert(st->Ist.Exit.jk != Ijk_Call);
2229          /* else we must deal with a conditional call */
2230          break;
2231 
2232       case Ist_IMark:
2233          env->curr_IP_known = True;
2234          env->curr_IP       = st->Ist.IMark.addr;
2235          env->firstRef      = True;
2236          break;
2237 
2238       case Ist_Store:
2239          tl_assert(env->curr_IP_known);
2240          if (env->firstRef) {
2241             instrument_mem_access(
2242                env, sbOut,
2243                st->Ist.Store.addr,
2244                sizeofIRType(typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data)),
2245                True/*isStore*/,
2246                sizeofIRType(hWordTy),
2247                env->curr_IP, layout
2248             );
2249             env->firstRef = False;
2250          }
2251          break;
2252 
2253       case Ist_WrTmp: {
2254          IRExpr* data = st->Ist.WrTmp.data;
2255          if (data->tag == Iex_Load) {
2256             tl_assert(env->curr_IP_known);
2257             if (env->firstRef) {
2258                instrument_mem_access(
2259                   env, sbOut,
2260                   data->Iex.Load.addr,
2261                   sizeofIRType(data->Iex.Load.ty),
2262                   False/*!isStore*/,
2263                   sizeofIRType(hWordTy),
2264                   env->curr_IP, layout
2265                );
2266                env->firstRef = False;
2267             }
2268          }
2269          break;
2270       }
2271 
2272       case Ist_Dirty: {
2273          Int      dataSize;
2274          IRDirty* d = st->Ist.Dirty.details;
2275          if (d->mFx != Ifx_None) {
2276             /* This dirty helper accesses memory.  Collect the
2277                details. */
2278             tl_assert(env->curr_IP_known);
2279             if (env->firstRef) {
2280                tl_assert(d->mAddr != NULL);
2281                tl_assert(d->mSize != 0);
2282                dataSize = d->mSize;
2283                if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
2284                   instrument_mem_access(
2285                      env, sbOut, d->mAddr, dataSize, False/*!isStore*/,
2286                      sizeofIRType(hWordTy), env->curr_IP, layout
2287                   );
2288                }
2289                if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
2290                   instrument_mem_access(
2291                      env, sbOut, d->mAddr, dataSize, True/*isStore*/,
2292                      sizeofIRType(hWordTy), env->curr_IP, layout
2293                   );
2294                }
2295                env->firstRef = False;
2296             }
2297          } else {
2298             tl_assert(d->mAddr == NULL);
2299             tl_assert(d->mSize == 0);
2300          }
2301          break;
2302       }
2303 
2304       case Ist_CAS: {
2305          /* We treat it as a read and a write of the location.  I
2306             think that is the same behaviour as it was before IRCAS
2307             was introduced, since prior to that point, the Vex front
2308             ends would translate a lock-prefixed instruction into a
2309             (normal) read followed by a (normal) write. */
2310          if (env->firstRef) {
2311             Int    dataSize;
2312             IRCAS* cas = st->Ist.CAS.details;
2313             tl_assert(cas->addr != NULL);
2314             tl_assert(cas->dataLo != NULL);
2315             dataSize = sizeofIRType(typeOfIRExpr(sbOut->tyenv, cas->dataLo));
2316             if (cas->dataHi != NULL)
2317                dataSize *= 2; /* since it's a doubleword-CAS */
2318             instrument_mem_access(
2319                env, sbOut, cas->addr, dataSize, False/*!isStore*/,
2320                sizeofIRType(hWordTy), env->curr_IP, layout
2321             );
2322             instrument_mem_access(
2323                env, sbOut, cas->addr, dataSize, True/*isStore*/,
2324                sizeofIRType(hWordTy), env->curr_IP, layout
2325             );
2326             env->firstRef = False;
2327          }
2328          break;
2329       }
2330 
2331       default:
2332          tl_assert(0);
2333 
2334    } /* switch (st->tag) */
2335 }
2336 
2337 
2338 /* Add instrumentation for the final jump of an IRSB 'sbOut', and
2339    possibly modify 'env' as required.  This must be the last
2340    instrumentation statement in the block. */
sg_instrument_final_jump(struct _SGEnv * env,IRSB * sbOut,IRExpr * next,IRJumpKind jumpkind,const VexGuestLayout * layout,IRType gWordTy,IRType hWordTy)2341 void sg_instrument_final_jump ( /*MOD*/struct _SGEnv * env,
2342                                 /*MOD*/IRSB* sbOut,
2343                                 IRExpr* next,
2344                                 IRJumpKind jumpkind,
2345                                 const VexGuestLayout* layout,
2346                                 IRType gWordTy, IRType hWordTy )
2347 {
2348    if (!sg_clo_enable_sg_checks)
2349       return;
2350 
2351    if (jumpkind == Ijk_Call) {
2352       // Assumes x86 or amd64
2353       IRTemp   sp_post_call_insn, fp_post_call_insn;
2354       XArray*  frameBlocks;
2355       IRExpr** args;
2356       IRDirty* di;
2357       sp_post_call_insn
2358          = gen_Get_SP( env, sbOut, layout, sizeofIRType(hWordTy) );
2359       fp_post_call_insn
2360          = gen_Get_FP( env, sbOut, layout, sizeofIRType(hWordTy) );
2361       tl_assert(env->curr_IP_known);
2362       frameBlocks = get_StackBlocks_for_IP( env->curr_IP );
2363       tl_assert(frameBlocks);
2364       if (VG_(sizeXA)(frameBlocks) == 0)
2365          frameBlocks = NULL;
2366       args
2367          = mkIRExprVec_5(
2368               IRExpr_RdTmp(sp_post_call_insn),
2369               IRExpr_RdTmp(fp_post_call_insn),
2370                          /* assume the call doesn't change FP */
2371               next,
2372               mkIRExpr_HWord( (HWord)frameBlocks ),
2373               mkIRExpr_HWord( sizeofIRType(gWordTy) )
2374            );
2375       di = unsafeIRDirty_0_N(
2376               3/*regparms*/,
2377               "helperc__new_frame",
2378               VG_(fnptr_to_fnentry)( &helperc__new_frame ),
2379               args );
2380       addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
2381    }
2382 }
2383 
2384 
2385 //////////////////////////////////////////////////////////////
2386 //                                                          //
2387 // end Instrumentation                                      //
2388 //                                                          //
2389 //////////////////////////////////////////////////////////////
2390 
2391 
2392 //////////////////////////////////////////////////////////////
2393 //                                                          //
2394 // misc                                                     //
2395 //                                                          //
2396 //////////////////////////////////////////////////////////////
2397 
2398 /* Make a new empty stack frame that is suitable for being the
2399    outermost frame in a stack.  It has a creation_sp of effectively
2400    infinity, so it can never be removed. */
new_root_StackFrame(void)2401 static StackFrame* new_root_StackFrame ( void )
2402 {
2403    StackFrame* sframe = sg_malloc("di.sg_main.nrS.1", sizeof(StackFrame));
2404    VG_(memset)( sframe, 0, sizeof(*sframe) );
2405    sframe->creation_sp = ~0UL;
2406 
2407    /* This sets up .htab, .htab_size and .htab_used */
2408    initialise_II_hash_table( sframe );
2409 
2410    /* ->depth, ->outer, ->inner are 0, NULL, NULL */
2411 
2412    return sframe;
2413 }
2414 
2415 /* Primary routine for setting up the shadow stack for a new thread.
2416    Note that this is used to create not only child thread stacks, but
2417    the root thread's stack too.  We create a new stack with
2418    .creation_sp set to infinity, so that the outermost frame can never
2419    be removed (by shadowStack_unwind).  The core calls this function
2420    as soon as a thread is created.  We cannot yet get its SP value,
2421    since that may not yet be set. */
shadowStack_thread_create(ThreadId parent,ThreadId child)2422 static void shadowStack_thread_create ( ThreadId parent, ThreadId child )
2423 {
2424    tl_assert(is_sane_TId(child));
2425    if (parent == VG_INVALID_THREADID) {
2426       /* creating the main thread's stack */
2427    } else {
2428       tl_assert(is_sane_TId(parent));
2429       tl_assert(parent != child);
2430       tl_assert(shadowStacks[parent] != NULL);
2431       tl_assert(siTrees[parent] != NULL);
2432    }
2433 
2434    /* Create the child's stack.  Bear in mind we may be re-using
2435       it. */
2436    if (shadowStacks[child] == NULL) {
2437       /* First use of this stack.  Just allocate an initial frame. */
2438       tl_assert(siTrees[child] == NULL);
2439    } else {
2440       StackFrame *frame, *frame2;
2441       /* re-using a stack. */
2442       /* get rid of the interval tree */
2443       tl_assert(siTrees[child] != NULL);
2444       delete_StackTree( siTrees[child] );
2445       siTrees[child] = NULL;
2446       /* Throw away all existing frames. */
2447       frame = shadowStacks[child];
2448       while (frame->outer)
2449          frame = frame->outer;
2450       tl_assert(frame->depth == 0);
2451       while (frame) {
2452          frame2 = frame->inner;
2453          if (frame2) tl_assert(1 + frame->depth == frame2->depth);
2454          sg_free(frame);
2455          frame = frame2;
2456       }
2457       shadowStacks[child] = NULL;
2458    }
2459 
2460    tl_assert(shadowStacks[child] == NULL);
2461    tl_assert(siTrees[child] == NULL);
2462 
2463    /* Set up the initial stack frame. */
2464    shadowStacks[child] = new_root_StackFrame();
2465 
2466    /* and set up the child's stack block interval tree. */
2467    siTrees[child] = new_StackTree();
2468 }
2469 
2470 /* Once a thread is ready to go, the core calls here.  We take the
2471    opportunity to push a second frame on its stack, with the
2472    presumably valid SP value that is going to be used for the thread's
2473    startup.  Hence we should always wind up with a valid outermost
2474    frame for the thread. */
shadowStack_set_initial_SP(ThreadId tid)2475 static void shadowStack_set_initial_SP ( ThreadId tid )
2476 {
2477    StackFrame* sf;
2478    tl_assert(is_sane_TId(tid));
2479    sf = shadowStacks[tid];
2480    tl_assert(sf != NULL);
2481    tl_assert(sf->outer == NULL);
2482    tl_assert(sf->inner == NULL);
2483    tl_assert(sf->creation_sp == ~0UL);
2484    shadowStack_new_frame( tid, 0, VG_(get_SP)(tid),
2485                                0, VG_(get_IP)(tid), NULL );
2486 }
2487 
2488 
2489 //////////////////////////////////////////////////////////////
2490 //                                                          //
2491 // main-ish                                                 //
2492 //                                                          //
2493 //////////////////////////////////////////////////////////////
2494 
2495 /* CALLED indirectly FROM GENERATED CODE.  Calls here are created by
2496    sp-change analysis, as requested in pc_pre_clo_int(). */
sg_die_mem_stack(Addr old_SP,SizeT len)2497 void sg_die_mem_stack ( Addr old_SP, SizeT len ) {
2498    ThreadId  tid = VG_(get_running_tid)();
2499    shadowStack_unwind( tid, old_SP+len );
2500 }
2501 
sg_pre_clo_init(void)2502 void sg_pre_clo_init ( void ) {
2503    ourGlobals_init();
2504    init_StackBlocks_set();
2505    init_GlobalBlock_set();
2506 }
2507 
sg_post_clo_init(void)2508 void sg_post_clo_init ( void ) {
2509 }
2510 
sg_pre_thread_ll_create(ThreadId parent,ThreadId child)2511 void sg_pre_thread_ll_create ( ThreadId parent, ThreadId child ) {
2512    shadowStack_thread_create(parent, child);
2513 }
2514 
sg_pre_thread_first_insn(ThreadId tid)2515 void sg_pre_thread_first_insn ( ThreadId tid ) {
2516    shadowStack_set_initial_SP(tid);
2517 }
2518 
sg_fini(Int exitcode)2519 void sg_fini(Int exitcode)
2520 {
2521    if (VG_(clo_stats)) {
2522       VG_(message)(Vg_DebugMsg,
2523          " sg_:  %'llu total accesses, of which:\n", stats__total_accesses);
2524       VG_(message)(Vg_DebugMsg,
2525          " sg_:     stack0: %'12llu classify\n",
2526          stats__classify_Stack0);
2527       VG_(message)(Vg_DebugMsg,
2528          " sg_:     stackN: %'12llu classify\n",
2529          stats__classify_StackN);
2530       VG_(message)(Vg_DebugMsg,
2531          " sg_:     global: %'12llu classify\n",
2532          stats__classify_Global);
2533       VG_(message)(Vg_DebugMsg,
2534          " sg_:    unknown: %'12llu classify\n",
2535          stats__classify_Unknown);
2536       VG_(message)(Vg_DebugMsg,
2537          " sg_:  %'llu Invars preened, of which %'llu changed\n",
2538          stats__Invars_preened, stats__Invars_changed);
2539       VG_(message)(Vg_DebugMsg,
2540          " sg_:   t_i_b_MT: %'12llu\n", stats__t_i_b_empty);
2541       VG_(message)(Vg_DebugMsg,
2542          " sg_:     qcache: %'llu searches, %'llu probes, %'llu misses\n",
2543          stats__qcache_queries, stats__qcache_probes, stats__qcache_misses);
2544       VG_(message)(Vg_DebugMsg,
2545          " sg_:  htab-fast: %'llu hits\n",
2546          stats__htab_fast);
2547       VG_(message)(Vg_DebugMsg,
2548          " sg_:  htab-slow: %'llu searches, %'llu probes, %'llu resizes\n",
2549          stats__htab_searches, stats__htab_probes, stats__htab_resizes);
2550    }
2551 }
2552 
2553 
2554 /*--------------------------------------------------------------------*/
2555 /*--- end                                                sg_main.c ---*/
2556 /*--------------------------------------------------------------------*/
2557