• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- Take snapshots of client stacks.              m_stacktrace.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2000-2017 Julian Seward
11       jseward@acm.org
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 #include "pub_core_basics.h"
32 #include "pub_core_vki.h"
33 #include "pub_core_threadstate.h"
34 #include "pub_core_debuginfo.h"     // XXX: circular dependency
35 #include "pub_core_aspacemgr.h"     // For VG_(is_addressable)()
36 #include "pub_core_libcbase.h"
37 #include "pub_core_libcassert.h"
38 #include "pub_core_libcprint.h"
39 #include "pub_core_machine.h"
40 #include "pub_core_options.h"
41 #include "pub_core_stacks.h"        // VG_(stack_limits)
42 #include "pub_core_stacktrace.h"
43 #include "pub_core_syswrap.h"       // VG_(is_in_syscall)
44 #include "pub_core_xarray.h"
45 #include "pub_core_clientstate.h"   // VG_(client__dl_sysinfo_int80)
46 #include "pub_core_trampoline.h"
47 
48 
49 /*------------------------------------------------------------*/
50 /*---                                                      ---*/
51 /*--- BEGIN platform-dependent unwinder worker functions   ---*/
52 /*---                                                      ---*/
53 /*------------------------------------------------------------*/
54 
55 /* Take a snapshot of the client's stack, putting up to 'max_n_ips'
56    IPs into 'ips'.  In order to be thread-safe, we pass in the
57    thread's IP SP, FP if that's meaningful, and LR if that's
58    meaningful.  Returns number of IPs put in 'ips'.
59 
60    If you know what the thread ID for this stack is, send that as the
61    first parameter, else send zero.  This helps generate better stack
62    traces on ppc64-linux and has no effect on other platforms.
63 */
64 
65 /* Do frame merging in the _i frames in _ips array of recursive cycles
66    of up to _nframes.  The merge is done during stack unwinding
67    (i.e. in platform specific unwinders) to collect as many
68    "interesting" stack traces as possible. */
#define RECURSIVE_MERGE(_nframes,_ips,_i) if (UNLIKELY(_nframes > 0)) \
do {                                                                  \
   Int dist;                                                          \
   /* Look back up to _nframes entries for a repeat of the IP just    \
      recorded at _ips[_i-1]; if found, collapse the cycle by         \
      rewinding _i to the earlier occurrence. */                      \
   for (dist = 1; dist <= _nframes && dist < (Int)_i; dist++) {       \
      if (_ips[_i-1] == _ips[_i-1-dist]) {                            \
         _i = _i - dist;                                              \
         break;                                                       \
      }                                                               \
   }                                                                  \
} while (0)
79 
80 /* Note about calculation of fp_min : fp_min is the lowest address
81    which can be accessed during unwinding. This is SP - VG_STACK_REDZONE_SZB.
82    On most platforms, this will be equal to SP (as VG_STACK_REDZONE_SZB
83    is 0). However, on some platforms (e.g. amd64), there is an accessible
84    redzone below the SP. Some CFI unwind info are generated, taking this
85    into account. As an example, the following is a CFI unwind info on
86    amd64 found for a 'retq' instruction:
87 [0x400f7e .. 0x400f7e]: let cfa=oldSP+8 in RA=*(cfa+-8) SP=cfa+0 BP=*(cfa+-16)
88   0x400f7e: retq
89   As you can see, the previous BP is found 16 bytes below the cfa, which
90   is the oldSP+8. So, effectively, the BP is found 8 bytes below the SP.
91   The fp_min must take this into account, otherwise, VG_(use_CF_info) will
92   not unwind the BP. */
93 
94 /* ------------------------ x86 ------------------------- */
95 
96 #if defined(VGP_x86_linux) || defined(VGP_x86_darwin) \
97     || defined(VGP_x86_solaris)
98 
99 #define N_FP_CF_VERIF 1021
100 // prime number so that size of fp_CF_verif is just below 4K or 8K
101 // Note that this prime nr differs from the one chosen in
102 // m_debuginfo/debuginfo.c for the cfsi cache : in case we have
103 // a collision here between two IPs, we expect to not (often) have the
104 // same collision in the cfsi cache (and vice-versa).
105 
106 // unwinding with fp chain is ok:
107 #define FPUNWIND 0
108 // there is no CFI info for this IP:
109 #define NOINFO   1
110 // Unwind with FP is not ok, must use CF unwind:
111 #define CFUNWIND 2
112 
113 static Addr fp_CF_verif_cache [N_FP_CF_VERIF];
114 
115 /* An unwind done by following the fp chain technique can be incorrect
116    as not all frames are respecting the standard bp/sp ABI.
117    The CF information is now generated by default by gcc
118    (as part of the dwarf info). However, unwinding using CF information
119    is significantly slower : a slowdown of 20% has been observed
120    on an helgrind test case.
121    So, by default, the unwinding will be done using the fp chain.
122    But before accepting to unwind an IP with fp_chain, the result
123    of the unwind will be checked with the CF information.
124    This check can give 3 results:
125      FPUNWIND (0): there is CF info, and it gives the same result as fp unwind.
126        => it is assumed that future unwind for this IP can be done
127           with the fast fp chain, without further CF checking
128      NOINFO   (1): there is no CF info (so, fp unwind is the only do-able thing)
129      CFUNWIND (2): there is CF info, but unwind result differs.
130        => it is assumed that future unwind for this IP must be done
131        with the CF info.
132    Of course, if each fp unwind implies a check done with a CF unwind,
133    it would just be slower => we cache the check result in an
134    array of checked Addr.
135    The check for an IP will be stored at
136     fp_CF_verif_cache[IP % N_FP_CF_VERIF] as one of:
137                      IP ^ FPUNWIND
138                      IP ^ NOINFO
139                      IP ^ CFUNWIND
140 
141    Note: we can re-use the last (ROUNDDOWN (log (N_FP_CF_VERIF))) bits
142    to store the check result, as they are guaranteed to be non significant
143    in the comparison between 2 IPs stored in fp_CF_verif_cache).
144    In other words, if two IPs are only differing on the last 2 bits,
145    then they will not land in the same cache bucket.
146 */
147 
148 /* cached result of VG_(FPO_info_present)(). Refreshed each time
149    the fp_CF_verif_generation is different of the current debuginfo
150    generation. */
151 static Bool FPO_info_present = False;
152 
153 static UInt fp_CF_verif_generation = 0;
154 // Our cache has to be maintained in sync with the CFI cache.
155 // Each time the debuginfo is changed, its generation will be incremented.
156 // We will clear our cache when our saved generation differs from
157 // the debuginfo generation.
158 
VG_(get_StackTrace_wrk)159 UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
160                                /*OUT*/Addr* ips, UInt max_n_ips,
161                                /*OUT*/Addr* sps, /*OUT*/Addr* fps,
162                                const UnwindStartRegs* startRegs,
163                                Addr fp_max_orig )
164 {
165    const Bool do_stats = False; // compute and output some stats regularly.
166    static struct {
167       UInt nr; // nr of stacktraces computed
168       UInt nf; // nr of frames computed
169       UInt Ca; // unwind for which cache indicates CFUnwind must be used.
170       UInt FF; // unwind for which cache indicates FPUnwind can be used.
171       UInt Cf; // unwind at end of stack+store CFUNWIND (xip not end of stack).
172       UInt Fw; // unwind at end of stack+store FPUNWIND
173       UInt FO; // unwind + store FPUNWIND
174       UInt CF; // unwind + store CFUNWIND. Details below.
175       UInt xi; UInt xs; UInt xb; // register(s) which caused a 'store CFUNWIND'.
176       UInt Ck; // unwind fp invalid+store FPUNWIND
177       UInt MS; // microsoft unwind
178    } stats;
179 
180    const Bool   debug = False;
181    //                 = VG_(debugLog_getLevel) () > 3;
182    //                 = True;
183    //                 = stats.nr >= 123456;
184    const HChar* unwind_case; // used when debug is True.
185    // Debugging this function is not straightforward.
186    // Here is the easiest way I have found:
187    // 1. Change the above to True.
188    // 2. Start your program under Valgrind with --tool=none --vgdb-error=0
189    // 3. Use GDB/vgdb to put a breakpoint where you want to debug the stacktrace
190    // 4. Continue till breakpoint is encountered
191    // 5. From GDB, use 'monitor v.info scheduler' and examine the unwind traces.
192    //    You might have to do twice 'monitor v.info scheduler' to see
193    //    the effect of caching the results of the verification.
194    //    You can also modify the debug dynamically using by using
195    //    'monitor v.set debuglog 4.
196 
197    Int   i;
198    Addr  fp_max;
199    UInt  n_found = 0;
200    const Int cmrf = VG_(clo_merge_recursive_frames);
201 
202    vg_assert(sizeof(Addr) == sizeof(UWord));
203    vg_assert(sizeof(Addr) == sizeof(void*));
204 
205    D3UnwindRegs fpverif_uregs; // result of CF unwind for a check reason.
206    Addr xip_verified = 0; // xip for which we have calculated fpverif_uregs
207    // 0 assigned to silence false positive -Wuninitialized warning
208    // This is a false positive as xip_verified is assigned when
209    // xip_verif > CFUNWIND and only used if xip_verif > CFUNWIND.
210 
211    D3UnwindRegs uregs;
212    uregs.xip = (Addr)startRegs->r_pc;
213    uregs.xsp = (Addr)startRegs->r_sp;
214    uregs.xbp = startRegs->misc.X86.r_ebp;
215    Addr fp_min = uregs.xsp - VG_STACK_REDZONE_SZB;
216 
217    /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
218       stopping when the trail goes cold, which we guess to be
219       when FP is not a reasonable stack location. */
220 
221    // JRS 2002-sep-17: hack, to round up fp_max to the end of the
222    // current page, at least.  Dunno if it helps.
223    // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
224    fp_max = VG_PGROUNDUP(fp_max_orig);
225    if (fp_max >= sizeof(Addr))
226       fp_max -= sizeof(Addr);
227 
228    if (debug)
229       VG_(printf)("max_n_ips=%u fp_min=0x%08lx fp_max_orig=0x08%lx, "
230                   "fp_max=0x%08lx ip=0x%08lx fp=0x%08lx\n",
231                   max_n_ips, fp_min, fp_max_orig, fp_max,
232                   uregs.xip, uregs.xbp);
233 
234    /* Assertion broken before main() is reached in pthreaded programs;  the
235     * offending stack traces only have one item.  --njn, 2002-aug-16 */
236    /* vg_assert(fp_min <= fp_max);*/
237    // On Darwin, this kicks in for pthread-related stack traces, so they're
238    // only 1 entry long which is wrong.
239 #  if defined(VGO_linux)
240    if (fp_min + 512 >= fp_max) {
241       /* If the stack limits look bogus, don't poke around ... but
242          don't bomb out either. */
243 #  elif defined(VGO_solaris)
244    if (fp_max == 0) {
245       /* VG_(get_StackTrace)() can be called by tools very early when
246          various tracing options are enabled. Don't proceed further
247          if the stack limits look bogus.
248        */
249 #  endif
250 #  if defined(VGO_linux) || defined(VGO_solaris)
251       if (sps) sps[0] = uregs.xsp;
252       if (fps) fps[0] = uregs.xbp;
253       ips[0] = uregs.xip;
254       return 1;
255    }
256 #  endif
257 
258    if (UNLIKELY (fp_CF_verif_generation != VG_(debuginfo_generation)())) {
259       fp_CF_verif_generation = VG_(debuginfo_generation)();
260       VG_(memset)(&fp_CF_verif_cache, 0, sizeof(fp_CF_verif_cache));
261       FPO_info_present = VG_(FPO_info_present)();
262    }
263 
264 
265    /* Loop unwinding the stack. Note that the IP value we get on
266     * each pass (whether from CFI info or a stack frame) is a
267     * return address so is actually after the calling instruction
268     * in the calling function.
269     *
270     * Because of this we subtract one from the IP after each pass
271     * of the loop so that we find the right CFI block on the next
272     * pass - otherwise we can find the wrong CFI info if it happens
273     * to change after the calling instruction and that will mean
274     * that we will fail to unwind the next step.
275     *
276     * This most frequently happens at the end of a function when
277     * a tail call occurs and we wind up using the CFI info for the
278     * next function which is completely wrong.
279     */
280    if (sps) sps[0] = uregs.xsp;
281    if (fps) fps[0] = uregs.xbp;
282    ips[0] = uregs.xip;
283    i = 1;
284    if (do_stats) stats.nr++;
285 
286    while (True) {
287 
288       if (i >= max_n_ips)
289          break;
290 
291       UWord hash = uregs.xip % N_FP_CF_VERIF;
292       Addr xip_verif = uregs.xip ^ fp_CF_verif_cache [hash];
293       if (debug)
294          VG_(printf)("     uregs.xip 0x%08lx xip_verif[0x%08lx]"
295                      " xbp 0x%08lx xsp 0x%08lx\n",
296                      uregs.xip, xip_verif,
297                      uregs.xbp, uregs.xsp);
298       // If xip is in cache, then xip_verif will be <= CFUNWIND.
299       // Otherwise, if not in cache, xip_verif will be > CFUNWIND.
300 
301       /* Try to derive a new (ip,sp,fp) triple from the current set. */
302 
303       /* Do we have to do CFI unwinding ?
304          We do CFI unwinding if one of the following condition holds:
305          a. fp_CF_verif_cache contains xip but indicates CFUNWIND must
306             be done (i.e. fp unwind check failed when we did the first
307             unwind for this IP).
308          b. fp_CF_verif_cache does not contain xip.
309             We will try CFI unwinding in fpverif_uregs and compare with
310             FP unwind result to insert xip in the cache with the correct
311             indicator. */
312       if (UNLIKELY(xip_verif >= CFUNWIND)) {
313          if (xip_verif == CFUNWIND) {
314             /* case a : do "real" cfi unwind */
315             if ( VG_(use_CF_info)( &uregs, fp_min, fp_max ) ) {
316                if (debug) unwind_case = "Ca";
317                if (do_stats) stats.Ca++;
318                goto unwind_done;
319             }
320             /* ??? cache indicates we have to do CFI unwind (so, we
321              previously found CFI info, and failed the fp unwind
322              check). Now, we just failed with CFI.  So, once we
323              succeed, once we fail.  No idea what is going on =>
324              cleanup the cache entry and fallover to fp unwind (this
325              time). */
326             fp_CF_verif_cache [hash] = 0;
327             if (debug) VG_(printf)("     cache reset as CFI ok then nok\n");
328             //??? stats
329             xip_verif = NOINFO;
330          } else {
331             /* case b : do "verif" cfi unwind in fpverif_uregs */
332             fpverif_uregs = uregs;
333             xip_verified = uregs.xip;
334             if ( !VG_(use_CF_info)( &fpverif_uregs, fp_min, fp_max ) ) {
335                fp_CF_verif_cache [hash] = uregs.xip ^ NOINFO;
336                if (debug) VG_(printf)("     cache NOINFO fpverif_uregs\n");
337                xip_verif = NOINFO;
338             }
339          }
340       }
341 
342       /* On x86, try the old-fashioned method of following the
343          %ebp-chain.  This can be done if the fp_CF_verif_cache for xip
344          indicate fp unwind is ok. This must be done if the cache indicates
345          there is no info. This is also done to confirm what to put in the cache
346          if xip was not in the cache. */
347       /* This deals with frames resulting from functions which begin "pushl%
348          ebp ; movl %esp, %ebp" which is the ABI-mandated preamble. */
349       if (fp_min <= uregs.xbp &&
350           uregs.xbp <= fp_max - 1 * sizeof(UWord)/*see comment below*/ &&
351           VG_IS_4_ALIGNED(uregs.xbp))
352       {
353          Addr old_xsp;
354 
355          /* fp looks sane, so use it. */
356          uregs.xip = (((UWord*)uregs.xbp)[1]);
357          // We stop if we hit a zero (the traditional end-of-stack
358          // marker) or a one -- these correspond to recorded IPs of 0 or -1.
359          // The latter because r8818 (in this file) changes the meaning of
360          // entries [1] and above in a stack trace, by subtracting 1 from
361          // them.  Hence stacks that used to end with a zero value now end in
362          // -1 and so we must detect that too.
363          if (0 == uregs.xip || 1 == uregs.xip) {
364             if (xip_verif > CFUNWIND) {
365                // Check if we obtain the same result with fp unwind.
366                // If same result, then mark xip as fp unwindable
367                if (uregs.xip == fpverif_uregs.xip) {
368                   fp_CF_verif_cache [hash] = xip_verified ^ FPUNWIND;
369                   if (debug) VG_(printf)("     cache FPUNWIND 0\n");
370                   unwind_case = "Fw";
371                   if (do_stats) stats.Fw++;
372                   break;
373                } else {
374                   fp_CF_verif_cache [hash] = xip_verified ^ CFUNWIND;
375                   uregs = fpverif_uregs;
376                   if (debug) VG_(printf)("     cache CFUNWIND 0\n");
377                   unwind_case = "Cf";
378                   if (do_stats) stats.Cf++;
379                   goto unwind_done;
380                }
381             } else {
382                // end of stack => out of the loop.
383                break;
384             }
385          }
386 
387          old_xsp = uregs.xsp;
388          uregs.xsp = uregs.xbp + sizeof(Addr) /*saved %ebp*/
389                                + sizeof(Addr) /*ra*/;
390          uregs.xbp = (((UWord*)uregs.xbp)[0]);
391          if (xip_verif > CFUNWIND) {
392             if (uregs.xip == fpverif_uregs.xip
393                 && uregs.xsp == fpverif_uregs.xsp
394                 && uregs.xbp == fpverif_uregs.xbp) {
395                fp_CF_verif_cache [hash] = xip_verified ^ FPUNWIND;
396                if (debug) VG_(printf)("     cache FPUNWIND >2\n");
397                if (debug) unwind_case = "FO";
398                if (do_stats) stats.FO++;
399                if (old_xsp >= uregs.xsp) {
400                   if (debug)
401                     VG_(printf) ("     FO end of stack old_xsp %p >= xsp %p\n",
402                                  (void*)old_xsp, (void*)uregs.xsp);
403                   break;
404                }
405             } else {
406                fp_CF_verif_cache [hash] = xip_verified ^ CFUNWIND;
407                if (debug) VG_(printf)("     cache CFUNWIND >2\n");
408                if (do_stats && uregs.xip != fpverif_uregs.xip) stats.xi++;
409                if (do_stats && uregs.xsp != fpverif_uregs.xsp) stats.xs++;
410                if (do_stats && uregs.xbp != fpverif_uregs.xbp) stats.xb++;
411                uregs = fpverif_uregs;
412                if (debug) unwind_case = "CF";
413                if (do_stats) stats.CF++;
414             }
415          } else {
416             if (debug) unwind_case = "FF";
417             if (do_stats) stats.FF++;
418             if (old_xsp >= uregs.xsp) {
419                if (debug)
420                   VG_(printf) ("     FF end of stack old_xsp %p >= xsp %p\n",
421                                (void*)old_xsp, (void*)uregs.xsp);
422                break;
423             }
424          }
425          goto unwind_done;
426       } else {
427          // fp unwind has failed.
428          // If we were checking the validity of the cfi unwinding,
429          // we mark in the cache that the fp unwind cannot be done, and that
430          // cfi unwind is desired.
431          if (xip_verif > CFUNWIND) {
432             // We know that fpverif_uregs contains valid information,
433             // as a failed cf unwind would have put NOINFO in xip_verif.
434             fp_CF_verif_cache [hash] = xip_verified ^ CFUNWIND;
435             if (debug) VG_(printf)("     cache CFUNWIND as fp failed\n");
436             uregs = fpverif_uregs;
437             if (debug) unwind_case = "Ck";
438             if (do_stats) stats.Ck++;
439             goto unwind_done;
440          }
441          // xip_verif is FPUNWIND or NOINFO.
442          // We failed the cfi unwind and/or the fp unwind.
443          // => fallback to FPO info.
444       }
445 
446       /* And, similarly, try for MSVC FPO unwind info. */
447       if (FPO_info_present
448           && VG_(use_FPO_info)( &uregs.xip, &uregs.xsp, &uregs.xbp,
449                                 fp_min, fp_max ) ) {
450          if (debug) unwind_case = "MS";
451          if (do_stats) stats.MS++;
452          goto unwind_done;
453       }
454 
455       /* No luck.  We have to give up. */
456       break;
457 
458    unwind_done:
459       /* Add a frame in ips/sps/fps */
460       /* fp is %ebp.  sp is %esp.  ip is %eip. */
461       if (0 == uregs.xip || 1 == uregs.xip) break;
462       if (sps) sps[i] = uregs.xsp;
463       if (fps) fps[i] = uregs.xbp;
464       ips[i++] = uregs.xip - 1;
465       /* -1: refer to calling insn, not the RA */
466       if (debug)
467          VG_(printf)("     ips%s[%d]=0x%08lx\n", unwind_case, i-1, ips[i-1]);
468       uregs.xip = uregs.xip - 1;
469       /* as per comment at the head of this loop */
470       RECURSIVE_MERGE(cmrf,ips,i);
471    }
472 
473    if (do_stats) stats.nf += i;
474    if (do_stats && stats.nr % 10000 == 0) {
475      VG_(printf)("nr %u nf %u "
476                  "Ca %u FF %u "
477                  "Cf %u "
478                  "Fw %u FO %u "
479                  "CF %u (xi %u xs %u xb %u) "
480                  "Ck %u MS %u\n",
481                  stats.nr, stats.nf,
482                  stats.Ca, stats.FF,
483                  stats.Cf,
484                  stats.Fw, stats.FO,
485                  stats.CF, stats.xi, stats.xs, stats.xb,
486                  stats.Ck, stats.MS);
487    }
488    n_found = i;
489    return n_found;
490 }
491 
492 #undef N_FP_CF_VERIF
493 #undef FPUNWIND
494 #undef NOINFO
495 #undef CFUNWIND
496 
497 #endif
498 
499 /* ----------------------- amd64 ------------------------ */
500 
501 #if defined(VGP_amd64_linux) || defined(VGP_amd64_darwin) \
502     || defined(VGP_amd64_solaris)
503 
504 UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
505                                /*OUT*/Addr* ips, UInt max_n_ips,
506                                /*OUT*/Addr* sps, /*OUT*/Addr* fps,
507                                const UnwindStartRegs* startRegs,
508                                Addr fp_max_orig )
509 {
510    const Bool  debug = False;
511    Int   i;
512    Addr  fp_max;
513    UInt  n_found = 0;
514    const Int cmrf = VG_(clo_merge_recursive_frames);
515 
516    vg_assert(sizeof(Addr) == sizeof(UWord));
517    vg_assert(sizeof(Addr) == sizeof(void*));
518 
519    D3UnwindRegs uregs;
520    uregs.xip = startRegs->r_pc;
521    uregs.xsp = startRegs->r_sp;
522    uregs.xbp = startRegs->misc.AMD64.r_rbp;
523    Addr fp_min = uregs.xsp - VG_STACK_REDZONE_SZB;
524 
525    /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
526       stopping when the trail goes cold, which we guess to be
527       when FP is not a reasonable stack location. */
528 
529    // JRS 2002-sep-17: hack, to round up fp_max to the end of the
530    // current page, at least.  Dunno if it helps.
531    // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
532    fp_max = VG_PGROUNDUP(fp_max_orig);
533    if (fp_max >= sizeof(Addr))
534       fp_max -= sizeof(Addr);
535 
536    if (debug)
537       VG_(printf)("max_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
538                   "fp_max=0x%lx ip=0x%lx fp=0x%lx\n",
539                   max_n_ips, fp_min, fp_max_orig, fp_max,
540                   uregs.xip, uregs.xbp);
541 
542    /* Assertion broken before main() is reached in pthreaded programs;  the
543     * offending stack traces only have one item.  --njn, 2002-aug-16 */
544    /* vg_assert(fp_min <= fp_max);*/
545    // On Darwin, this kicks in for pthread-related stack traces, so they're
546    // only 1 entry long which is wrong.
547 #  if defined(VGO_linux)
548    if (fp_min + 256 >= fp_max) {
549       /* If the stack limits look bogus, don't poke around ... but
550          don't bomb out either. */
551 #  elif defined(VGO_solaris)
552    if (fp_max == 0) {
553       /* VG_(get_StackTrace)() can be called by tools very early when
554          various tracing options are enabled. Don't proceed further
555          if the stack limits look bogus.
556        */
557 #  endif
558 #  if defined(VGO_linux) || defined(VGO_solaris)
559 
560       if (sps) sps[0] = uregs.xsp;
561       if (fps) fps[0] = uregs.xbp;
562       ips[0] = uregs.xip;
563       return 1;
564    }
565 #  endif
566 
567    /* fp is %rbp.  sp is %rsp.  ip is %rip. */
568 
569    ips[0] = uregs.xip;
570    if (sps) sps[0] = uregs.xsp;
571    if (fps) fps[0] = uregs.xbp;
572    i = 1;
573    if (debug)
574       VG_(printf)("     ipsS[%d]=%#08lx rbp %#08lx rsp %#08lx\n",
575                   i-1, ips[i-1], uregs.xbp, uregs.xsp);
576 
577 #  if defined(VGO_darwin)
578    if (VG_(is_valid_tid)(tid_if_known) &&
579       VG_(is_in_syscall)(tid_if_known) &&
580       i < max_n_ips) {
581       /* On Darwin, all the system call stubs have no function
582        * prolog.  So instead of top of the stack being a new
583        * frame comprising a saved BP and a return address, we
584        * just have the return address in the caller's frame.
585        * Adjust for this by recording the return address.
586        */
587       ips[i] = *(Addr *)uregs.xsp - 1;
588       if (sps) sps[i] = uregs.xsp;
589       if (fps) fps[i] = uregs.xbp;
590       i++;
591    }
592 #  endif
593 
594    /* Loop unwinding the stack. Note that the IP value we get on
595     * each pass (whether from CFI info or a stack frame) is a
596     * return address so is actually after the calling instruction
597     * in the calling function.
598     *
599     * Because of this we subtract one from the IP after each pass
600     * of the loop so that we find the right CFI block on the next
601     * pass - otherwise we can find the wrong CFI info if it happens
602     * to change after the calling instruction and that will mean
603     * that we will fail to unwind the next step.
604     *
605     * This most frequently happens at the end of a function when
606     * a tail call occurs and we wind up using the CFI info for the
607     * next function which is completely wrong.
608     */
609    while (True) {
610       Addr old_xsp;
611 
612       if (i >= max_n_ips)
613          break;
614 
615       old_xsp = uregs.xsp;
616 
617       /* Try to derive a new (ip,sp,fp) triple from the current set. */
618 
619       /* First off, see if there is any CFI info to hand which can
620          be used. */
621       if ( VG_(use_CF_info)( &uregs, fp_min, fp_max ) ) {
622          if (0 == uregs.xip || 1 == uregs.xip) break;
623          if (old_xsp >= uregs.xsp) {
624             if (debug)
625                VG_(printf) ("     CF end of stack old_xsp %p >= xsp %p\n",
626                             (void*)old_xsp, (void*)uregs.xsp);
627             break;
628          }
629          if (sps) sps[i] = uregs.xsp;
630          if (fps) fps[i] = uregs.xbp;
631          ips[i++] = uregs.xip - 1; /* -1: refer to calling insn, not the RA */
632          if (debug)
633             VG_(printf)("     ipsC[%d]=%#08lx rbp %#08lx rsp %#08lx\n",
634                         i-1, ips[i-1], uregs.xbp, uregs.xsp);
635          uregs.xip = uregs.xip - 1; /* as per comment at the head of this loop */
636          RECURSIVE_MERGE(cmrf,ips,i);
637          continue;
638       }
639 
640       /* If VG_(use_CF_info) fails, it won't modify ip/sp/fp, so
641          we can safely try the old-fashioned method. */
642       /* This bit is supposed to deal with frames resulting from
643          functions which begin "pushq %rbp ; movq %rsp, %rbp".
644          Unfortunately, since we can't (easily) look at the insns at
645          the start of the fn, like GDB does, there's no reliable way
646          to tell.  Hence the hack of first trying out CFI, and if that
647          fails, then use this as a fallback. */
648       /* Note: re "- 1 * sizeof(UWord)", need to take account of the
649          fact that we are prodding at & ((UWord*)fp)[1] and so need to
650          adjust the limit check accordingly.  Omitting this has been
651          observed to cause segfaults on rare occasions. */
652       if (fp_min <= uregs.xbp && uregs.xbp <= fp_max - 1 * sizeof(UWord)) {
653          /* fp looks sane, so use it. */
654          uregs.xip = (((UWord*)uregs.xbp)[1]);
655          if (0 == uregs.xip || 1 == uregs.xip) break;
656          uregs.xsp = uregs.xbp + sizeof(Addr) /*saved %rbp*/
657                                + sizeof(Addr) /*ra*/;
658          if (old_xsp >= uregs.xsp) {
659             if (debug)
660                VG_(printf) ("     FF end of stack old_xsp %p >= xsp %p\n",
661                             (void*)old_xsp, (void*)uregs.xsp);
662             break;
663          }
664          uregs.xbp = (((UWord*)uregs.xbp)[0]);
665          if (sps) sps[i] = uregs.xsp;
666          if (fps) fps[i] = uregs.xbp;
667          ips[i++] = uregs.xip - 1; /* -1: refer to calling insn, not the RA */
668          if (debug)
669             VG_(printf)("     ipsF[%d]=%#08lx rbp %#08lx rsp %#08lx\n",
670                         i-1, ips[i-1], uregs.xbp, uregs.xsp);
671          uregs.xip = uregs.xip - 1; /* as per comment at the head of this loop */
672          RECURSIVE_MERGE(cmrf,ips,i);
673          continue;
674       }
675 
676       /* Last-ditch hack (evidently GDB does something similar).  We
677          are in the middle of nowhere and we have a nonsense value for
678          the frame pointer.  If the stack pointer is still valid,
679          assume that what it points at is a return address.  Yes,
680          desperate measures.  Could do better here:
681          - check that the supposed return address is in
682            an executable page
683          - check that the supposed return address is just after a call insn
684          - given those two checks, don't just consider *sp as the return
685            address; instead scan a likely section of stack (eg sp .. sp+256)
686            and use suitable values found there.
687       */
688       if (fp_min <= uregs.xsp && uregs.xsp < fp_max) {
689          uregs.xip = ((UWord*)uregs.xsp)[0];
690          if (0 == uregs.xip || 1 == uregs.xip) break;
691          if (sps) sps[i] = uregs.xsp;
692          if (fps) fps[i] = uregs.xbp;
693          ips[i++] = uregs.xip == 0
694                     ? 0 /* sp[0] == 0 ==> stuck at the bottom of a
695                            thread stack */
696                     : uregs.xip - 1;
697                         /* -1: refer to calling insn, not the RA */
698          if (debug)
699             VG_(printf)("     ipsH[%d]=%#08lx\n", i-1, ips[i-1]);
700          uregs.xip = uregs.xip - 1; /* as per comment at the head of this loop */
701          uregs.xsp += 8;
702          RECURSIVE_MERGE(cmrf,ips,i);
703          continue;
704       }
705 
706       /* No luck at all.  We have to give up. */
707       break;
708    }
709 
710    n_found = i;
711    return n_found;
712 }
713 
714 #endif
715 
716 /* -----------------------ppc32/64 ---------------------- */
717 
718 #if defined(VGP_ppc32_linux) || defined(VGP_ppc64be_linux) \
719     || defined(VGP_ppc64le_linux)
720 
/* Stack unwinder for ppc32/64-linux.  Walks the r1 back-chain,
   writing the return addresses of the frames found into
   ips[0 .. max_n_ips-1], and (if non-NULL) recording the SP/FP of
   each frame in sps[]/fps[].  Returns the number of frames found.
   Note: on ppc, r1 serves as both the stack pointer and the frame
   pointer, and special care is needed to decide whether LR holds the
   innermost frame's return address (see lr_is_first_RA below). */
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               const UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   Bool  lr_is_first_RA = False;
#  if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
   /* State for unwinding through Valgrind's function-redirection
      return stubs; see the hidden-REDIR-stack handling below. */
   Word redir_stack_size = 0;
   Word redirs_used      = 0;
#  endif
   const Int cmrf = VG_(clo_merge_recursive_frames);

   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   /* Starting registers for the unwind.  fp is initialised from sp
      because ppc uses r1 for both roles. */
   Addr ip = (Addr)startRegs->r_pc;
   Addr sp = (Addr)startRegs->r_sp;
   Addr fp = sp;
#  if defined(VGP_ppc32_linux)
   Addr lr = startRegs->misc.PPC32.r_lr;
#  elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
   Addr lr = startRegs->misc.PPC64.r_lr;
#  endif
   Addr fp_min = sp - VG_STACK_REDZONE_SZB;

   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
      stopping when the trail goes cold, which we guess to be
      when FP is not a reasonable stack location. */

   // JRS 2002-sep-17: hack, to round up fp_max to the end of the
   // current page, at least.  Dunno if it helps.
   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("max_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx ip=0x%lx fp=0x%lx\n",
		  max_n_ips, fp_min, fp_max_orig, fp_max, ip, fp);

   /* Assertion broken before main() is reached in pthreaded programs;  the
    * offending stack traces only have one item.  --njn, 2002-aug-16 */
   /* vg_assert(fp_min <= fp_max);*/
   if (fp_min + 512 >= fp_max) {
      /* If the stack limits look bogus, don't poke around ... but
         don't bomb out either. */
      if (sps) sps[0] = sp;
      if (fps) fps[0] = fp;
      ips[0] = ip;
      return 1;
   }

   /* fp is %r1.  ip is %cia.  Note, ppc uses r1 as both the stack and
      frame pointers. */

#  if defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
   redir_stack_size = VEX_GUEST_PPC64_REDIR_STACK_SIZE;
   redirs_used      = 0;
#  endif

#  if defined(VG_PLAT_USES_PPCTOC) || defined (VGP_ppc64le_linux)
   /* Deal with bogus LR values caused by function
      interception/wrapping on ppc-TOC platforms; see comment on
      similar code a few lines further down. */
   if (lr == (Addr)&VG_(ppctoc_magic_redirect_return_stub)
       && VG_(is_valid_tid)(tid_if_known)) {
      Word hsp = VG_(threads)[tid_if_known].arch.vex.guest_REDIR_SP;
      redirs_used++;
      if (hsp >= 1 && hsp < redir_stack_size)
         lr = VG_(threads)[tid_if_known]
                 .arch.vex.guest_REDIR_STACK[hsp-1];
   }
#  endif

   /* We have to determine whether or not LR currently holds this fn
      (call it F)'s return address.  It might not if F has previously
      called some other function, hence overwriting LR with a pointer
      to some part of F.  Hence if LR and IP point to the same
      function then we conclude LR does not hold this function's
      return address; instead the LR at entry must have been saved in
      the stack by F's prologue and so we must get it from there
      instead.  Note all this guff only applies to the innermost
      frame. */
   lr_is_first_RA = False;
   {
      const HChar *buf_lr, *buf_ip;
      /* The following conditional looks grossly inefficient and
         surely could be majorly improved, with not much effort. */
      if (VG_(get_fnname_raw) (lr, &buf_lr)) {
         /* Copy the first name: the buffer may be reused by the
            second VG_(get_fnname_raw) call. */
         HChar buf_lr_copy[VG_(strlen)(buf_lr) + 1];
         VG_(strcpy)(buf_lr_copy, buf_lr);
         if (VG_(get_fnname_raw) (ip, &buf_ip))
            /* Nonzero strcmp => different functions => LR is valid. */
            if (VG_(strcmp)(buf_lr_copy, buf_ip))
               lr_is_first_RA = True;
      }
   }

   if (sps) sps[0] = fp; /* NB. not sp */
   if (fps) fps[0] = fp;
   ips[0] = ip;
   i = 1;

   if (fp_min <= fp && fp < fp_max-VG_WORDSIZE+1) {

      /* initial FP is sane; keep going */
      fp = (((UWord*)fp)[0]);

      while (True) {

        /* On ppc64-linux (ppc64-elf, really), the lr save
           slot is 2 words back from sp, whereas on ppc32-elf(?) it's
           only one word back. */
#        if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
         const Int lr_offset = 2;
#        else
         const Int lr_offset = 1;
#        endif

         if (i >= max_n_ips)
            break;

         /* Try to derive a new (ip,fp) pair from the current set. */

         if (fp_min <= fp && fp <= fp_max - lr_offset * sizeof(UWord)) {
            /* fp looks sane, so use it. */

            /* For the innermost frame only, prefer LR when we decided
               above that it holds this function's return address. */
            if (i == 1 && lr_is_first_RA)
               ip = lr;
            else
               ip = (((UWord*)fp)[lr_offset]);

#           if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
            /* Nasty hack to do with function replacement/wrapping on
               ppc64-linux.  If LR points to our magic return stub,
               then we are in a wrapped or intercepted function, in
               which LR has been messed with.  The original LR will
               have been pushed onto the thread's hidden REDIR stack
               one down from the top (top element is the saved R2) and
               so we should restore the value from there instead.
               Since nested redirections can and do happen, we keep
               track of the number of nested LRs used by the unwinding
               so far with 'redirs_used'. */
            if (ip == (Addr)&VG_(ppctoc_magic_redirect_return_stub)
                && VG_(is_valid_tid)(tid_if_known)) {
               Word hsp = VG_(threads)[tid_if_known]
                             .arch.vex.guest_REDIR_SP;
               hsp -= 2 * redirs_used;
               redirs_used ++;
               if (hsp >= 1 && hsp < redir_stack_size)
                  ip = VG_(threads)[tid_if_known]
                          .arch.vex.guest_REDIR_STACK[hsp-1];
            }
#           endif

            /* 0 or 1 as a return address means the trail has ended. */
            if (0 == ip || 1 == ip) break;
            if (sps) sps[i] = fp; /* NB. not sp */
            if (fps) fps[i] = fp;
            /* Follow the back chain to the caller's frame. */
            fp = (((UWord*)fp)[0]);
            ips[i++] = ip - 1; /* -1: refer to calling insn, not the RA */
            if (debug)
               VG_(printf)("     ipsF[%d]=%#08lx\n", i-1, ips[i-1]);
            ip = ip - 1; /* ip is probably dead at this point, but
                            play safe, a la x86/amd64 above.  See
                            extensive comments above. */
            RECURSIVE_MERGE(cmrf,ips,i);
            continue;
         }

         /* No luck there.  We have to give up. */
         break;
      }
   }

   n_found = i;
   return n_found;
}
904 
905 #endif
906 
907 /* ------------------------ arm ------------------------- */
908 
909 #if defined(VGP_arm_linux)
910 
911 static Bool in_same_fn ( Addr a1, Addr a2 )
912 {
913    const HChar *buf_a1, *buf_a2;
914    /* The following conditional looks grossly inefficient and
915       surely could be majorly improved, with not much effort. */
916    if (VG_(get_fnname_raw) (a1, &buf_a1)) {
917       HChar buf_a1_copy[VG_(strlen)(buf_a1) + 1];
918       VG_(strcpy)(buf_a1_copy, buf_a1);
919       if (VG_(get_fnname_raw) (a2, &buf_a2))
920          if (VG_(strcmp)(buf_a1_copy, buf_a2))
921             return True;
922    }
923    return False;
924 }
925 
926 static Bool in_same_page ( Addr a1, Addr a2 ) {
927    return (a1 & ~0xFFF) == (a2 & ~0xFFF);
928 }
929 
930 static Addr abs_diff ( Addr a1, Addr a2 ) {
931    return (Addr)(a1 > a2 ? a1 - a2 : a2 - a1);
932 }
933 
934 static Bool has_XT_perms ( Addr a )
935 {
936    NSegment const* seg = VG_(am_find_nsegment)(a);
937    return seg && seg->hasX && seg->hasT;
938 }
939 
940 static Bool looks_like_Thumb_call32 ( UShort w0, UShort w1 )
941 {
942    if (0)
943       VG_(printf)("isT32call %04x %04x\n", (UInt)w0, (UInt)w1);
944    // BL  simm26
945    if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) return True;
946    // BLX simm26
947    if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) return True;
948    return False;
949 }
950 
/* Heuristic: does halfword 'w0' look like a 16-bit Thumb call insn?
   Not implemented: always says no, so the stack scanner recognises
   only the 32-bit BL/BLX forms (see looks_like_Thumb_call32). */
static Bool looks_like_Thumb_call16 ( UShort w0 )
{
   return False;
}
955 
/* Heuristic: does word 'a0' look like a 32-bit ARM call insn?
   Matches only BL<imm24> with cond=AL (top byte 0xEB). */
static Bool looks_like_ARM_call ( UInt a0 )
{
   if (0)
      VG_(printf)("isA32call %08x\n", a0);
   // Leading E forces unconditional only -- fix
   // (i.e. conditionally-executed BLs, and BLX, are currently not
   // recognised -- TODO: widen the match)
   if ((a0 & 0xFF000000) == 0xEB000000) return True;
   return False;
}
964 
965 static Bool looks_like_RA ( Addr ra )
966 {
967    /* 'ra' is a plausible return address if it points to
968        an instruction after a call insn. */
969    Bool isT = (ra & 1);
970    if (isT) {
971       // returning to Thumb code
972       ra &= ~1;
973       ra -= 4;
974       if (has_XT_perms(ra)) {
975          UShort w0 = *(UShort*)ra;
976          UShort w1 = in_same_page(ra, ra+2) ? *(UShort*)(ra+2) : 0;
977          if (looks_like_Thumb_call16(w1) || looks_like_Thumb_call32(w0,w1))
978             return True;
979       }
980    } else {
981       // ARM
982       ra &= ~3;
983       ra -= 4;
984       if (has_XT_perms(ra)) {
985          UInt a0 = *(UInt*)ra;
986          if (looks_like_ARM_call(a0))
987             return True;
988       }
989    }
990    return False;
991 }
992 
/* Stack unwinder for arm-linux.  First unwinds with DWARF CFI; if
   that runs dry and the user enabled it, falls back to scanning the
   stack for plausible return addresses.  Fills ips[0..max_n_ips-1]
   (and optionally sps/fps) and returns the number of frames found. */
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               const UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;
   const Int cmrf = VG_(clo_merge_recursive_frames);

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   D3UnwindRegs uregs;
   /* Mask off bit 0 (the Thumb bit) so r15 is the plain insn address. */
   uregs.r15 = startRegs->r_pc & 0xFFFFFFFE;
   uregs.r14 = startRegs->misc.ARM.r14;
   uregs.r13 = startRegs->r_sp;
   uregs.r12 = startRegs->misc.ARM.r12;
   uregs.r11 = startRegs->misc.ARM.r11;
   uregs.r7  = startRegs->misc.ARM.r7;
   Addr fp_min = uregs.r13 - VG_STACK_REDZONE_SZB;

   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
      stopping when the trail goes cold, which we guess to be
      when FP is not a reasonable stack location. */

   // JRS 2002-sep-17: hack, to round up fp_max to the end of the
   // current page, at least.  Dunno if it helps.
   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("\nmax_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx r15=0x%lx r13=0x%lx\n",
                  max_n_ips, fp_min, fp_max_orig, fp_max,
                  uregs.r15, uregs.r13);

   /* Assertion broken before main() is reached in pthreaded programs;  the
    * offending stack traces only have one item.  --njn, 2002-aug-16 */
   /* vg_assert(fp_min <= fp_max);*/
   // On Darwin, this kicks in for pthread-related stack traces, so they're
   // only 1 entry long which is wrong.
   if (fp_min + 512 >= fp_max) {
      /* If the stack limits look bogus, don't poke around ... but
         don't bomb out either. */
      if (sps) sps[0] = uregs.r13;
      if (fps) fps[0] = 0;
      ips[0] = uregs.r15;
      return 1;
   }

   /* */

   /* The innermost frame comes straight from the start registers. */
   if (sps) sps[0] = uregs.r13;
   if (fps) fps[0] = 0;
   ips[0] = uregs.r15;
   i = 1;

   /* Loop unwinding the stack. */
   Bool do_stack_scan = False;

   /* First try the Official Way, using Dwarf CFI. */
   while (True) {
      if (debug) {
         VG_(printf)("i: %d, r15: 0x%lx, r13: 0x%lx\n",
                     i, uregs.r15, uregs.r13);
      }

      if (i >= max_n_ips)
         break;

      if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
         if (sps) sps[i] = uregs.r13;
         if (fps) fps[i] = 0;
         /* Clear the Thumb bit, then -1 to bias back into the calling
            insn rather than the return address. */
         ips[i++] = (uregs.r15 & 0xFFFFFFFE) - 1;
         if (debug)
            VG_(printf)("USING CFI: r15: 0x%lx, r13: 0x%lx\n",
                        uregs.r15, uregs.r13);
         uregs.r15 = (uregs.r15 & 0xFFFFFFFE) - 1;
         RECURSIVE_MERGE(cmrf,ips,i);
         continue;
      }

      /* No luck.  We have to give up. */
      do_stack_scan = True;
      break;
   }

   /* Now try Plan B (maybe) -- stack scanning.  This often gives
      pretty bad results, so this has to be enabled explicitly by the
      user. */
   if (do_stack_scan
       && i < max_n_ips && i < (Int)VG_(clo_unw_stack_scan_thresh)) {
      Int  nByStackScan = 0;
      Addr lr = uregs.r14;
      Addr sp = uregs.r13 & ~3;
      Addr pc = uregs.r15;
      // First see if LR contains
      // something that could be a valid return address.
      if (!in_same_fn(lr, pc) && looks_like_RA(lr)) {
         // take it only if 'cand' isn't obviously a duplicate
         // of the last found IP value
         Addr cand = (lr & 0xFFFFFFFE) - 1;
         if (abs_diff(cand, ips[i-1]) > 1) {
            if (sps) sps[i] = 0;
            if (fps) fps[i] = 0;
            ips[i++] = cand;
            RECURSIVE_MERGE(cmrf,ips,i);
            nByStackScan++;
         }
      }
      /* Scan the rest of the current stack page, word by word, for
         anything that looks like a return address. */
      while (in_same_page(sp, uregs.r13)) {
         if (i >= max_n_ips)
            break;
         // we're in the same page; fairly safe to keep going
         UWord w = *(UWord*)(sp & ~0x3);
         if (looks_like_RA(w)) {
            Addr cand = (w & 0xFFFFFFFE) - 1;
            // take it only if 'cand' isn't obviously a duplicate
            // of the last found IP value
            if (abs_diff(cand, ips[i-1]) > 1) {
               if (sps) sps[i] = 0;
               if (fps) fps[i] = 0;
               ips[i++] = cand;
               RECURSIVE_MERGE(cmrf,ips,i);
               /* Cap how many frames stack scanning may contribute. */
               if (++nByStackScan >= VG_(clo_unw_stack_scan_frames)) break;
            }
         }
         sp += 4;
      }
   }

   n_found = i;
   return n_found;
}
1132 
1133 #endif
1134 
1135 /* ------------------------ arm64 ------------------------- */
1136 
1137 #if defined(VGP_arm64_linux)
1138 
/* Stack unwinder for arm64-linux.  Unwinds purely with DWARF CFI
   (no frame-pointer-chain or stack-scanning fallback).  Fills
   ips[0..max_n_ips-1] (and optionally sps/fps) and returns the
   number of frames found. */
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               const UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;
   const Int cmrf = VG_(clo_merge_recursive_frames);

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   /* Starting register state: pc, sp, lr (x30) and fp (x29). */
   D3UnwindRegs uregs;
   uregs.pc = startRegs->r_pc;
   uregs.sp = startRegs->r_sp;
   uregs.x30 = startRegs->misc.ARM64.x30;
   uregs.x29 = startRegs->misc.ARM64.x29;
   Addr fp_min = uregs.sp - VG_STACK_REDZONE_SZB;

   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
      stopping when the trail goes cold, which we guess to be
      when FP is not a reasonable stack location. */

   // JRS 2002-sep-17: hack, to round up fp_max to the end of the
   // current page, at least.  Dunno if it helps.
   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("\nmax_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx PC=0x%lx SP=0x%lx\n",
                  max_n_ips, fp_min, fp_max_orig, fp_max,
                  uregs.pc, uregs.sp);

   /* Assertion broken before main() is reached in pthreaded programs;  the
    * offending stack traces only have one item.  --njn, 2002-aug-16 */
   /* vg_assert(fp_min <= fp_max);*/
   // On Darwin, this kicks in for pthread-related stack traces, so they're
   // only 1 entry long which is wrong.
   if (fp_min + 512 >= fp_max) {
      /* If the stack limits look bogus, don't poke around ... but
         don't bomb out either. */
      if (sps) sps[0] = uregs.sp;
      if (fps) fps[0] = uregs.x29;
      ips[0] = uregs.pc;
      return 1;
   }

   /* */

   /* The innermost frame comes straight from the start registers. */
   if (sps) sps[0] = uregs.sp;
   if (fps) fps[0] = uregs.x29;
   ips[0] = uregs.pc;
   i = 1;

   /* Loop unwinding the stack, using CFI. */
   while (True) {
      if (debug) {
         VG_(printf)("i: %d, pc: 0x%lx, sp: 0x%lx\n",
                     i, uregs.pc, uregs.sp);
      }

      if (i >= max_n_ips)
         break;

      if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
         if (sps) sps[i] = uregs.sp;
         if (fps) fps[i] = uregs.x29;
         /* -1: bias back into the calling insn, not the RA. */
         ips[i++] = uregs.pc - 1;
         if (debug)
            VG_(printf)("USING CFI: pc: 0x%lx, sp: 0x%lx\n",
                        uregs.pc, uregs.sp);
         uregs.pc = uregs.pc - 1;
         RECURSIVE_MERGE(cmrf,ips,i);
         continue;
      }

      /* No luck.  We have to give up. */
      break;
   }

   n_found = i;
   return n_found;
}
1228 
1229 #endif
1230 
1231 /* ------------------------ s390x ------------------------- */
1232 
1233 #if defined(VGP_s390x_linux)
1234 
/* Stack unwinder for s390x-linux.  Relies on DWARF CFI (gcc on s390x
   does not create a backchain by default), with a one-shot fallback
   through the link register for the innermost frame.  Fills
   ips[0..max_n_ips-1] (and optionally sps/fps); returns the number
   of frames found. */
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               const UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;
   const Int cmrf = VG_(clo_merge_recursive_frames);

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   /* Starting register state: ia (insn address), sp, fp, lr. */
   D3UnwindRegs uregs;
   uregs.ia = startRegs->r_pc;
   uregs.sp = startRegs->r_sp;
   Addr fp_min = uregs.sp - VG_STACK_REDZONE_SZB;
   uregs.fp = startRegs->misc.S390X.r_fp;
   uregs.lr = startRegs->misc.S390X.r_lr;

   /* Round fp_max up to the end of the current page (cf. the other
      platform unwinders in this file). */
   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("max_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx IA=0x%lx SP=0x%lx FP=0x%lx\n",
                  max_n_ips, fp_min, fp_max_orig, fp_max,
                  uregs.ia, uregs.sp,uregs.fp);

   /* The first frame is pretty obvious */
   ips[0] = uregs.ia;
   if (sps) sps[0] = uregs.sp;
   if (fps) fps[0] = uregs.fp;
   i = 1;

   /* for everything else we have to rely on the eh_frame. gcc defaults to
      not create a backchain and all the other  tools (like gdb) also have
      to use the CFI. */
   while (True) {
      if (i >= max_n_ips)
         break;

      if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
         if (sps) sps[i] = uregs.sp;
         if (fps) fps[i] = uregs.fp;
         /* -1: bias back into the calling insn, not the RA. */
         ips[i++] = uregs.ia - 1;
         uregs.ia = uregs.ia - 1;
         RECURSIVE_MERGE(cmrf,ips,i);
         continue;
      }
      /* A problem on the first frame? Lets assume it was a bad jump.
         We will use the link register and the current stack and frame
         pointers and see if we can use the CFI in the next round. */
      if (i == 1) {
         /* NOTE(review): uregs.sp / uregs.fp are only reset here when
            the caller supplied sps / fps respectively -- presumably
            callers always pass both or neither; verify. */
         if (sps) {
            sps[i] = sps[0];
            uregs.sp = sps[0];
         }
         if (fps) {
            fps[i] = fps[0];
            uregs.fp = fps[0];
         }
         uregs.ia = uregs.lr - 1;
         ips[i++] = uregs.lr - 1;
         RECURSIVE_MERGE(cmrf,ips,i);
         continue;
      }

      /* No luck.  We have to give up. */
      break;
   }

   n_found = i;
   return n_found;
}
1313 
1314 #endif
1315 
1316 /* ------------------------ mips 32/64 ------------------------- */
1317 #if defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
/* Stack unwinder for mips32/64-linux.  Tries DWARF CFI first; if that
   fails, scans the current function's prologue for stack-pointer
   adjustments (GDB-style) to reconstruct the frame size, and finally
   falls back to the return-address register for the innermost frame.
   Fills ips[0..max_n_ips-1] (and optionally sps/fps); returns the
   number of frames found. */
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               const UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;
   const Int cmrf = VG_(clo_merge_recursive_frames);

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   /* Starting register state: pc, sp, plus fp (r30) and ra (r31). */
   D3UnwindRegs uregs;
   uregs.pc = startRegs->r_pc;
   uregs.sp = startRegs->r_sp;
   Addr fp_min = uregs.sp - VG_STACK_REDZONE_SZB;

#if defined(VGP_mips32_linux)
   uregs.fp = startRegs->misc.MIPS32.r30;
   uregs.ra = startRegs->misc.MIPS32.r31;
#elif defined(VGP_mips64_linux)
   uregs.fp = startRegs->misc.MIPS64.r30;
   uregs.ra = startRegs->misc.MIPS64.r31;
#endif

   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
      stopping when the trail goes cold, which we guess to be
      when FP is not a reasonable stack location. */

   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("max_n_ips=%u fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx pc=0x%lx sp=0x%lx fp=0x%lx\n",
                  max_n_ips, fp_min, fp_max_orig, fp_max,
                  uregs.pc, uregs.sp, uregs.fp);

   /* The innermost frame comes straight from the start registers. */
   if (sps) sps[0] = uregs.sp;
   if (fps) fps[0] = uregs.fp;
   ips[0] = uregs.pc;
   i = 1;

   /* Loop unwinding the stack. */

   while (True) {
      if (debug) {
         VG_(printf)("i: %d, pc: 0x%lx, sp: 0x%lx, ra: 0x%lx\n",
                     i, uregs.pc, uregs.sp, uregs.ra);
      }
      if (i >= max_n_ips)
         break;

      /* Keep a copy so we can roll back if CFI yields a bogus pc. */
      D3UnwindRegs uregs_copy = uregs;
      if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
         if (debug)
            VG_(printf)("USING CFI: pc: 0x%lx, sp: 0x%lx, ra: 0x%lx\n",
                        uregs.pc, uregs.sp, uregs.ra);
         if (0 != uregs.pc && 1 != uregs.pc) {
            if (sps) sps[i] = uregs.sp;
            if (fps) fps[i] = uregs.fp;
            /* -4: step back one insn so the address lies within the
               calling insn, not at the RA. */
            ips[i++] = uregs.pc - 4;
            uregs.pc = uregs.pc - 4;
            RECURSIVE_MERGE(cmrf,ips,i);
            continue;
         } else
            uregs = uregs_copy;
      }

      /* Plan B: scan the function prologue (from the function start
         up to the current pc) for sp-decrementing instructions, and
         total up the frame size in frame_offset. */
      int seen_sp_adjust = 0;
      long frame_offset = 0;
      PtrdiffT offset;
      if (VG_(get_inst_offset_in_function)(uregs.pc, &offset)) {
         Addr start_pc = uregs.pc - offset;
         Addr limit_pc = uregs.pc;
         Addr cur_pc;
         for (cur_pc = start_pc; cur_pc < limit_pc; cur_pc += 4) {
            unsigned long inst, high_word, low_word;
            unsigned long * cur_inst;
            /* Fetch the instruction.   */
            cur_inst = (unsigned long *)cur_pc;
            inst = *((UInt *) cur_inst);
            if(debug)
               VG_(printf)("cur_pc: 0x%lx, inst: 0x%lx\n", cur_pc, inst);

            /* Save some code by pre-extracting some useful fields.  */
            high_word = (inst >> 16) & 0xffff;
            low_word = inst & 0xffff;

            if (high_word == 0x27bd        /* addiu $sp,$sp,-i */
                || high_word == 0x23bd     /* addi $sp,$sp,-i */
                || high_word == 0x67bd) {  /* daddiu $sp,$sp,-i */
               if (low_word & 0x8000)	/* negative stack adjustment? */
                  frame_offset += 0x10000 - low_word;
               else
                  /* Exit loop if a positive stack adjustment is found, which
                     usually means that the stack cleanup code in the function
                     epilogue is reached.  */
               break;
            /* NOTE: the 'break' above is the body of the 'else';
               'seen_sp_adjust = 1' runs for every matched negative
               adjustment (the indentation here is misleading). */
            seen_sp_adjust = 1;
            }
         }
         if(debug)
            VG_(printf)("offset: 0x%lx\n", frame_offset);
      }
      if (seen_sp_adjust) {
         if (0 == uregs.pc || 1 == uregs.pc) break;
         /* No progress possible: the RA would take us back to where
            we already are. */
         if (uregs.pc == uregs.ra - 8) break;
         if (sps) {
            sps[i] = uregs.sp + frame_offset;
         }
         /* Pop the reconstructed frame. */
         uregs.sp = uregs.sp + frame_offset;

         if (fps) {
            fps[i] = fps[0];
            uregs.fp = fps[0];
         }
         if (0 == uregs.ra || 1 == uregs.ra) break;
         /* -8: skip back over the jalr/branch and its delay slot. */
         uregs.pc = uregs.ra - 8;
         ips[i++] = uregs.ra - 8;
         RECURSIVE_MERGE(cmrf,ips,i);
         continue;
      }

      /* Plan C, innermost frame only: assume ra holds the return
         address and retry from there. */
      if (i == 1) {
         if (sps) {
            sps[i] = sps[0];
            uregs.sp = sps[0];
         }
         if (fps) {
            fps[i] = fps[0];
            uregs.fp = fps[0];
         }
         if (0 == uregs.ra || 1 == uregs.ra) break;
         uregs.pc = uregs.ra - 8;
         ips[i++] = uregs.ra - 8;
         RECURSIVE_MERGE(cmrf,ips,i);
         continue;
      }
      /* No luck.  We have to give up. */
      break;
   }

   n_found = i;
   return n_found;
}
1468 
1469 #endif
1470 
1471 /*------------------------------------------------------------*/
1472 /*---                                                      ---*/
1473 /*--- END platform-dependent unwinder worker functions     ---*/
1474 /*---                                                      ---*/
1475 /*------------------------------------------------------------*/
1476 
1477 /*------------------------------------------------------------*/
1478 /*--- Exported functions.                                  ---*/
1479 /*------------------------------------------------------------*/
1480 
/* Public entry point: capture a stack trace for thread 'tid' into
   ips[0..max_n_ips-1] (and optionally sps/fps), starting the unwind
   from the thread's current register state.  'first_ip_delta' is
   added to the starting pc before unwinding.  Returns the number of
   frames captured; delegates the platform-specific work to
   VG_(get_StackTrace_wrk). */
UInt VG_(get_StackTrace) ( ThreadId tid,
                           /*OUT*/StackTrace ips, UInt max_n_ips,
                           /*OUT*/StackTrace sps,
                           /*OUT*/StackTrace fps,
                           Word first_ip_delta )
{
   /* Get the register values with which to start the unwind. */
   UnwindStartRegs startRegs;
   VG_(memset)( &startRegs, 0, sizeof(startRegs) );
   VG_(get_UnwindStartRegs)( &startRegs, tid );

   Addr stack_highest_byte = VG_(threads)[tid].client_stack_highest_byte;
   Addr stack_lowest_byte  = 0;

#  if defined(VGP_x86_linux)
   /* Nasty little hack to deal with syscalls - if libc is using its
      _dl_sysinfo_int80 function for syscalls (the TLS version does),
      then ip will always appear to be in that function when doing a
      syscall, not the actual libc function doing the syscall.  This
      check sees if IP is within that function, and pops the return
      address off the stack so that ip is placed within the library
      function calling the syscall.  This makes stack backtraces much
      more useful.

      The function is assumed to look like this (from glibc-2.3.6 sources):
         _dl_sysinfo_int80:
            int $0x80
            ret
      That is 3 (2+1) bytes long.  We could be more thorough and check
      the 3 bytes of the function are as expected, but I can't be
      bothered.
   */
   if (VG_(client__dl_sysinfo_int80) != 0 /* we know its address */
       && startRegs.r_pc >= VG_(client__dl_sysinfo_int80)
       && startRegs.r_pc < VG_(client__dl_sysinfo_int80)+3
       && VG_(am_is_valid_for_client)(startRegs.r_pc, sizeof(Addr),
                                      VKI_PROT_READ)) {
      /* Simulate the 'ret': pc := *sp; sp += wordsize. */
      startRegs.r_pc  = (ULong) *(Addr*)(UWord)startRegs.r_sp;
      startRegs.r_sp += (ULong) sizeof(Addr);
   }
#  endif

   /* See if we can get a better idea of the stack limits */
   VG_(stack_limits)( (Addr)startRegs.r_sp,
                      &stack_lowest_byte, &stack_highest_byte );

   /* Take into account the first_ip_delta. */
   startRegs.r_pc += (Long)(Word)first_ip_delta;

   if (0)
      VG_(printf)("tid %u: stack_highest=0x%08lx ip=0x%010llx "
                  "sp=0x%010llx\n",
                  tid, stack_highest_byte,
                  startRegs.r_pc, startRegs.r_sp);

   return VG_(get_StackTrace_wrk)(tid, ips, max_n_ips,
                                       sps, fps,
                                       &startRegs,
                                       stack_highest_byte);
}
1541 
1542 static void printIpDesc(UInt n, Addr ip, void* uu_opaque)
1543 {
1544    InlIPCursor *iipc = VG_(new_IIPC)(ip);
1545 
1546    do {
1547       const HChar *buf = VG_(describe_IP)(ip, iipc);
1548       if (VG_(clo_xml)) {
1549          VG_(printf_xml)("    %s\n", buf);
1550       } else {
1551          VG_(message)(Vg_UserMsg, "   %s %s\n",
1552                       ( n == 0 ? "at" : "by" ), buf);
1553       }
1554       n++;
1555       // Increase n to show "at" for only one level.
1556    } while (VG_(next_IIPC)(iipc));
1557    VG_(delete_IIPC)(iipc);
1558 }
1559 
1560 /* Print a StackTrace. */
1561 void VG_(pp_StackTrace) ( StackTrace ips, UInt n_ips )
1562 {
1563    vg_assert( n_ips > 0 );
1564 
1565    if (VG_(clo_xml))
1566       VG_(printf_xml)("  <stack>\n");
1567 
1568    VG_(apply_StackTrace)( printIpDesc, NULL, ips, n_ips );
1569 
1570    if (VG_(clo_xml))
1571       VG_(printf_xml)("  </stack>\n");
1572 }
1573 
1574 /* Get and immediately print a StackTrace. */
1575 void VG_(get_and_pp_StackTrace) ( ThreadId tid, UInt max_n_ips )
1576 {
1577    Addr ips[max_n_ips];
1578    UInt n_ips
1579       = VG_(get_StackTrace)(tid, ips, max_n_ips,
1580                             NULL/*array to dump SP values in*/,
1581                             NULL/*array to dump FP values in*/,
1582                             0/*first_ip_delta*/);
1583    VG_(pp_StackTrace)(ips, n_ips);
1584 }
1585 
1586 void VG_(apply_StackTrace)(
1587         void(*action)(UInt n, Addr ip, void* opaque),
1588         void* opaque,
1589         StackTrace ips, UInt n_ips
1590      )
1591 {
1592    Int i;
1593 
1594    vg_assert(n_ips > 0);
1595    if ( ! VG_(clo_show_below_main) ) {
1596       // Search (from the outer frame onwards) the appearance of "main"
1597       // or the last appearance of a below main function.
1598       // Then decrease n_ips so as to not call action for the below main
1599       for (i = n_ips - 1; i >= 0; i--) {
1600          Vg_FnNameKind kind = VG_(get_fnname_kind_from_IP)(ips[i]);
1601          if (Vg_FnNameMain == kind || Vg_FnNameBelowMain == kind)
1602             n_ips = i + 1;
1603          if (Vg_FnNameMain == kind)
1604             break;
1605       }
1606    }
1607 
1608    for (i = 0; i < n_ips; i++)
1609       // Act on the ip
1610       action(i, ips[i], opaque);
1611 }
1612 
1613 
1614 /*--------------------------------------------------------------------*/
1615 /*--- end                                                          ---*/
1616 /*--------------------------------------------------------------------*/
1617