/* libunwind - a platform-independent unwind library
   Copyright (C) 2010, 2011 by FERMI NATIONAL ACCELERATOR LABORATORY
   Copyright (C) 2014 CERN and Aalto University
        Contributed by Filip Nyback

This file is part of libunwind.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */

#include "unwind_i.h"
#include "offsets.h"
#include <signal.h>
#include <limits.h>

#pragma weak pthread_once
#pragma weak pthread_key_create
#pragma weak pthread_getspecific
#pragma weak pthread_setspecific
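
/* The pthread symbols above are declared weak so that this file links
   even when libpthread is not linked in.  In that case the unresolved
   weak symbols are NULL, and trace_cache_get() below detects this and
   falls back to a single process-global cache instead of per-thread
   caches.  A rough sketch of the dispatch (an illustration, not a
   literal quote of the code):

     if (pthread_once != NULL)
       use the per-thread cache installed under trace_cache_key;
     else
       use trace_cache_get_unthreaded();
*/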

/* Initial hash table size. Table expands by 2 bits (times four). */
#define HASH_MIN_BITS 14
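/* For illustration: the table starts at 1u << 14 = 16384 buckets; the
   first expansion in trace_cache_expand() grows it to 1u << 16 = 65536,
   the next to 1u << 18, and so on. */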

typedef struct
{
  unw_tdep_frame_t *frames;
  size_t log_size;
  size_t used;
  size_t dtor_count;  /* Counts how many times our destructor has already
                         been called. */
} unw_trace_cache_t;

static const unw_tdep_frame_t empty_frame = { 0, UNW_AARCH64_FRAME_OTHER, -1, -1, 0, -1, -1, -1 };
static define_lock (trace_init_lock);
static pthread_once_t trace_cache_once = PTHREAD_ONCE_INIT;
static sig_atomic_t trace_cache_once_happen;
static pthread_key_t trace_cache_key;
static struct mempool trace_cache_pool;
static _Thread_local unw_trace_cache_t *tls_cache;
static _Thread_local int tls_cache_destroyed;

/* Free memory for a thread's trace cache. */
static void
trace_cache_free (void *arg)
{
  unw_trace_cache_t *cache = arg;
  if (++cache->dtor_count < PTHREAD_DESTRUCTOR_ITERATIONS)
  {
    /* Not yet our turn to get destroyed. Re-install ourselves into the key. */
    pthread_setspecific(trace_cache_key, cache);
    Debug(5, "delayed freeing cache %p (%zx to go)\n", cache,
          PTHREAD_DESTRUCTOR_ITERATIONS - cache->dtor_count);
    return;
  }
  tls_cache_destroyed = 1;
  tls_cache = NULL;
  munmap (cache->frames, (1u << cache->log_size) * sizeof(unw_tdep_frame_t));
  mempool_free (&trace_cache_pool, cache);
  Debug(5, "freed cache %p\n", cache);
}
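
/* The re-install trick above relies on POSIX thread-specific-data
   semantics: at thread exit the destructor runs for every key with a
   non-NULL value, and storing a value from within a destructor makes
   the implementation iterate again, up to PTHREAD_DESTRUCTOR_ITERATIONS
   passes.  Deferring the munmap()/mempool_free() to the last permitted
   pass gives destructors registered by other libraries, which may still
   unwind through us, a chance to run while the cache is alive. */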

/* Initialise frame tracing for threaded use. */
static void
trace_cache_init_once (void)
{
  pthread_key_create (&trace_cache_key, &trace_cache_free);
  mempool_init (&trace_cache_pool, sizeof (unw_trace_cache_t), 0);
  trace_cache_once_happen = 1;
}

static unw_tdep_frame_t *
trace_cache_buckets (size_t n)
{
  unw_tdep_frame_t *frames;
  size_t i;

  GET_MEMORY(frames, n * sizeof (unw_tdep_frame_t));
  if (likely(frames != NULL))
    for (i = 0; i < n; ++i)
      frames[i] = empty_frame;

  return frames;
}
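
/* GET_MEMORY obtains the bucket array as anonymous pages (an mmap-style
   allocation in libunwind), which is why trace_cache_free() and
   trace_cache_expand() release it with munmap() rather than free(). */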

/* Allocate and initialise hash table for frame cache lookups.
   Returns the cache initialised with (1u << HASH_MIN_BITS) hash
   buckets, or NULL if there was a memory allocation problem. */
static unw_trace_cache_t *
trace_cache_create (void)
{
  unw_trace_cache_t *cache;

  if (tls_cache_destroyed)
  {
    /* The current thread is in the process of exiting. Don't recreate
       cache, as we wouldn't have another chance to free it. */
    Debug(5, "refusing to reallocate cache: "
             "thread-locals are being deallocated\n");
    return NULL;
  }

  if (! (cache = mempool_alloc(&trace_cache_pool)))
  {
    Debug(5, "failed to allocate cache\n");
    return NULL;
  }

  if (! (cache->frames = trace_cache_buckets(1u << HASH_MIN_BITS)))
  {
    Debug(5, "failed to allocate buckets\n");
    mempool_free(&trace_cache_pool, cache);
    return NULL;
  }

  cache->log_size = HASH_MIN_BITS;
  cache->used = 0;
  cache->dtor_count = 0;
  tls_cache_destroyed = 0;  /* Paranoia: should already be 0. */
  Debug(5, "allocated cache %p\n", cache);
  return cache;
}

/* Expand the hash table in the frame cache if possible. This always
   quadruples the hash size, and clears all previous frame entries. */
static int
trace_cache_expand (unw_trace_cache_t *cache)
{
  size_t old_size = (1u << cache->log_size);
  size_t new_log_size = cache->log_size + 2;
  unw_tdep_frame_t *new_frames = trace_cache_buckets (1u << new_log_size);

  if (unlikely(! new_frames))
  {
    Debug(5, "failed to expand cache to 2^%lu buckets\n", new_log_size);
    return -UNW_ENOMEM;
  }

  Debug(5, "expanded cache from 2^%lu to 2^%lu buckets\n", cache->log_size, new_log_size);
  munmap(cache->frames, old_size * sizeof(unw_tdep_frame_t));
  cache->frames = new_frames;
  cache->log_size = new_log_size;
  cache->used = 0;
  return 0;
}

static unw_trace_cache_t *
trace_cache_get_unthreaded (void)
{
  unw_trace_cache_t *cache;
  intrmask_t saved_mask;
  static unw_trace_cache_t *global_cache = NULL;
  lock_acquire (&trace_init_lock, saved_mask);
  if (! global_cache)
  {
    mempool_init (&trace_cache_pool, sizeof (unw_trace_cache_t), 0);
    global_cache = trace_cache_create ();
  }
  cache = global_cache;
  lock_release (&trace_init_lock, saved_mask);
  Debug(5, "using cache %p\n", cache);
  return cache;
}

/* Get the frame cache for the current thread. Create it if there is none. */
static unw_trace_cache_t *
trace_cache_get (void)
{
  unw_trace_cache_t *cache;
  if (likely (pthread_once != NULL))
  {
    pthread_once(&trace_cache_once, &trace_cache_init_once);
    if (!trace_cache_once_happen)
    {
      return trace_cache_get_unthreaded();
    }
    if (! (cache = tls_cache))
    {
      cache = trace_cache_create();
      pthread_setspecific(trace_cache_key, cache);
      tls_cache = cache;
    }
    Debug(5, "using cache %p\n", cache);
    return cache;
  }
  else
  {
    return trace_cache_get_unthreaded();
  }
}
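
/* Note the trace_cache_once_happen check above: if pthread_once() was
   resolved but did not actually run our init routine (as can happen
   with stub pthread implementations), the flag stays 0 and we fall
   back to the process-global cache rather than touching an
   uninitialised trace_cache_key. */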

/* Initialise frame properties for address cache slot F at address
   PC using current CFA, FP and SP values.  Modifies CURSOR to
   that location, performs one unw_step(), and fills F with what
   was discovered about the location.  Returns F.

   FIXME: This probably should tell DWARF handling to never evaluate
   or use registers other than FP, SP and PC in case there is
   highly unusual unwind info which uses these creatively. */
static unw_tdep_frame_t *
trace_init_addr (unw_tdep_frame_t *f,
                 unw_cursor_t *cursor,
                 unw_word_t cfa,
                 unw_word_t pc,
                 unw_word_t fp,
                 unw_word_t sp)
{
  struct cursor *c = (struct cursor *) cursor;
  struct dwarf_cursor *d = &c->dwarf;
  int ret = -UNW_EINVAL;

  /* Initialise frame properties: unknown, not last. */
  f->virtual_address = pc;
  f->frame_type = UNW_AARCH64_FRAME_OTHER;
  f->last_frame = 0;
  f->cfa_reg_sp = -1;
  f->cfa_reg_offset = 0;
  f->fp_cfa_offset = -1;
  f->lr_cfa_offset = -1;
  f->sp_cfa_offset = -1;

  /* Reinitialise cursor to this instruction - but undo next/prev PC
     adjustment because unw_step will redo it - and force PC, FP and
     SP into register locations (=~ ucontext we keep), then set
     their desired values. Then perform the step. */
  d->ip = pc + d->use_prev_instr;
  d->cfa = cfa;
  d->loc[UNW_AARCH64_X29] = DWARF_REG_LOC (d, UNW_AARCH64_X29);
  d->loc[UNW_AARCH64_SP] = DWARF_REG_LOC (d, UNW_AARCH64_SP);
  d->loc[UNW_AARCH64_PC] = DWARF_REG_LOC (d, UNW_AARCH64_PC);
  c->frame_info = *f;

  if (likely(dwarf_put (d, d->loc[UNW_AARCH64_X29], fp) >= 0)
      && likely(dwarf_put (d, d->loc[UNW_AARCH64_SP], sp) >= 0)
      && likely(dwarf_put (d, d->loc[UNW_AARCH64_PC], pc) >= 0)
      && likely((ret = unw_step (cursor)) >= 0))
    *f = c->frame_info;

  /* If unw_step() stopped voluntarily, remember that, even if it
     otherwise could not determine anything useful.  This avoids
     failing the trace if we hit frames without unwind info, which is
     common for the outermost frame (CRT stuff) on many systems;
     falling back to an unw_step() loop wouldn't produce any better
     result in those circumstances. */
  if (ret == 0)
    f->last_frame = -1;

  Debug (3, "frame va %lx type %d last %d cfa %s+%d fp @ cfa%+d lr @ cfa%+d sp @ cfa%+d\n",
         f->virtual_address, f->frame_type, f->last_frame,
         f->cfa_reg_sp ? "sp" : "fp", f->cfa_reg_offset,
         f->fp_cfa_offset, f->lr_cfa_offset, f->sp_cfa_offset);

  return f;
}
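
/* The dwarf_put() calls above work because the three loc[] slots were
   just forced to plain register locations: the writes land in the
   cursor's own copies of FP, SP and PC, giving unw_step() a synthetic
   context for the frame being probed rather than the live one. */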

/* Look up and if necessary fill in frame attributes for address PC
   in CACHE using current CFA, FP and SP values.  Uses CURSOR to
   perform any unwind steps necessary to fill the cache.  Returns the
   frame cache slot which describes PC. */
static unw_tdep_frame_t *
trace_lookup (unw_cursor_t *cursor,
              unw_trace_cache_t *cache,
              unw_word_t cfa,
              unw_word_t pc,
              unw_word_t fp,
              unw_word_t sp)
{
  /* First look for previously cached information, using the cache as
     a linear-probing hash table with a probe step of 1.  The majority
     of lookups should complete within a few steps, but it is very
     important that the hash table does not fill up, or performance
     falls off a cliff. */
  uint64_t i, addr;
  uint64_t cache_size = 1u << cache->log_size;
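  /* The multiplier below is (close to) 2^64 divided by the golden
     ratio, i.e. Fibonacci hashing: the multiply scrambles PC into the
     high bits of the 64-bit product, and the right shift keeps enough
     of them to index every table size this cache grows to. */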
  uint64_t slot = ((pc * 0x9e3779b97f4a7c16) >> 43) & (cache_size-1);
  unw_tdep_frame_t *frame;

  for (i = 0; i < 16; ++i)
  {
    frame = &cache->frames[slot];
    addr = frame->virtual_address;

    /* Return if we found the address. */
    if (likely(addr == pc))
    {
      Debug (4, "found address after %ld steps\n", i);
      return frame;
    }

    /* If slot is empty, reuse it. */
    if (likely(! addr))
      break;

    /* Linear probe to next slot candidate, step = 1. */
    if (++slot >= cache_size)
      slot -= cache_size;
  }

  /* If we collided after 16 steps, or if the hash is more than half
     full, force the hash to expand. Fill the selected slot, whether
     it's free or collides. Note that hash expansion drops previous
     contents; further lookups will refill the hash. */
  Debug (4, "updating slot %lu after %ld steps, replacing 0x%lx\n", slot, i, addr);
  if (unlikely(addr || cache->used >= cache_size / 2))
  {
    if (unlikely(trace_cache_expand (cache) < 0))
      return NULL;

    cache_size = 1u << cache->log_size;
    slot = ((pc * 0x9e3779b97f4a7c16) >> 43) & (cache_size-1);
    frame = &cache->frames[slot];
    addr = frame->virtual_address;
  }

  if (! addr)
    ++cache->used;

  return trace_init_addr (frame, cursor, cfa, pc, fp, sp);
}

/* Fast stack backtrace for AArch64.

   This is used by the backtrace() implementation to accelerate
   frequent queries for the current stack, without any desire to
   unwind. It fills BUFFER with the call tree from CURSOR upwards for
   at most SIZE stack levels. The first frame, backtrace itself, is
   omitted. When called, SIZE should give the maximum number of
   entries that can be stored into BUFFER. Uses an internal
   thread-specific cache to accelerate queries.

   The caller should fall back to an unw_step() loop if this function
   fails by returning -UNW_ESTOPUNWIND, meaning the routine hit a
   stack frame that is too complex to be traced in the fast path.

   This function is tuned for clients which only need to walk the
   stack to get the call tree as fast as possible but without any
   other details, for example profilers sampling the stack thousands
   to millions of times per second.  The routine handles the most
   common AArch64 ABI stack layouts: CFA is FP or SP plus/minus a
   constant offset, the return address is in LR, and FP, LR and SP
   are either unchanged or saved on the stack at a constant offset
   from the CFA; the signal return frame; and frames without unwind
   info provided they are at the outermost (final) frame or can
   conservatively be assumed to be frame-pointer based.

   Any other stack layout will cause the routine to give up. There
   are only a handful of relatively rarely used functions which do
   not have a stack in the standard form: vfork, longjmp, setcontext
   and _dl_runtime_profile on common Linux systems, for example.

   On success BUFFER and *SIZE reflect the trace progress up to *SIZE
   stack levels or the outermost frame, whichever is less.  The trace
   may stop short of the outermost frame if an unw_step() loop would
   also do so, e.g. if there is no more unwind information; this is
   not reported as an error.

   The function returns a negative value for errors: -UNW_ESTOPUNWIND
   if tracing stopped because of unusual frame unwind info.  BUFFER
   and *SIZE reflect tracing progress up to the error frame.

   Callers of this function would normally look like this:

     unw_cursor_t     cur;
     unw_context_t    ctx;
     void            *addrs[128];
     int              depth = 128;
     int              ret;

     unw_getcontext(&ctx);
     unw_init_local(&cur, &ctx);
     if ((ret = unw_tdep_trace(&cur, addrs, &depth)) < 0)
     {
       depth = 0;
       unw_getcontext(&ctx);
       unw_init_local(&cur, &ctx);
       while ((ret = unw_step(&cur)) > 0 && depth < 128)
       {
         unw_word_t ip;
         unw_get_reg(&cur, UNW_REG_IP, &ip);
         addrs[depth++] = (void *) ip;
       }
     }
*/
HIDDEN int
tdep_trace (unw_cursor_t *cursor, void **buffer, int *size)
{
  struct cursor *c = (struct cursor *) cursor;
  struct dwarf_cursor *d = &c->dwarf;
  unw_trace_cache_t *cache;
  unw_word_t fp, sp, pc, cfa, lr;
  int maxdepth = 0;
  int depth = 0;
  int ret;

  /* Check input parameters. */
  if (unlikely(! cursor || ! buffer || ! size || (maxdepth = *size) <= 0))
    return -UNW_EINVAL;

  Debug (1, "begin ip 0x%lx cfa 0x%lx\n", d->ip, d->cfa);

  /* Tell core dwarf routines to call back to us. */
  d->stash_frames = 1;

  /* Determine initial register values. These are direct access safe
     because we know they come from the initial machine context. */
  pc = d->ip;
  sp = cfa = d->cfa;
  ACCESS_MEM_FAST(ret, 0, d, DWARF_GET_LOC(d->loc[UNW_AARCH64_X29]), fp);
  assert(ret == 0);
  lr = 0;

  /* Get frame cache. */
  if (unlikely(! (cache = trace_cache_get())))
  {
    Debug (1, "returning %d, cannot get trace cache\n", -UNW_ENOMEM);
    *size = 0;
    d->stash_frames = 0;
    return -UNW_ENOMEM;
  }

  /* Trace the stack upwards, starting from the current PC.  Adjust
     the PC for previous/next instruction as the main unwinding logic
     would also do.  We undo this before calling back into
     unw_step(). */
  while (depth < maxdepth)
  {
    pc -= d->use_prev_instr;
    Debug (2, "depth %d cfa 0x%lx pc 0x%lx sp 0x%lx fp 0x%lx\n",
           depth, cfa, pc, sp, fp);

    /* See if we have this address cached.  If not, evaluate enough of
       the dwarf unwind information to fill the cache line data, or to
       decide this frame cannot be handled in fast trace mode.  We
       cache negative results too to prevent unnecessary dwarf parsing
       for common failures. */
    unw_tdep_frame_t *f = trace_lookup (cursor, cache, cfa, pc, fp, sp);

    /* If we don't have information for this frame, give up. */
    if (unlikely(! f))
    {
      ret = -UNW_ENOINFO;
      break;
    }

    Debug (3, "frame va %lx type %d last %d cfa %s+%d fp @ cfa%+d lr @ cfa%+d sp @ cfa%+d\n",
           f->virtual_address, f->frame_type, f->last_frame,
           f->cfa_reg_sp ? "sp" : "fp", f->cfa_reg_offset,
           f->fp_cfa_offset, f->lr_cfa_offset, f->sp_cfa_offset);

    assert (f->virtual_address == pc);

    /* Stop if this was the last frame.  In particular don't evaluate
       new register values as it may not be safe - we don't normally
       run with full validation on, and do not want to - and there's
       enough bad unwind info floating around that we need to trust
       what unw_step() previously said, in potentially bogus frames. */
    if (f->last_frame)
      break;

    /* Evaluate CFA and registers for the next frame. */
    switch (f->frame_type)
    {
    case UNW_AARCH64_FRAME_GUESSED:
      /* Fall thru to standard processing after forcing validation. */
      c->validate = 1;

    case UNW_AARCH64_FRAME_STANDARD:
      /* Advance standard traceable frame. */
      cfa = (f->cfa_reg_sp ? sp : fp) + f->cfa_reg_offset;
      if (likely(f->lr_cfa_offset != -1))
        ACCESS_MEM_FAST(ret, c->validate, d, cfa + f->lr_cfa_offset, pc);
      else if (lr != 0)
      {
        /* Use the saved link register as the new pc. */
        pc = lr;
        lr = 0;
      }
      if (likely(ret >= 0) && likely(f->fp_cfa_offset != -1))
        ACCESS_MEM_FAST(ret, c->validate, d, cfa + f->fp_cfa_offset, fp);

      /* Don't bother reading SP from DWARF, CFA becomes new SP. */
      sp = cfa;

      /* Next frame needs to back up for unwind info lookup. */
      d->use_prev_instr = 1;
      break;
    case UNW_AARCH64_FRAME_SIGRETURN:
      cfa = cfa + f->cfa_reg_offset; /* cfa now points to ucontext_t.  */

      ACCESS_MEM_FAST(ret, c->validate, d, cfa + LINUX_SC_PC_OFF, pc);
      if (likely(ret >= 0))
        ACCESS_MEM_FAST(ret, c->validate, d, cfa + LINUX_SC_X29_OFF, fp);
      if (likely(ret >= 0))
        ACCESS_MEM_FAST(ret, c->validate, d, cfa + LINUX_SC_SP_OFF, sp);
      /* Save the link register here in case we end up in a function that
         doesn't save the link register in the prologue, e.g. kill. */
      if (likely(ret >= 0))
        ACCESS_MEM_FAST(ret, c->validate, d, cfa + LINUX_SC_X30_OFF, lr);

      /* Resume stack at signal restoration point. The stack is not
         necessarily contiguous here, especially with sigaltstack(). */
      cfa = sp;

      /* Next frame should not back up. */
      d->use_prev_instr = 0;
      break;

    default:
      /* We cannot trace through this frame, give up and tell the
         caller we had to stop.  Data collected so far may still be
         useful to the caller, so let it know how far we got.  */
      ret = -UNW_ESTOPUNWIND;
      break;
    }

    Debug (4, "new cfa 0x%lx pc 0x%lx sp 0x%lx fp 0x%lx\n",
           cfa, pc, sp, fp);

    /* If we failed or ended up somewhere bogus, stop. */
    if (unlikely(ret < 0 || pc < 0x4000))
      break;

    /* Record this address in the stack trace. We skipped the first
       address (backtrace itself). */
    buffer[depth++] = (void *) (pc - d->use_prev_instr);
  }

#if UNW_DEBUG
  Debug (1, "returning %d, depth %d\n", ret, depth);
#endif
  *size = depth;
  return ret;
}