• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mode: C; c-basic-offset: 3; -*- */
2 
3 /*--------------------------------------------------------------------*/
4 /*--- Top level management of symbols and debugging information.   ---*/
5 /*---                                                  debuginfo.c ---*/
6 /*--------------------------------------------------------------------*/
7 
8 /*
9    This file is part of Valgrind, a dynamic binary instrumentation
10    framework.
11 
12    Copyright (C) 2000-2017 Julian Seward
13       jseward@acm.org
14 
15    This program is free software; you can redistribute it and/or
16    modify it under the terms of the GNU General Public License as
17    published by the Free Software Foundation; either version 2 of the
18    License, or (at your option) any later version.
19 
20    This program is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received a copy of the GNU General Public License
26    along with this program; if not, write to the Free Software
27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28    02111-1307, USA.
29 
30    The GNU General Public License is contained in the file COPYING.
31 */
32 
33 #include "pub_core_basics.h"
34 #include "pub_core_vki.h"
35 #include "pub_core_threadstate.h"
36 #include "pub_core_debuginfo.h"  /* self */
37 #include "pub_core_demangle.h"
38 #include "pub_core_libcbase.h"
39 #include "pub_core_libcassert.h"
40 #include "pub_core_libcprint.h"
41 #include "pub_core_libcfile.h"
42 #include "pub_core_libcproc.h"   // VG_(getenv)
43 #include "pub_core_seqmatch.h"
44 #include "pub_core_options.h"
45 #include "pub_core_redir.h"      // VG_(redir_notify_{new,delete}_SegInfo)
46 #include "pub_core_aspacemgr.h"
47 #include "pub_core_machine.h"    // VG_PLAT_USES_PPCTOC
48 #include "pub_core_xarray.h"
49 #include "pub_core_oset.h"
50 #include "pub_core_stacktrace.h" // VG_(get_StackTrace) XXX: circular dependency
51 #include "pub_core_ume.h"
52 
53 #include "priv_misc.h"           /* dinfo_zalloc/free */
54 #include "priv_image.h"
55 #include "priv_d3basics.h"       /* ML_(pp_GX) */
56 #include "priv_tytypes.h"
57 #include "priv_storage.h"
58 #include "priv_readdwarf.h"
59 #if defined(VGO_linux) || defined(VGO_solaris)
60 # include "priv_readelf.h"
61 # include "priv_readdwarf3.h"
62 # include "priv_readpdb.h"
63 #elif defined(VGO_darwin)
64 # include "priv_readmacho.h"
65 # include "priv_readpdb.h"
66 #endif
67 
68 
69 /* Set this to 1 to enable debug printing for the
70    should-we-load-debuginfo-now? finite state machine. */
71 #define DEBUG_FSM 0
72 
73 
74 /*------------------------------------------------------------*/
75 /*--- The _svma / _avma / _image / _bias naming scheme     ---*/
76 /*------------------------------------------------------------*/
77 
78 /* JRS 11 Jan 07: I find the different kinds of addresses involved in
79    debuginfo reading confusing.  Recently I arrived at some
80    terminology which makes it clearer (to me, at least).  There are 3
81    kinds of address used in the debuginfo reading process:
82 
83    stated VMAs - the address where (eg) a .so says a symbol is, that
84                  is, what it tells you if you consider the .so in
85                  isolation
86 
87    actual VMAs - the address where (eg) said symbol really wound up
88                  after the .so was mapped into memory
89 
90    image addresses - pointers into the copy of the .so (etc)
91                      transiently mmaped aboard whilst we read its info
92 
93    Additionally I use the term 'bias' to denote the difference
94    between stated and actual VMAs for a given entity.
95 
96    This terminology is not used consistently, but a start has been
97    made.  readelf.c and the call-frame info reader in readdwarf.c now
98    use it.  Specifically, various variables and structure fields have
99    been annotated with _avma / _svma / _image / _bias.  In places _img
100    is used instead of _image for the sake of brevity.
101 */
102 
103 
104 /*------------------------------------------------------------*/
105 /*--- fwdses                                               ---*/
106 /*------------------------------------------------------------*/
107 
108 static void caches__invalidate (void);
109 
110 
111 /*------------------------------------------------------------*/
112 /*--- Root structure                                       ---*/
113 /*------------------------------------------------------------*/
114 
115 /* The root structure for the entire debug info system.  It is a
116    linked list of DebugInfos. */
117 static DebugInfo* debugInfo_list = NULL;
118 
119 
120 /* Find 'di' in the debugInfo_list and move it one step closer to the
121    front of the list, so as to make subsequent searches for it
122    cheaper.  When used in a controlled way, makes a major improvement
123    in some DebugInfo-search-intensive situations, most notably stack
124    unwinding on amd64-linux. */
move_DebugInfo_one_step_forward(DebugInfo * di)125 static void move_DebugInfo_one_step_forward ( DebugInfo* di )
126 {
127    DebugInfo *di0, *di1, *di2;
128    if (di == debugInfo_list)
129       return; /* already at head of list */
130    vg_assert(di != NULL);
131    di0 = debugInfo_list;
132    di1 = NULL;
133    di2 = NULL;
134    while (True) {
135       if (di0 == NULL || di0 == di) break;
136       di2 = di1;
137       di1 = di0;
138       di0 = di0->next;
139    }
140    vg_assert(di0 == di);
141    if (di0 != NULL && di1 != NULL && di2 != NULL) {
142       DebugInfo* tmp;
143       /* di0 points to di, di1 to its predecessor, and di2 to di1's
144          predecessor.  Swap di0 and di1, that is, move di0 one step
145          closer to the start of the list. */
146       vg_assert(di2->next == di1);
147       vg_assert(di1->next == di0);
148       tmp = di0->next;
149       di2->next = di0;
150       di0->next = di1;
151       di1->next = tmp;
152    }
153    else
154    if (di0 != NULL && di1 != NULL && di2 == NULL) {
155       /* it's second in the list. */
156       vg_assert(debugInfo_list == di1);
157       vg_assert(di1->next == di0);
158       di1->next = di0->next;
159       di0->next = di1;
160       debugInfo_list = di0;
161    }
162 }
163 
164 
165 /*------------------------------------------------------------*/
166 /*--- Notification (acquire/discard) helpers               ---*/
167 /*------------------------------------------------------------*/
168 
169 /* Gives out unique abstract handles for allocated DebugInfos.  See
170    comment in priv_storage.h, declaration of struct _DebugInfo, for
171    details. */
172 static ULong handle_counter = 1;
173 
174 /* Allocate and zero out a new DebugInfo record. */
175 static
alloc_DebugInfo(const HChar * filename)176 DebugInfo* alloc_DebugInfo( const HChar* filename )
177 {
178    Bool       traceme;
179    DebugInfo* di;
180 
181    vg_assert(filename);
182 
183    di = ML_(dinfo_zalloc)("di.debuginfo.aDI.1", sizeof(DebugInfo));
184    di->handle       = handle_counter++;
185    di->fsm.filename = ML_(dinfo_strdup)("di.debuginfo.aDI.2", filename);
186    di->fsm.maps     = VG_(newXA)(
187                          ML_(dinfo_zalloc), "di.debuginfo.aDI.3",
188                          ML_(dinfo_free), sizeof(DebugInfoMapping));
189 
190    /* Everything else -- pointers, sizes, arrays -- is zeroed by
191       ML_(dinfo_zalloc).  Now set up the debugging-output flags. */
192    traceme
193       = VG_(string_match)( VG_(clo_trace_symtab_patt), filename );
194    if (traceme) {
195       di->trace_symtab = VG_(clo_trace_symtab);
196       di->trace_cfi    = VG_(clo_trace_cfi);
197       di->ddump_syms   = VG_(clo_debug_dump_syms);
198       di->ddump_line   = VG_(clo_debug_dump_line);
199       di->ddump_frames = VG_(clo_debug_dump_frames);
200    }
201 
202    return di;
203 }
204 
205 
206 /* Free a DebugInfo, and also all the stuff hanging off it. */
free_DebugInfo(DebugInfo * di)207 static void free_DebugInfo ( DebugInfo* di )
208 {
209    Word i, j, n;
210    TyEnt* ent;
211    GExpr* gexpr;
212 
213    vg_assert(di != NULL);
214    if (di->fsm.maps)     VG_(deleteXA)(di->fsm.maps);
215    if (di->fsm.filename) ML_(dinfo_free)(di->fsm.filename);
216    if (di->fsm.dbgname)  ML_(dinfo_free)(di->fsm.dbgname);
217    if (di->soname)       ML_(dinfo_free)(di->soname);
218    if (di->loctab)       ML_(dinfo_free)(di->loctab);
219    if (di->loctab_fndn_ix) ML_(dinfo_free)(di->loctab_fndn_ix);
220    if (di->inltab)       ML_(dinfo_free)(di->inltab);
221    if (di->cfsi_base)    ML_(dinfo_free)(di->cfsi_base);
222    if (di->cfsi_m_ix)    ML_(dinfo_free)(di->cfsi_m_ix);
223    if (di->cfsi_rd)      ML_(dinfo_free)(di->cfsi_rd);
224    if (di->cfsi_m_pool)  VG_(deleteDedupPA)(di->cfsi_m_pool);
225    if (di->cfsi_exprs)   VG_(deleteXA)(di->cfsi_exprs);
226    if (di->fpo)          ML_(dinfo_free)(di->fpo);
227 
228    if (di->symtab) {
229       /* We have to visit all the entries so as to free up any
230          sec_names arrays that might exist. */
231       n = di->symtab_used;
232       for (i = 0; i < n; i++) {
233          DiSym* sym = &di->symtab[i];
234          if (sym->sec_names)
235             ML_(dinfo_free)(sym->sec_names);
236       }
237       /* and finally .. */
238       ML_(dinfo_free)(di->symtab);
239    }
240 
241    if (di->strpool)
242       VG_(deleteDedupPA) (di->strpool);
243    if (di->fndnpool)
244       VG_(deleteDedupPA) (di->fndnpool);
245 
246    /* Delete the two admin arrays.  These lists exist primarily so
247       that we can visit each object exactly once when we need to
248       delete them. */
249    if (di->admin_tyents) {
250       n = VG_(sizeXA)(di->admin_tyents);
251       for (i = 0; i < n; i++) {
252          ent = (TyEnt*)VG_(indexXA)(di->admin_tyents, i);
253          /* Dump anything hanging off this ent */
254          ML_(TyEnt__make_EMPTY)(ent);
255       }
256       VG_(deleteXA)(di->admin_tyents);
257       di->admin_tyents = NULL;
258    }
259 
260    if (di->admin_gexprs) {
261       n = VG_(sizeXA)(di->admin_gexprs);
262       for (i = 0; i < n; i++) {
263          gexpr = *(GExpr**)VG_(indexXA)(di->admin_gexprs, i);
264          ML_(dinfo_free)(gexpr);
265       }
266       VG_(deleteXA)(di->admin_gexprs);
267       di->admin_gexprs = NULL;
268    }
269 
270    /* Dump the variable info.  This is kinda complex: we must take
271       care not to free items which reside in either the admin lists
272       (as we have just freed them) or which reside in the DebugInfo's
273       string table. */
274    if (di->varinfo) {
275       for (i = 0; i < VG_(sizeXA)(di->varinfo); i++) {
276          OSet* scope = *(OSet**)VG_(indexXA)(di->varinfo, i);
277          if (!scope) continue;
278          /* iterate over all entries in 'scope' */
279          VG_(OSetGen_ResetIter)(scope);
280          while (True) {
281             DiAddrRange* arange = VG_(OSetGen_Next)(scope);
282             if (!arange) break;
283             /* for each var in 'arange' */
284             vg_assert(arange->vars);
285             for (j = 0; j < VG_(sizeXA)( arange->vars ); j++) {
286                DiVariable* var = (DiVariable*)VG_(indexXA)(arange->vars,j);
287                vg_assert(var);
288                /* Nothing to free in var: all the pointer fields refer
289                   to stuff either on an admin list, or in
290                   .strpool */
291             }
292             VG_(deleteXA)(arange->vars);
293             /* Don't free arange itself, as OSetGen_Destroy does
294                that */
295          }
296          VG_(OSetGen_Destroy)(scope);
297       }
298       VG_(deleteXA)(di->varinfo);
299    }
300 
301    ML_(dinfo_free)(di);
302 }
303 
304 
305 /* 'si' is a member of debugInfo_list.  Find it, remove it from the
306    list, notify m_redir that this has happened, and free all storage
307    reachable from it.
308 */
discard_DebugInfo(DebugInfo * di)309 static void discard_DebugInfo ( DebugInfo* di )
310 {
311    const HChar* reason = "munmap";
312 
313    DebugInfo** prev_next_ptr = &debugInfo_list;
314    DebugInfo*  curr          =  debugInfo_list;
315 
316    while (curr) {
317       if (curr == di) {
318          /* Found it;  remove from list and free it. */
319          if (curr->have_dinfo
320              && (VG_(clo_verbosity) > 1 || VG_(clo_trace_redir)))
321             VG_(message)(Vg_DebugMsg,
322                          "Discarding syms at %#lx-%#lx in %s due to %s()\n",
323                          di->text_avma,
324                          di->text_avma + di->text_size,
325                          curr->fsm.filename ? curr->fsm.filename
326                                             : "???",
327                          reason);
328          vg_assert(*prev_next_ptr == curr);
329          *prev_next_ptr = curr->next;
330          if (curr->have_dinfo)
331             VG_(redir_notify_delete_DebugInfo)( curr );
332          free_DebugInfo(curr);
333          return;
334       }
335       prev_next_ptr = &curr->next;
336       curr          =  curr->next;
337    }
338 
339    /* Not found. */
340 }
341 
342 
343 /* Repeatedly scan debugInfo_list, looking for DebugInfos with text
344    AVMAs intersecting [start,start+length), and call discard_DebugInfo
345    to get rid of them.  This modifies the list, hence the multiple
346    iterations.  Returns True iff any such DebugInfos were found.
347 */
discard_syms_in_range(Addr start,SizeT length)348 static Bool discard_syms_in_range ( Addr start, SizeT length )
349 {
350    Bool       anyFound = False;
351    Bool       found;
352    DebugInfo* curr;
353 
354    while (True) {
355       found = False;
356 
357       curr = debugInfo_list;
358       while (True) {
359          if (curr == NULL)
360             break;
361          if (curr->text_present
362              && curr->text_size > 0
363              && (start+length - 1 < curr->text_avma
364                  || curr->text_avma + curr->text_size - 1 < start)) {
365             /* no overlap */
366 	 } else {
367 	    found = True;
368 	    break;
369 	 }
370 	 curr = curr->next;
371       }
372 
373       if (!found) break;
374       anyFound = True;
375       discard_DebugInfo( curr );
376    }
377 
378    return anyFound;
379 }
380 
381 
382 /* Does [s1,+len1) overlap [s2,+len2) ?  Note: does not handle
383    wraparound at the end of the address space -- just asserts in that
384    case. */
ranges_overlap(Addr s1,SizeT len1,Addr s2,SizeT len2)385 static Bool ranges_overlap (Addr s1, SizeT len1, Addr s2, SizeT len2 )
386 {
387    Addr e1, e2;
388    if (len1 == 0 || len2 == 0)
389       return False;
390    e1 = s1 + len1 - 1;
391    e2 = s2 + len2 - 1;
392    /* Assert that we don't have wraparound.  If we do it would imply
393       that file sections are getting mapped around the end of the
394       address space, which sounds unlikely. */
395    vg_assert(s1 <= e1);
396    vg_assert(s2 <= e2);
397    if (e1 < s2 || e2 < s1) return False;
398    return True;
399 }
400 
401 
402 /* Do the basic mappings of the two DebugInfos overlap in any way? */
do_DebugInfos_overlap(const DebugInfo * di1,const DebugInfo * di2)403 static Bool do_DebugInfos_overlap ( const DebugInfo* di1, const DebugInfo* di2 )
404 {
405    Word i, j;
406    vg_assert(di1);
407    vg_assert(di2);
408    for (i = 0; i < VG_(sizeXA)(di1->fsm.maps); i++) {
409       const DebugInfoMapping* map1 = VG_(indexXA)(di1->fsm.maps, i);
410       for (j = 0; j < VG_(sizeXA)(di2->fsm.maps); j++) {
411          const DebugInfoMapping* map2 = VG_(indexXA)(di2->fsm.maps, j);
412          if (ranges_overlap(map1->avma, map1->size, map2->avma, map2->size))
413             return True;
414       }
415    }
416 
417    return False;
418 }
419 
420 
421 /* Discard all elements of debugInfo_list whose .mark bit is set.
422 */
discard_marked_DebugInfos(void)423 static void discard_marked_DebugInfos ( void )
424 {
425    DebugInfo* curr;
426 
427    while (True) {
428 
429       curr = debugInfo_list;
430       while (True) {
431          if (!curr)
432             break;
433          if (curr->mark)
434             break;
435 	 curr = curr->next;
436       }
437 
438       if (!curr) break;
439       discard_DebugInfo( curr );
440 
441    }
442 }
443 
444 
445 /* Discard any elements of debugInfo_list which overlap with diRef.
446    Clearly diRef must have its mapping information set to something sane. */
discard_DebugInfos_which_overlap_with(DebugInfo * diRef)447 static void discard_DebugInfos_which_overlap_with ( DebugInfo* diRef )
448 {
449    DebugInfo* di;
450    /* Mark all the DebugInfos in debugInfo_list that need to be
451       deleted.  First, clear all the mark bits; then set them if they
452       overlap with siRef.  Since siRef itself is in this list we at
453       least expect its own mark bit to be set. */
454    for (di = debugInfo_list; di; di = di->next) {
455       di->mark = do_DebugInfos_overlap( di, diRef );
456       if (di == diRef) {
457          vg_assert(di->mark);
458          di->mark = False;
459       }
460    }
461    discard_marked_DebugInfos();
462 }
463 
464 
465 /* Find the existing DebugInfo for |filename| or if not found, create
466    one.  In the latter case |filename| is strdup'd into VG_AR_DINFO,
467    and the new DebugInfo is added to debugInfo_list. */
find_or_create_DebugInfo_for(const HChar * filename)468 static DebugInfo* find_or_create_DebugInfo_for ( const HChar* filename )
469 {
470    DebugInfo* di;
471    vg_assert(filename);
472    for (di = debugInfo_list; di; di = di->next) {
473       vg_assert(di->fsm.filename);
474       if (0==VG_(strcmp)(di->fsm.filename, filename))
475          break;
476    }
477    if (!di) {
478       di = alloc_DebugInfo(filename);
479       vg_assert(di);
480       di->next = debugInfo_list;
481       debugInfo_list = di;
482    }
483    return di;
484 }
485 
486 
487 /* Debuginfo reading for 'di' has just been successfully completed.
488    Check that the invariants stated in
489    "Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS" in
490    priv_storage.h are observed. */
check_CFSI_related_invariants(const DebugInfo * di)491 static void check_CFSI_related_invariants ( const DebugInfo* di )
492 {
493    DebugInfo* di2 = NULL;
494    Bool has_nonempty_rx = False;
495    Bool cfsi_fits = False;
496    Word i, j;
497    vg_assert(di);
498    /* This fn isn't called until after debuginfo for this object has
499       been successfully read.  And that shouldn't happen until we have
500       both a r-x and rw- mapping for the object.  Hence: */
501    vg_assert(di->fsm.have_rx_map);
502    vg_assert(di->fsm.have_rw_map);
503    for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) {
504       const DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, i);
505       /* We are interested in r-x mappings only */
506       if (!map->rx)
507          continue;
508 
509       /* degenerate case: r-x section is empty */
510       if (map->size == 0)
511          continue;
512       has_nonempty_rx = True;
513 
514       /* normal case: r-x section is nonempty */
515       /* invariant (0) */
516       vg_assert(map->size > 0);
517 
518       /* invariant (1) */
519       for (di2 = debugInfo_list; di2; di2 = di2->next) {
520          if (di2 == di)
521             continue;
522          for (j = 0; j < VG_(sizeXA)(di2->fsm.maps); j++) {
523             const DebugInfoMapping* map2 = VG_(indexXA)(di2->fsm.maps, j);
524             if (!map2->rx || map2->size == 0)
525                continue;
526             vg_assert(!ranges_overlap(map->avma,  map->size,
527                                       map2->avma, map2->size));
528          }
529       }
530       di2 = NULL;
531 
532       /* invariant (2) */
533       if (di->cfsi_rd) {
534          vg_assert(di->cfsi_minavma <= di->cfsi_maxavma); /* duh! */
535          /* Assume the csfi fits completely into one individual mapping
536             for now. This might need to be improved/reworked later. */
537          if (di->cfsi_minavma >= map->avma &&
538              di->cfsi_maxavma <  map->avma + map->size)
539             cfsi_fits = True;
540       }
541    }
542 
543    /* degenerate case: all r-x sections are empty */
544    if (!has_nonempty_rx) {
545       vg_assert(di->cfsi_rd == NULL);
546       return;
547    }
548 
549    /* invariant (2) - cont. */
550    if (di->cfsi_rd)
551       vg_assert(cfsi_fits);
552 
553    /* invariants (3) and (4) */
554    if (di->cfsi_rd) {
555       vg_assert(di->cfsi_used > 0);
556       vg_assert(di->cfsi_size > 0);
557       for (i = 0; i < di->cfsi_used; i++) {
558          DiCfSI* cfsi = &di->cfsi_rd[i];
559          vg_assert(cfsi->len > 0);
560          vg_assert(cfsi->base >= di->cfsi_minavma);
561          vg_assert(cfsi->base + cfsi->len - 1 <= di->cfsi_maxavma);
562          if (i > 0) {
563             DiCfSI* cfsip = &di->cfsi_rd[i-1];
564             vg_assert(cfsip->base + cfsip->len <= cfsi->base);
565          }
566       }
567    } else {
568       vg_assert(di->cfsi_used == 0);
569       vg_assert(di->cfsi_size == 0);
570    }
571 }
572 
573 
574 /*--------------------------------------------------------------*/
575 /*---                                                        ---*/
576 /*--- TOP LEVEL: INITIALISE THE DEBUGINFO SYSTEM             ---*/
577 /*---                                                        ---*/
578 /*--------------------------------------------------------------*/
579 
VG_(di_initialise)580 void VG_(di_initialise) ( void )
581 {
582    /* There's actually very little to do here, since everything
583       centers around the DebugInfos in debugInfo_list, they are
584       created and destroyed on demand, and each one is treated more or
585       less independently. */
586    vg_assert(debugInfo_list == NULL);
587 
588    /* flush the debug info caches. */
589    caches__invalidate();
590 }
591 
592 
593 /*--------------------------------------------------------------*/
594 /*---                                                        ---*/
595 /*--- TOP LEVEL: NOTIFICATION (ACQUIRE/DISCARD INFO) (LINUX) ---*/
596 /*---                                                        ---*/
597 /*--------------------------------------------------------------*/
598 
599 #if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
600 
601 /* Helper (indirect) for di_notify_ACHIEVE_ACCEPT_STATE */
overlaps_DebugInfoMappings(const DebugInfoMapping * map1,const DebugInfoMapping * map2)602 static Bool overlaps_DebugInfoMappings ( const DebugInfoMapping* map1,
603                                          const DebugInfoMapping* map2 )
604 {
605    vg_assert(map1 && map2 && map1 != map2);
606    vg_assert(map1->size != 0 && map2->size != 0);
607    if (map1->avma + map1->size <= map2->avma) return False;
608    if (map2->avma + map2->size <= map1->avma) return False;
609    return True;
610 }
611 
612 
613 /* Helper (indirect) for di_notify_ACHIEVE_ACCEPT_STATE */
show_DebugInfoMappings(const DebugInfo * di,XArray * maps)614 static void show_DebugInfoMappings
615                ( const DebugInfo* di,
616                  /*MOD*/XArray* maps /* XArray<DebugInfoMapping> */ )
617 {
618    Word i, n;
619    vg_assert(maps);
620    n = VG_(sizeXA)(maps);
621    for (i = 0; i < n; i++) {
622       const DebugInfoMapping* map = VG_(indexXA)(maps, i);
623       TRACE_SYMTAB("  [%ld]    avma 0x%-16lx    size %-8lu    "
624                    "foff %-8lld    %s %s %s\n",
625                    i, map->avma, map->size, (Long)map->foff,
626                    map->rx ? "rx" : "--",
627                    map->rw ? "rw" : "--",
628                    map->ro ? "ro" : "--");
629    }
630 }
631 
632 
633 /* Helper for di_notify_ACHIEVE_ACCEPT_STATE.  This removes overlaps
634    in |maps|, in a fairly weak way, by truncating overlapping ends.
635    This may need to be strengthened in future.  Currently it performs
636    a post-fixup check, so as least we can be sure that if this
637    function returns (rather than asserts) that |maps| is overlap
638    free. */
truncate_DebugInfoMapping_overlaps(const DebugInfo * di,XArray * maps)639 static void truncate_DebugInfoMapping_overlaps
640                ( const DebugInfo* di,
641                  /*MOD*/XArray* maps /* XArray<DebugInfoMapping> */ )
642 {
643    TRACE_SYMTAB("Un-de-overlapped _DebugInfoMappings:\n");
644    show_DebugInfoMappings(di, maps);
645    TRACE_SYMTAB("\n");
646 
647    Word i, j, n;
648    DebugInfoMapping *map_i, *map_j;
649 
650    n = VG_(sizeXA)(maps);
651    for (i = 0; i < n; i++) {
652 
653       map_i = VG_(indexXA)(maps, i);
654       if (map_i->size == 0)
655         continue; // Hmm, mutancy.  Shouldn't happen.
656 
657       for (j = i+1; j < n; j++) {
658 
659          map_j = VG_(indexXA)(maps, j);
660          if (map_j->size == 0)
661            continue; // Hmm, mutancy.  Shouldn't happen.
662 
663          /* map_j was observed later than map_i, since the entries are
664             in the XArray in the order in which they were observed.
665             If map_j starts inside map_i, trim map_i's end so it does
666             not overlap map_j.  This reflects the reality that when
667             two mmaped areas overlap, the later mmap silently
668             overwrites the earlier mmap's mapping. */
669          if (map_j->avma >= map_i->avma
670              && map_j->avma < map_i->avma + map_i->size) {
671             SizeT map_i_newsize = map_j->avma - map_i->avma;
672             vg_assert(map_i_newsize < map_i->size);
673             map_i->size = map_i_newsize;
674          }
675 
676       }
677    }
678 
679    TRACE_SYMTAB("De-overlapped DebugInfoMappings:\n");
680    show_DebugInfoMappings(di, maps);
681    TRACE_SYMTAB("\n");
682    TRACE_SYMTAB("Checking that there are no remaining overlaps.\n");
683 
684    for (i = 0; i < n; i++) {
685       map_i = VG_(indexXA)(maps, i);
686       if (map_i->size == 0)
687         continue;
688       for (j = i+1; j < n; j++) {
689          map_j = VG_(indexXA)(maps, j);
690          if (map_j->size == 0)
691            continue;
692          Bool overlap
693             = overlaps_DebugInfoMappings( map_i, map_j );
694          /* If the following assert ever fails, it means the de-overlapping
695             scheme above is too weak, and needs improvement. */
696          vg_assert(!overlap);
697       }
698    }
699 
700    TRACE_SYMTAB("Check successful.\n");
701 }
702 
703 
704 /* The debug info system is driven by notifications that a text
705    segment has been mapped in, or unmapped, or when sections change
706    permission.  It's all a bit kludgey and basically means watching
707    syscalls, trying to second-guess when the system's dynamic linker
708    is done with mapping in a new object for execution.  This is all
709    tracked using the DebugInfoFSM struct for the object.  Anyway, once
710    we finally decide we've got to an accept state, this section then
711    will acquire whatever info is available for the corresponding
712    object.  This section contains the notification handlers, which
713    update the FSM and determine when an accept state has been reached.
714 */
715 
716 /* When the sequence of observations causes a DebugInfoFSM to move
717    into the accept state, call here to actually get the debuginfo read
718    in.  Returns a ULong whose purpose is described in comments
719    preceding VG_(di_notify_mmap) just below.
720 */
di_notify_ACHIEVE_ACCEPT_STATE(struct _DebugInfo * di)721 static ULong di_notify_ACHIEVE_ACCEPT_STATE ( struct _DebugInfo* di )
722 {
723    ULong di_handle;
724    Bool  ok;
725 
726    vg_assert(di->fsm.filename);
727    TRACE_SYMTAB("\n");
728    TRACE_SYMTAB("------ start ELF OBJECT "
729                 "-------------------------"
730                 "------------------------------\n");
731    TRACE_SYMTAB("------ name = %s\n", di->fsm.filename);
732    TRACE_SYMTAB("\n");
733 
734    /* We're going to read symbols and debug info for the avma
735       ranges specified in the _DebugInfoFsm mapping array. First
736       get rid of any other DebugInfos which overlap any of those
737       ranges (to avoid total confusion). */
738    discard_DebugInfos_which_overlap_with( di );
739 
740    /* The DebugInfoMappings that now exist in the FSM may involve
741       overlaps.  This confuses ML_(read_elf_debug_info), and may cause
742       it to compute wrong biases.  So de-overlap them now.
743       See http://bugzilla.mozilla.org/show_bug.cgi?id=788974 */
744    truncate_DebugInfoMapping_overlaps( di, di->fsm.maps );
745 
746    /* And acquire new info. */
747 #  if defined(VGO_linux) || defined(VGO_solaris)
748    ok = ML_(read_elf_debug_info)( di );
749 #  elif defined(VGO_darwin)
750    ok = ML_(read_macho_debug_info)( di );
751 #  else
752 #    error "unknown OS"
753 #  endif
754 
755    if (ok) {
756 
757       TRACE_SYMTAB("\n------ Canonicalising the "
758                    "acquired info ------\n");
759       /* invalidate the debug info caches. */
760       caches__invalidate();
761       /* prepare read data for use */
762       ML_(canonicaliseTables)( di );
763       /* Check invariants listed in
764          Comment_on_IMPORTANT_REPRESENTATIONAL_INVARIANTS in
765          priv_storage.h. */
766       check_CFSI_related_invariants(di);
767       ML_(finish_CFSI_arrays)(di);
768       /* notify m_redir about it */
769       TRACE_SYMTAB("\n------ Notifying m_redir ------\n");
770       VG_(redir_notify_new_DebugInfo)( di );
771       /* Note that we succeeded */
772       di->have_dinfo = True;
773       vg_assert(di->handle > 0);
774       di_handle = di->handle;
775 
776    } else {
777       TRACE_SYMTAB("\n------ ELF reading failed ------\n");
778       /* Something went wrong (eg. bad ELF file).  Should we delete
779          this DebugInfo?  No - it contains info on the rw/rx
780          mappings, at least. */
781       di_handle = 0;
782       vg_assert(di->have_dinfo == False);
783    }
784 
785    TRACE_SYMTAB("\n");
786    TRACE_SYMTAB("------ name = %s\n", di->fsm.filename);
787    TRACE_SYMTAB("------ end ELF OBJECT "
788                 "-------------------------"
789                 "------------------------------\n");
790    TRACE_SYMTAB("\n");
791 
792    return di_handle;
793 }
794 
795 
796 /* Notify the debuginfo system about a new mapping.  This is the way
797    new debug information gets loaded.  If allow_SkFileV is True, it
798    will try load debug info if the mapping at 'a' belongs to Valgrind;
799    whereas normally (False) it will not do that.  This allows us to
800    carefully control when the thing will read symbols from the
801    Valgrind executable itself.
802 
803    If use_fd is not -1, that is used instead of the filename; this
804    avoids perturbing fcntl locks, which are released by simply
805    re-opening and closing the same file (even via different fd!).
806 
807    If a call to VG_(di_notify_mmap) causes debug info to be read, then
808    the returned ULong is an abstract handle which can later be used to
809    refer to the debuginfo read as a result of this specific mapping,
810    in later queries to m_debuginfo.  In this case the handle value
811    will be one or above.  If the returned value is zero, no debug info
812    was read. */
813 
VG_(di_notify_mmap)814 ULong VG_(di_notify_mmap)( Addr a, Bool allow_SkFileV, Int use_fd )
815 {
816    NSegment const * seg;
817    const HChar* filename;
818    Bool       is_rx_map, is_rw_map, is_ro_map;
819    DebugInfo* di;
820    Int        actual_fd, oflags;
821    SysRes     preadres;
822    HChar      buf1k[1024];
823    Bool       debug = (DEBUG_FSM != 0);
824    SysRes     statres;
825    struct vg_stat statbuf;
826 
827    vg_assert(use_fd >= -1);
828 
829    /* In short, figure out if this mapping is of interest to us, and
830       if so, try to guess what ld.so is doing and when/if we should
831       read debug info. */
832    seg = VG_(am_find_nsegment)(a);
833    vg_assert(seg);
834 
835    if (debug) {
836       VG_(printf)("di_notify_mmap-0:\n");
837       VG_(printf)("di_notify_mmap-1: %#lx-%#lx %c%c%c\n",
838                   seg->start, seg->end,
839                   seg->hasR ? 'r' : '-',
840                   seg->hasW ? 'w' : '-',seg->hasX ? 'x' : '-' );
841    }
842 
843    /* guaranteed by aspacemgr-linux.c, sane_NSegment() */
844    vg_assert(seg->end > seg->start);
845 
846    /* Ignore non-file mappings */
847    if ( ! (seg->kind == SkFileC
848            || (seg->kind == SkFileV && allow_SkFileV)) )
849       return 0;
850 
851    /* If the file doesn't have a name, we're hosed.  Give up. */
852    filename = VG_(am_get_filename)( seg );
853    if (!filename)
854       return 0;
855 
856    /*
857     * Cannot read from these magic files:
858     * --20208-- WARNING: Serious error when reading debug info
859     * --20208-- When reading debug info from /proc/xen/privcmd:
860     * --20208-- can't read file to inspect ELF header
861     */
862    if (VG_(strncmp)(filename, "/proc/xen/", 10) == 0)
863       return 0;
864 
865    if (debug)
866       VG_(printf)("di_notify_mmap-2: %s\n", filename);
867 
868    /* Only try to read debug information from regular files.  */
869    statres = VG_(stat)(filename, &statbuf);
870 
871    /* stat dereferences symlinks, so we don't expect it to succeed and
872       yet produce something that is a symlink. */
873    vg_assert(sr_isError(statres) || ! VKI_S_ISLNK(statbuf.mode));
874 
875    /* Don't let the stat call fail silently.  Filter out some known
876       sources of noise before complaining, though. */
877    if (sr_isError(statres)) {
878       DebugInfo fake_di;
879       Bool quiet = VG_(strstr)(filename, "/var/run/nscd/") != NULL;
880       if (!quiet && VG_(clo_verbosity) > 1) {
881          VG_(memset)(&fake_di, 0, sizeof(fake_di));
882          fake_di.fsm.filename = ML_(dinfo_strdup)("di.debuginfo.nmm", filename);
883          ML_(symerr)(&fake_di, True, "failed to stat64/stat this file");
884       }
885       return 0;
886    }
887 
888    /* Finally, the point of all this stattery: if it's not a regular file,
889       don't try to read debug info from it. */
890    if (! VKI_S_ISREG(statbuf.mode))
891       return 0;
892 
893    /* no uses of statbuf below here. */
894 
895    /* Now we have to guess if this is a text-like mapping, a data-like
896       mapping, neither or both.  The rules are:
897 
898         text if:   x86-linux    r and x
899                    other-linux  r and x and not w
900 
901         data if:   x86-linux    r and w
902                    other-linux  r and w and not x
903 
904       Background: On x86-linux, objects are typically mapped twice:
905 
906       1b8fb000-1b8ff000 r-xp 00000000 08:02 4471477 vgpreload_memcheck.so
907       1b8ff000-1b900000 rw-p 00004000 08:02 4471477 vgpreload_memcheck.so
908 
909       whereas ppc32-linux mysteriously does this:
910 
911       118a6000-118ad000 r-xp 00000000 08:05 14209428 vgpreload_memcheck.so
912       118ad000-118b6000 ---p 00007000 08:05 14209428 vgpreload_memcheck.so
913       118b6000-118bd000 rwxp 00000000 08:05 14209428 vgpreload_memcheck.so
914 
915       The third mapping should not be considered to have executable
916       code in.  Therefore a test which works for both is: r and x and
917       NOT w.  Reading symbols from the rwx segment -- which overlaps
918       the r-x segment in the file -- causes the redirection mechanism
919       to redirect to addresses in that third segment, which is wrong
920       and causes crashes.
921 
922       JRS 28 Dec 05: unfortunately icc 8.1 on x86 has been seen to
923       produce executables with a single rwx segment rather than a
924       (r-x,rw-) pair. That means the rules have to be modified thusly:
925 
926       x86-linux:   consider if r and x
927       all others:  consider if r and x and not w
928 
929       2009 Aug 16: apply similar kludge to ppc32-linux.
930       See http://bugs.kde.org/show_bug.cgi?id=190820
931 
932       There are two modes on s390x: with and without the noexec kernel
933       parameter. Together with some older kernels, this leads to several
934       variants:
935       executable: r and x
936       data:       r and w and x
937       or
938       executable: r and x
939       data:       r and w
940    */
941    is_rx_map = False;
942    is_rw_map = False;
943    is_ro_map = False;
944 
945 #  if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32) \
946       || defined(VGA_mips64)
947    is_rx_map = seg->hasR && seg->hasX;
948    is_rw_map = seg->hasR && seg->hasW;
949 #  elif defined(VGA_amd64) || defined(VGA_ppc64be) || defined(VGA_ppc64le)  \
950         || defined(VGA_arm) || defined(VGA_arm64)
951    is_rx_map = seg->hasR && seg->hasX && !seg->hasW;
952    is_rw_map = seg->hasR && seg->hasW && !seg->hasX;
953 #  elif defined(VGP_s390x_linux)
954    is_rx_map = seg->hasR && seg->hasX && !seg->hasW;
955    is_rw_map = seg->hasR && seg->hasW;
956 #  else
957 #    error "Unknown platform"
958 #  endif
959 
960 #  if defined(VGP_x86_darwin) && DARWIN_VERS >= DARWIN_10_7
961    is_ro_map = seg->hasR && !seg->hasW && !seg->hasX;
962 #  endif
963 
964 #  if defined(VGO_solaris)
965    is_rx_map = seg->hasR && seg->hasX && !seg->hasW;
966    is_rw_map = seg->hasR && seg->hasW;
967 #  endif
968 
969    if (debug)
970       VG_(printf)("di_notify_mmap-3: "
971                   "is_rx_map %d, is_rw_map %d, is_ro_map %d\n",
972                   (Int)is_rx_map, (Int)is_rw_map, (Int)is_ro_map);
973 
974    /* Ignore mappings with permissions we can't possibly be interested in. */
975    if (!(is_rx_map || is_rw_map || is_ro_map))
976       return 0;
977 
978    /* Peer at the first few bytes of the file, to see if it is an ELF */
979    /* object file. Ignore the file if we do not have read permission. */
980    VG_(memset)(buf1k, 0, sizeof(buf1k));
981    oflags = VKI_O_RDONLY;
982 #  if defined(VKI_O_LARGEFILE)
983    oflags |= VKI_O_LARGEFILE;
984 #  endif
985 
986    if (use_fd == -1) {
987       SysRes fd = VG_(open)( filename, oflags, 0 );
988       if (sr_isError(fd)) {
989          if (sr_Err(fd) != VKI_EACCES) {
990             DebugInfo fake_di;
991             VG_(memset)(&fake_di, 0, sizeof(fake_di));
992             fake_di.fsm.filename = ML_(dinfo_strdup)("di.debuginfo.nmm",
993                                                      filename);
994             ML_(symerr)(&fake_di, True,
995                         "can't open file to inspect ELF header");
996          }
997          return 0;
998       }
999       actual_fd = sr_Res(fd);
1000    } else {
1001       actual_fd = use_fd;
1002    }
1003 
1004    preadres = VG_(pread)( actual_fd, buf1k, sizeof(buf1k), 0 );
1005    if (use_fd == -1) {
1006       VG_(close)( actual_fd );
1007    }
1008 
1009    if (sr_isError(preadres)) {
1010       DebugInfo fake_di;
1011       VG_(memset)(&fake_di, 0, sizeof(fake_di));
1012       fake_di.fsm.filename = ML_(dinfo_strdup)("di.debuginfo.nmm", filename);
1013       ML_(symerr)(&fake_di, True, "can't read file to inspect ELF header");
1014       return 0;
1015    }
1016    if (sr_Res(preadres) == 0)
1017       return 0;
1018    vg_assert(sr_Res(preadres) > 0 && sr_Res(preadres) <= sizeof(buf1k) );
1019 
1020    /* We're only interested in mappings of object files. */
1021 #  if defined(VGO_linux) || defined(VGO_solaris)
1022    if (!ML_(is_elf_object_file)( buf1k, (SizeT)sr_Res(preadres), False ))
1023       return 0;
1024 #  elif defined(VGO_darwin)
1025    if (!ML_(is_macho_object_file)( buf1k, (SizeT)sr_Res(preadres) ))
1026       return 0;
1027 #  else
1028 #    error "unknown OS"
1029 #  endif
1030 
1031    /* See if we have a DebugInfo for this filename.  If not,
1032       create one. */
1033    di = find_or_create_DebugInfo_for( filename );
1034    vg_assert(di);
1035 
1036    if (debug)
1037       VG_(printf)("di_notify_mmap-4: "
1038                   "noting details in DebugInfo* at %p\n", di);
1039 
1040    /* Note the details about the mapping. */
1041    DebugInfoMapping map;
1042    map.avma = seg->start;
1043    map.size = seg->end + 1 - seg->start;
1044    map.foff = seg->offset;
1045    map.rx   = is_rx_map;
1046    map.rw   = is_rw_map;
1047    map.ro   = is_ro_map;
1048    VG_(addToXA)(di->fsm.maps, &map);
1049 
1050    /* Update flags about what kind of mappings we've already seen. */
1051    di->fsm.have_rx_map |= is_rx_map;
1052    di->fsm.have_rw_map |= is_rw_map;
1053    di->fsm.have_ro_map |= is_ro_map;
1054 
1055    /* So, finally, are we in an accept state? */
1056    if (di->fsm.have_rx_map && di->fsm.have_rw_map && !di->have_dinfo) {
1057       /* Ok, so, finally, we found what we need, and we haven't
1058          already read debuginfo for this object.  So let's do so now.
1059          Yee-ha! */
1060       if (debug)
1061          VG_(printf)("di_notify_mmap-5: "
1062                      "achieved accept state for %s\n", filename);
1063       return di_notify_ACHIEVE_ACCEPT_STATE ( di );
1064    } else {
1065       /* If we don't have an rx and rw mapping, or if we already have
1066          debuginfo for this mapping for whatever reason, go no
1067          further. */
1068       return 0;
1069    }
1070 }
1071 
1072 
1073 /* Unmap is simpler - throw away any SegInfos intersecting
1074    [a, a+len).  */
VG_(di_notify_munmap)1075 void VG_(di_notify_munmap)( Addr a, SizeT len )
1076 {
1077    Bool anyFound;
1078    if (0) VG_(printf)("DISCARD %#lx %#lx\n", a, a+len);
1079    anyFound = discard_syms_in_range(a, len);
1080    if (anyFound)
1081       caches__invalidate();
1082 }
1083 
1084 
1085 /* Uh, this doesn't do anything at all.  IIRC glibc (or ld.so, I don't
1086    remember) does a bunch of mprotects on itself, and if we follow
1087    through here, it causes the debug info for that object to get
1088    discarded. */
VG_(di_notify_mprotect)1089 void VG_(di_notify_mprotect)( Addr a, SizeT len, UInt prot )
1090 {
1091    Bool exe_ok = toBool(prot & VKI_PROT_EXEC);
1092 #  if defined(VGA_x86)
1093    exe_ok = exe_ok || toBool(prot & VKI_PROT_READ);
1094 #  endif
1095    if (0 && !exe_ok) {
1096       Bool anyFound = discard_syms_in_range(a, len);
1097       if (anyFound)
1098          caches__invalidate();
1099    }
1100 }
1101 
1102 
1103 /* This is a MacOSX >= 10.7 32-bit only special.  See comments on the
1104    declaration of struct _DebugInfoFSM for details. */
VG_(di_notify_vm_protect)1105 void VG_(di_notify_vm_protect)( Addr a, SizeT len, UInt prot )
1106 {
1107    Bool debug = (DEBUG_FSM != 0);
1108 
1109    Bool r_ok = toBool(prot & VKI_PROT_READ);
1110    Bool w_ok = toBool(prot & VKI_PROT_WRITE);
1111    Bool x_ok = toBool(prot & VKI_PROT_EXEC);
1112    if (debug) {
1113       VG_(printf)("di_notify_vm_protect-0:\n");
1114       VG_(printf)("di_notify_vm_protect-1: %#lx-%#lx %c%c%c\n",
1115                   a, a + len - 1,
1116                   r_ok ? 'r' : '-', w_ok ? 'w' : '-', x_ok ? 'x' : '-' );
1117    }
1118 
1119    Bool do_nothing = True;
1120 #  if defined(VGP_x86_darwin) && (DARWIN_VERS >= DARWIN_10_7)
1121    do_nothing = False;
1122 #  endif
1123    if (do_nothing /* wrong platform */) {
1124       if (debug)
1125          VG_(printf)("di_notify_vm_protect-2: wrong platform, "
1126                      "doing nothing.\n");
1127       return;
1128    }
1129 
1130    if (! (r_ok && !w_ok && x_ok))
1131       return; /* not an upgrade to r-x */
1132 
1133    /* Find a DebugInfo containing a FSM that has [a, +len) previously
1134       observed as a r-- mapping, plus some other rw- mapping.  If such
1135       is found, conclude we're in an accept state and read debuginfo
1136       accordingly. */
1137    if (debug)
1138       VG_(printf)("di_notify_vm_protect-3: looking for existing DebugInfo*\n");
1139    DebugInfo* di;
1140    DebugInfoMapping *map = NULL;
1141    Word i;
1142    for (di = debugInfo_list; di; di = di->next) {
1143       vg_assert(di->fsm.filename);
1144       if (di->have_dinfo)
1145          continue; /* already have debuginfo for this object */
1146       if (!di->fsm.have_ro_map)
1147          continue; /* need to have a r-- mapping for this object */
1148       if (di->fsm.have_rx_map)
1149          continue; /* rx- mapping already exists */
1150       if (!di->fsm.have_rw_map)
1151          continue; /* need to have a rw- mapping */
1152       /* Try to find a mapping matching the memory area. */
1153       for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) {
1154          map = VG_(indexXA)(di->fsm.maps, i);
1155          if (map->ro && map->avma == a && map->size == len)
1156             break;
1157          map = NULL;
1158       }
1159       if (!map)
1160          continue; /* this isn't an upgrade of an r-- mapping */
1161       /* looks like we're in luck! */
1162       break;
1163    }
1164    if (di == NULL)
1165       return; /* didn't find anything */
1166 
1167    if (debug)
1168      VG_(printf)("di_notify_vm_protect-4: found existing DebugInfo* at %p\n",
1169                  di);
1170 
1171    /* Do the upgrade.  Simply update the flags of the mapping
1172       and pretend we never saw the RO map at all. */
1173    vg_assert(di->fsm.have_ro_map);
1174    map->rx = True;
1175    map->ro = False;
1176    di->fsm.have_rx_map = True;
1177    di->fsm.have_ro_map = False;
1178    /* See if there are any more ro mappings */
1179    for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) {
1180       map = VG_(indexXA)(di->fsm.maps, i);
1181       if (map->ro) {
1182          di->fsm.have_ro_map = True;
1183          break;
1184       }
1185    }
1186 
1187    /* Check if we're now in an accept state and read debuginfo.  Finally. */
1188    if (di->fsm.have_rx_map && di->fsm.have_rw_map && !di->have_dinfo) {
1189       if (debug)
1190          VG_(printf)("di_notify_vm_protect-5: "
1191                      "achieved accept state for %s\n", di->fsm.filename);
1192       ULong di_handle __attribute__((unused))
1193          = di_notify_ACHIEVE_ACCEPT_STATE( di );
1194       /* di_handle is ignored. That's not a problem per se -- it just
1195          means nobody will ever be able to refer to this debuginfo by
1196          handle since nobody will know what the handle value is. */
1197    }
1198 }
1199 
1200 
1201 /*--------- PDB (windows debug info) reading --------- */
1202 
1203 /* this should really return ULong, as per VG_(di_notify_mmap). */
VG_(di_notify_pdb_debuginfo)1204 void VG_(di_notify_pdb_debuginfo)( Int fd_obj, Addr avma_obj,
1205                                    SizeT total_size, PtrdiffT bias_obj )
1206 {
1207    Int    i, r, sz_exename;
1208    ULong  obj_mtime, pdb_mtime;
1209    HChar* pdbname = NULL;
1210    HChar* dot;
1211    SysRes sres;
1212    Int    fd_pdbimage;
1213    SizeT  n_pdbimage;
1214    struct vg_stat stat_buf;
1215 
1216    if (VG_(clo_verbosity) > 0) {
1217       VG_(message)(Vg_UserMsg, "\n");
1218       VG_(message)(Vg_UserMsg,
1219          "LOAD_PDB_DEBUGINFO: clreq:   fd=%d, avma=%#lx, total_size=%lu, "
1220          "bias=%#lx\n",
1221          fd_obj, avma_obj, total_size, (UWord)bias_obj
1222       );
1223    }
1224 
1225    /* 'fd' refers to the .exe/.dll we're dealing with.  Get its modification
1226       time into obj_mtime. */
1227    r = VG_(fstat)(fd_obj, &stat_buf);
1228    if (r == -1)
1229       return; /* stat failed ?! */
1230    vg_assert(r == 0);
1231    obj_mtime = stat_buf.mtime;
1232 
1233    /* and get its name into exename. */
1234    const HChar *exe;
1235    if (! VG_(resolve_filename)(fd_obj, &exe))
1236       return; /*  failed */
1237    sz_exename = VG_(strlen)(exe);
1238    HChar exename[sz_exename + 1];
1239    VG_(strcpy)(exename, exe);  // make a copy on the stack
1240 
1241    if (VG_(clo_verbosity) > 0) {
1242       VG_(message)(Vg_UserMsg, "LOAD_PDB_DEBUGINFO: objname: %s\n", exename);
1243    }
1244 
1245    /* Try to get the PDB file name from the executable. */
1246    pdbname = ML_(find_name_of_pdb_file)(exename);
1247    if (pdbname) {
1248       vg_assert(VG_(strlen)(pdbname) >= 5); /* 5 = strlen("X.pdb") */
1249       /* So we successfully extracted a name from the PE file.  But it's
1250          likely to be of the form
1251             e:\foo\bar\xyzzy\wibble.pdb
1252          and we need to change it into something we can actually open
1253          in Wine-world, which basically means turning it into
1254             $HOME/.wine/drive_e/foo/bar/xyzzy/wibble.pdb
1255          We also take into account $WINEPREFIX, if it is set.
1256          For the moment, if the name isn't fully qualified, just forget it
1257          (we'd have to root around to find where the pdb actually is)
1258       */
1259       /* Change all the backslashes to forward slashes */
1260       for (i = 0; pdbname[i]; i++) {
1261          if (pdbname[i] == '\\')
1262             pdbname[i] = '/';
1263       }
1264       Bool is_quald
1265          = ('a' <= VG_(tolower)(pdbname[0]) && VG_(tolower)(pdbname[0]) <= 'z')
1266            && pdbname[1] == ':'
1267            && pdbname[2] == '/';
1268       HChar* home = VG_(getenv)("HOME");
1269       HChar* wpfx = VG_(getenv)("WINEPREFIX");
1270       if (is_quald && wpfx) {
1271          /* Change e:/foo/bar/xyzzy/wibble.pdb
1272                 to $WINEPREFIX/drive_e/foo/bar/xyzzy/wibble.pdb
1273          */
1274          Int mashedSzB = VG_(strlen)(pdbname) + VG_(strlen)(wpfx) + 50/*misc*/;
1275          HChar* mashed = ML_(dinfo_zalloc)("di.debuginfo.dnpdi.1", mashedSzB);
1276          VG_(snprintf)(mashed, mashedSzB, "%s/drive_%c%s",
1277                        wpfx, pdbname[0], &pdbname[2]);
1278          vg_assert(mashed[mashedSzB-1] == 0);
1279          ML_(dinfo_free)(pdbname);
1280          pdbname = mashed;
1281       }
1282       else if (is_quald && home && !wpfx) {
1283          /* Change e:/foo/bar/xyzzy/wibble.pdb
1284                 to $HOME/.wine/drive_e/foo/bar/xyzzy/wibble.pdb
1285          */
1286          Int mashedSzB = VG_(strlen)(pdbname) + VG_(strlen)(home) + 50/*misc*/;
1287          HChar* mashed = ML_(dinfo_zalloc)("di.debuginfo.dnpdi.2", mashedSzB);
1288          VG_(snprintf)(mashed, mashedSzB, "%s/.wine/drive_%c%s",
1289 		       home, pdbname[0], &pdbname[2]);
1290          vg_assert(mashed[mashedSzB-1] == 0);
1291          ML_(dinfo_free)(pdbname);
1292          pdbname = mashed;
1293       } else {
1294          /* It's not a fully qualified path, or neither $HOME nor $WINE
1295             are set (strange).  Give up. */
1296          ML_(dinfo_free)(pdbname);
1297          pdbname = NULL;
1298       }
1299    }
1300 
1301    /* Try s/exe/pdb/ if we don't have a valid pdbname. */
1302    if (!pdbname) {
1303       /* Try to find a matching PDB file from which to read debuginfo.
1304          Windows PE files have symbol tables and line number information,
1305          but MSVC doesn't seem to use them. */
1306       /* Why +5 ?  Because in the worst case, we could find a dot as the
1307          last character of pdbname, and we'd then put "pdb" right after
1308          it, hence extending it a bit. */
1309       pdbname = ML_(dinfo_zalloc)("di.debuginfo.lpd1", sz_exename+5);
1310       VG_(strcpy)(pdbname, exename);
1311       vg_assert(pdbname[sz_exename+5-1] == 0);
1312       dot = VG_(strrchr)(pdbname, '.');
1313       if (!dot)
1314          goto out; /* there's no dot in the exe's name ?! */
1315       if (dot[1] == 0)
1316          goto out; /* hmm, path ends in "." */
1317 
1318       if ('A' <= dot[1] && dot[1] <= 'Z')
1319          VG_(strcpy)(dot, ".PDB");
1320       else
1321          VG_(strcpy)(dot, ".pdb");
1322 
1323       vg_assert(pdbname[sz_exename+5-1] == 0);
1324    }
1325 
1326    /* See if we can find it, and check it's in-dateness. */
1327    sres = VG_(stat)(pdbname, &stat_buf);
1328    if (sr_isError(sres)) {
1329       VG_(message)(Vg_UserMsg, "Warning: Missing or un-stat-able %s\n",
1330                                pdbname);
1331    if (VG_(clo_verbosity) > 0)
1332       VG_(message)(Vg_UserMsg, "LOAD_PDB_DEBUGINFO: missing: %s\n", pdbname);
1333       goto out;
1334    }
1335    pdb_mtime = stat_buf.mtime;
1336 
1337    if (obj_mtime > pdb_mtime + 60ULL) {
1338       /* PDB file is older than PE file.  Really, the PDB should be
1339          newer than the PE, but that doesn't always seem to be the
1340          case.  Allow the PDB to be up to one minute older.
1341          Otherwise, it's probably out of date, in which case ignore it
1342          or we will either (a) print wrong stack traces or more likely
1343          (b) crash.
1344       */
1345       VG_(message)(Vg_UserMsg,
1346                    "Warning:       %s (mtime = %llu)\n"
1347                    " is older than %s (mtime = %llu)\n",
1348                    pdbname, pdb_mtime, exename, obj_mtime);
1349    }
1350 
1351    sres = VG_(open)(pdbname, VKI_O_RDONLY, 0);
1352    if (sr_isError(sres)) {
1353       VG_(message)(Vg_UserMsg, "Warning: Can't open %s\n", pdbname);
1354       goto out;
1355    }
1356 
1357    /* Looks promising; go on to try and read stuff from it.  But don't
1358       mmap the file.  Instead mmap free space and read the file into
1359       it.  This is because files on CIFS filesystems that are mounted
1360       '-o directio' can't be mmap'd, and that mount option is needed
1361       to make CIFS work reliably.  (See
1362       http://www.nabble.com/Corrupted-data-on-write-to-
1363                             Windows-2003-Server-t2782623.html)
1364       This is slower, but at least it works reliably. */
1365    fd_pdbimage = sr_Res(sres);
1366    n_pdbimage  = stat_buf.size;
1367    if (n_pdbimage == 0 || n_pdbimage > 0x7FFFFFFF) {
1368       // 0x7FFFFFFF: why?  Because the VG_(read) just below only
1369       // can deal with a signed int as the size of data to read,
1370       // so we can't reliably check for read failure for files
1371       // greater than that size.  Hence just skip them; we're
1372       // unlikely to encounter a PDB that large anyway.
1373       VG_(close)(fd_pdbimage);
1374       goto out;
1375    }
1376    sres = VG_(am_mmap_anon_float_valgrind)( n_pdbimage );
1377    if (sr_isError(sres)) {
1378       VG_(close)(fd_pdbimage);
1379       goto out;
1380    }
1381 
1382    void* pdbimage = (void*)sr_Res(sres);
1383    r = VG_(read)( fd_pdbimage, pdbimage, (Int)n_pdbimage );
1384    if (r < 0 || r != (Int)n_pdbimage) {
1385       VG_(am_munmap_valgrind)( (Addr)pdbimage, n_pdbimage );
1386       VG_(close)(fd_pdbimage);
1387       goto out;
1388    }
1389 
1390    if (VG_(clo_verbosity) > 0)
1391       VG_(message)(Vg_UserMsg, "LOAD_PDB_DEBUGINFO: pdbname: %s\n", pdbname);
1392 
1393    /* play safe; always invalidate the debug info caches.  I don't know if
1394       this is necessary, but anyway .. */
1395    caches__invalidate();
1396    /* dump old info for this range, if any */
1397    discard_syms_in_range( avma_obj, total_size );
1398 
1399    { DebugInfo* di = find_or_create_DebugInfo_for(exename);
1400 
1401      /* this di must be new, since we just nuked any old stuff in the range */
1402      vg_assert(di && !di->fsm.have_rx_map && !di->fsm.have_rw_map);
1403      vg_assert(!di->have_dinfo);
1404 
1405      /* don't set up any of the di-> fields; let
1406         ML_(read_pdb_debug_info) do it. */
1407      ML_(read_pdb_debug_info)( di, avma_obj, bias_obj,
1408                                pdbimage, n_pdbimage, pdbname, pdb_mtime );
1409      // JRS fixme: take notice of return value from read_pdb_debug_info,
1410      // and handle failure
1411      vg_assert(di->have_dinfo); // fails if PDB read failed
1412      VG_(am_munmap_valgrind)( (Addr)pdbimage, n_pdbimage );
1413      VG_(close)(fd_pdbimage);
1414 
1415      if (VG_(clo_verbosity) > 0) {
1416         VG_(message)(Vg_UserMsg, "LOAD_PDB_DEBUGINFO: done:    "
1417                                  "%lu syms, %lu src locs, %lu fpo recs\n",
1418                      di->symtab_used, di->loctab_used, di->fpo_size);
1419      }
1420    }
1421 
1422   out:
1423    if (pdbname) ML_(dinfo_free)(pdbname);
1424 }
1425 
1426 #endif /* defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris) */
1427 
1428 
1429 /*------------------------------------------------------------*/
1430 /*---                                                      ---*/
1431 /*--- TOP LEVEL: QUERYING EXISTING DEBUG INFO              ---*/
1432 /*---                                                      ---*/
1433 /*------------------------------------------------------------*/
1434 
VG_(di_discard_ALL_debuginfo)1435 void VG_(di_discard_ALL_debuginfo)( void )
1436 {
1437    DebugInfo *di, *di2;
1438    di = debugInfo_list;
1439    while (di) {
1440       di2 = di->next;
1441       VG_(printf)("XXX rm %p\n", di);
1442       free_DebugInfo( di );
1443       di = di2;
1444    }
1445 }
1446 
1447 
ML_(find_rx_mapping)1448 DebugInfoMapping* ML_(find_rx_mapping) ( DebugInfo* di, Addr lo, Addr hi )
1449 {
1450    Word i;
1451    vg_assert(lo <= hi);
1452 
1453    /* Optimization: Try to use the last matched rx mapping first */
1454    if (   di->last_rx_map
1455        && lo >= di->last_rx_map->avma
1456        && hi <  di->last_rx_map->avma + di->last_rx_map->size)
1457       return di->last_rx_map;
1458 
1459    for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) {
1460       DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, i);
1461       if (   map->rx && map->size > 0
1462           && lo >= map->avma && hi < map->avma + map->size) {
1463          di->last_rx_map = map;
1464          return map;
1465       }
1466    }
1467 
1468    return NULL;
1469 }
1470 
1471 /*------------------------------------------------------------*/
1472 /*--- Types and functions for inlined IP cursor            ---*/
1473 /*------------------------------------------------------------*/
1474 struct _InlIPCursor {
1475    Addr eip;             // Cursor used to describe calls at eip.
1476    DebugInfo* di;        // DebugInfo describing inlined calls at eip
1477 
1478    Word    inltab_lopos; // The inlined fn calls covering eip are in
1479    Word    inltab_hipos; // di->inltab[inltab_lopos..inltab_hipos].
1480                          // Note that not all inlined fn calls in this range
1481                          // are necessarily covering eip.
1482 
1483    Int   curlevel;       // Current level to describe.
1484                          // 0 means to describe eip itself.
1485    Word  cur_inltab;     // inltab pos for call inlined at current level.
1486    Word  next_inltab;    // inltab pos for call inlined at next (towards main)
1487                          // level.
1488 };
1489 
is_top(const InlIPCursor * iipc)1490 static Bool is_top(const InlIPCursor *iipc)
1491 {
1492    return !iipc || iipc->cur_inltab == -1;
1493 }
1494 
is_bottom(const InlIPCursor * iipc)1495 static Bool is_bottom(const InlIPCursor *iipc)
1496 {
1497    return !iipc || iipc->next_inltab == -1;
1498 }
1499 
VG_(next_IIPC)1500 Bool VG_(next_IIPC)(InlIPCursor *iipc)
1501 {
1502    Word i;
1503    DiInlLoc *hinl = NULL;
1504    Word hinl_pos = -1;
1505    DebugInfo *di;
1506 
1507    if (iipc == NULL)
1508       return False;
1509 
1510    if (iipc->curlevel <= 0) {
1511       iipc->curlevel--;
1512       return False;
1513    }
1514 
1515    di = iipc->di;
1516    for (i = iipc->inltab_lopos; i <= iipc->inltab_hipos; i++) {
1517       if (di->inltab[i].addr_lo <= iipc->eip
1518           && iipc->eip < di->inltab[i].addr_hi
1519           && di->inltab[i].level < iipc->curlevel
1520           && (!hinl || hinl->level < di->inltab[i].level)) {
1521          hinl = &di->inltab[i];
1522          hinl_pos = i;
1523       }
1524    }
1525 
1526    iipc->cur_inltab = iipc->next_inltab;
1527    iipc->next_inltab = hinl_pos;
1528    if (iipc->next_inltab < 0)
1529       iipc->curlevel = 0; // no inlined call anymore, describe eip itself
1530    else
1531       iipc->curlevel = di->inltab[iipc->next_inltab].level;
1532 
1533    return True;
1534 }
1535 
1536 /* Forward */
1537 static void search_all_loctabs ( Addr ptr, /*OUT*/DebugInfo** pdi,
1538                                            /*OUT*/Word* locno );
1539 
1540 /* Returns the position after which eip would be inserted in inltab.
1541    (-1 if eip should be inserted before position 0).
1542    This is the highest position with an addr_lo <= eip.
1543    As inltab is sorted on addr_lo, dichotomic search can be done
1544    (note that inltab might have duplicates addr_lo). */
inltab_insert_pos(DebugInfo * di,Addr eip)1545 static Word inltab_insert_pos (DebugInfo *di, Addr eip)
1546 {
1547    Word mid,
1548         lo = 0,
1549         hi = di->inltab_used-1;
1550    while (lo <= hi) {
1551       mid      = (lo + hi) / 2;
1552       if (eip < di->inltab[mid].addr_lo) { hi = mid-1; continue; }
1553       if (eip > di->inltab[mid].addr_lo) { lo = mid+1; continue; }
1554       lo = mid; break;
1555    }
1556 
1557    while (lo <= di->inltab_used-1 && di->inltab[lo].addr_lo <= eip)
1558       lo++;
1559 #if 0
1560    for (mid = 0; mid <= di->inltab_used-1; mid++)
1561       if (eip < di->inltab[mid].addr_lo)
1562          break;
1563    vg_assert (lo - 1 == mid - 1);
1564 #endif
1565    return lo - 1;
1566 }
1567 
VG_(new_IIPC)1568 InlIPCursor* VG_(new_IIPC)(Addr eip)
1569 {
1570    DebugInfo*  di;
1571    Word        locno;
1572    Word        i;
1573    InlIPCursor *ret;
1574    Bool        avail;
1575 
1576    if (!VG_(clo_read_inline_info))
1577       return NULL; // No way we can find inlined calls.
1578 
1579    /* Search the DebugInfo for eip */
1580    search_all_loctabs ( eip, &di, &locno );
1581    if (di == NULL || di->inltab_used == 0)
1582       return NULL; // No di (with inltab) containing eip.
1583 
1584    /* Search the entry in di->inltab with the highest addr_lo that
1585       contains eip. */
1586    /* We start from the highest pos in inltab after which eip would
1587       be inserted. */
1588    for (i = inltab_insert_pos (di, eip); i >= 0; i--) {
1589       if (di->inltab[i].addr_lo <= eip && eip < di->inltab[i].addr_hi) {
1590          break;
1591       }
1592       /* Stop the backward scan when reaching an addr_lo which
1593          cannot anymore contain eip : we know that all ranges before
1594          i also cannot contain eip. */
1595       if (di->inltab[i].addr_lo < eip - di->maxinl_codesz)
1596          return NULL;
1597    }
1598 
1599    if (i < 0)
1600       return NULL; // No entry containing eip.
1601 
1602    /* We have found the highest entry containing eip.
1603       Build a cursor. */
1604    ret = ML_(dinfo_zalloc) ("dinfo.new_IIPC", sizeof(*ret));
1605    ret->eip = eip;
1606    ret->di = di;
1607    ret->inltab_hipos = i;
1608    for (i = ret->inltab_hipos - 1; i >= 0; i--) {
1609 
1610       if (di->inltab[i].addr_lo < eip - di->maxinl_codesz)
1611          break; /* Similar stop backward scan logic as above. */
1612    }
1613    ret->inltab_lopos = i + 1;
1614    ret->curlevel = MAX_LEVEL;
1615    ret->cur_inltab = -1;
1616    ret->next_inltab = -1;
1617 
1618    /* MAX_LEVEL is higher than any stored level. We can use
1619       VG_(next_IIPC) to get to the 'real' first highest call level. */
1620    avail = VG_(next_IIPC) (ret);
1621    vg_assert (avail);
1622 
1623    return ret;
1624 }
1625 
VG_(delete_IIPC)1626 void VG_(delete_IIPC)(InlIPCursor *iipc)
1627 {
1628    if (iipc)
1629       ML_(dinfo_free)( iipc );
1630 }
1631 
1632 
1633 /*------------------------------------------------------------*/
1634 /*--- Use of symbol table & location info to create        ---*/
1635 /*--- plausible-looking stack dumps.                       ---*/
1636 /*------------------------------------------------------------*/
1637 
1638 /* Search all symtabs that we know about to locate ptr.  If found, set
1639    *pdi to the relevant DebugInfo, and *symno to the symtab entry
1640    *number within that.  If not found, *psi is set to NULL.
1641    If findText==True,  only text symbols are searched for.
1642    If findText==False, only data symbols are searched for.
1643 */
search_all_symtabs(Addr ptr,DebugInfo ** pdi,Word * symno,Bool findText)1644 static void search_all_symtabs ( Addr ptr, /*OUT*/DebugInfo** pdi,
1645                                            /*OUT*/Word* symno,
1646                                  Bool findText )
1647 {
1648    Word       sno;
1649    DebugInfo* di;
1650    Bool       inRange;
1651 
1652    for (di = debugInfo_list; di != NULL; di = di->next) {
1653 
1654       if (findText) {
1655          /* Consider any symbol in the r-x mapped area to be text.
1656             See Comment_Regarding_Text_Range_Checks in storage.c for
1657             details. */
1658          inRange = di->fsm.have_rx_map
1659                    && (ML_(find_rx_mapping)(di, ptr, ptr) != NULL);
1660       } else {
1661          inRange = (di->data_present
1662                     && di->data_size > 0
1663                     && di->data_avma <= ptr
1664                     && ptr < di->data_avma + di->data_size)
1665                    ||
1666                    (di->sdata_present
1667                     && di->sdata_size > 0
1668                     && di->sdata_avma <= ptr
1669                     && ptr < di->sdata_avma + di->sdata_size)
1670                    ||
1671                    (di->bss_present
1672                     && di->bss_size > 0
1673                     && di->bss_avma <= ptr
1674                     && ptr < di->bss_avma + di->bss_size)
1675                    ||
1676                    (di->sbss_present
1677                     && di->sbss_size > 0
1678                     && di->sbss_avma <= ptr
1679                     && ptr < di->sbss_avma + di->sbss_size)
1680                    ||
1681                    (di->rodata_present
1682                     && di->rodata_size > 0
1683                     && di->rodata_avma <= ptr
1684                     && ptr < di->rodata_avma + di->rodata_size);
1685       }
1686 
1687       if (!inRange) continue;
1688 
1689       sno = ML_(search_one_symtab) ( di, ptr, findText );
1690       if (sno == -1) goto not_found;
1691       *symno = sno;
1692       *pdi = di;
1693       return;
1694 
1695    }
1696   not_found:
1697    *pdi = NULL;
1698 }
1699 
1700 
1701 /* Search all loctabs that we know about to locate ptr.  If found, set
1702    *pdi to the relevant DebugInfo, and *locno to the loctab entry
1703    *number within that.  If not found, *pdi is set to NULL. */
search_all_loctabs(Addr ptr,DebugInfo ** pdi,Word * locno)1704 static void search_all_loctabs ( Addr ptr, /*OUT*/DebugInfo** pdi,
1705                                            /*OUT*/Word* locno )
1706 {
1707    Word       lno;
1708    DebugInfo* di;
1709    for (di = debugInfo_list; di != NULL; di = di->next) {
1710       if (di->text_present
1711           && di->text_size > 0
1712           && di->text_avma <= ptr
1713           && ptr < di->text_avma + di->text_size) {
1714          lno = ML_(search_one_loctab) ( di, ptr );
1715          if (lno == -1) goto not_found;
1716          *locno = lno;
1717          *pdi = di;
1718          return;
1719       }
1720    }
1721   not_found:
1722    *pdi = NULL;
1723 }
1724 
1725 /* Caching of queries to symbol names. */
1726 // Prime number, giving about 6Kbytes cache on 32 bits,
1727 //                           12Kbytes cache on 64 bits.
1728 #define N_SYM_NAME_CACHE 509
1729 
1730 typedef
1731    struct {
1732       Addr sym_avma;
1733       const HChar* sym_name;
1734       PtrdiffT offset : (sizeof(PtrdiffT)*8)-1;
1735       Bool isText : 1;
1736    }
1737    Sym_Name_CacheEnt;
1738 /* Sym_Name_CacheEnt associates a queried address to the sym name found.
1739    By nature, if a sym name was found, it means the searched address
1740    stored in the cache is an avma (see e.g. search_all_symtabs).
1741    Note however that the caller is responsibe to work with 'avma'
1742    addresses e.g. when calling VG_(get_fnname) : m_debuginfo.c has
1743    no way to differentiate an 'svma a' from an 'avma a'. It is however
1744    unlikely that svma would percolate outside of this module. */
1745 
1746 static Sym_Name_CacheEnt sym_name_cache[N_SYM_NAME_CACHE];
1747 
1748 static const HChar* no_sym_name = "<<<noname>>>";
1749 /* We need a special marker for the address 0 : a not used entry has
1750    a zero sym_avma. So, if ever the 0 address is really queried, we need
1751    to be able to detect there is no sym name for this address.
1752    If on some platforms, 0 is associated to a symbol, the cache would
1753    work properly. */
1754 
sym_name_cache__invalidate(void)1755 static void sym_name_cache__invalidate ( void ) {
1756    VG_(memset)(&sym_name_cache, 0, sizeof(sym_name_cache));
1757    sym_name_cache[0].sym_name = no_sym_name;
1758 }
1759 
1760 /* The whole point of this whole big deal: map a code address to a
1761    plausible symbol name.  Returns False if no idea; otherwise True.
1762    Caller supplies buf.  If do_cxx_demangling is False, don't do
1763    C++ demangling, regardless of VG_(clo_demangle) -- probably because the
1764    call has come from VG_(get_fnname_raw)().  findText
1765    indicates whether we're looking for a text symbol or a data symbol
1766    -- caller must choose one kind or the other.
1767    NOTE: See IMPORTANT COMMENT above about persistence and ownership
1768    in pub_tool_debuginfo.h
1769    get_sym_name and the fact it calls the demangler is the main reason
1770    for non persistence of the information returned by m_debuginfo.c
1771    functions : the string returned in *BUF is persistent as long as
1772    (1) the DebugInfo it belongs to is not discarded
1773    (2) the demangler is not invoked again
1774    Also, the returned string is owned by "somebody else". Callers must
1775    not free it or modify it.*/
1776 static
get_sym_name(Bool do_cxx_demangling,Bool do_z_demangling,Bool do_below_main_renaming,Addr a,const HChar ** buf,Bool match_anywhere_in_sym,Bool show_offset,Bool findText,PtrdiffT * offsetP)1777 Bool get_sym_name ( Bool do_cxx_demangling, Bool do_z_demangling,
1778                     Bool do_below_main_renaming,
1779                     Addr a, const HChar** buf,
1780                     Bool match_anywhere_in_sym, Bool show_offset,
1781                     Bool findText, /*OUT*/PtrdiffT* offsetP )
1782 {
1783    UWord         hash = a % N_SYM_NAME_CACHE;
1784    Sym_Name_CacheEnt* se =  &sym_name_cache[hash];
1785 
1786    if (UNLIKELY(se->sym_avma != a || se->isText != findText)) {
1787       DebugInfo* di;
1788       Word       sno;
1789 
1790       search_all_symtabs ( a, &di, &sno, findText );
1791       se->sym_avma = a;
1792       se->isText = findText;
1793       if (di == NULL || a == 0)
1794          se->sym_name = no_sym_name;
1795       else {
1796          vg_assert(di->symtab[sno].pri_name);
1797          se->sym_name = di->symtab[sno].pri_name;
1798          se->offset = a - di->symtab[sno].avmas.main;
1799       }
1800    }
1801 
1802    if (se->sym_name == no_sym_name
1803        || (!match_anywhere_in_sym && se->offset != 0)) {
1804       *buf = "";
1805       return False;
1806    }
1807 
1808    VG_(demangle) ( do_cxx_demangling, do_z_demangling,
1809                    se->sym_name, buf );
1810 
1811    /* Do the below-main hack */
1812    // To reduce the endless nuisance of multiple different names
1813    // for "the frame below main()" screwing up the testsuite, change all
1814    // known incarnations of said into a single name, "(below main)", if
1815    // --show-below-main=yes.
1816    if ( do_below_main_renaming && ! VG_(clo_show_below_main)
1817         && Vg_FnNameBelowMain == VG_(get_fnname_kind)(*buf) )
1818    {
1819      *buf = "(below main)";
1820    }
1821 
1822    if (offsetP) *offsetP = se->offset;
1823 
1824    if (show_offset && se->offset != 0) {
1825       static HChar *bufwo;      // buf with offset
1826       static SizeT  bufwo_szB;
1827       SizeT  need, len;
1828 
1829       len = VG_(strlen)(*buf);
1830       need = len + 1 + 19 + 1;
1831       if (need > bufwo_szB) {
1832         bufwo = ML_(dinfo_realloc)("get_sym_size", bufwo, need);
1833         bufwo_szB = need;
1834       }
1835 
1836       VG_(strcpy)(bufwo, *buf);
1837       VG_(sprintf)(bufwo + len, "%c%ld",
1838                    se->offset < 0 ? '-' : '+',
1839                    (PtrdiffT) (se->offset < 0 ? -se->offset : se->offset));
1840       *buf = bufwo;
1841    }
1842 
1843    return True;
1844 }
1845 
1846 /* ppc64be-linux only: find the TOC pointer (R2 value) that should be in
1847    force at the entry point address of the function containing
1848    guest_code_addr.  Returns 0 if not known. */
VG_(get_tocptr)1849 Addr VG_(get_tocptr) ( Addr guest_code_addr )
1850 {
1851 #if defined(VGA_ppc64be) || defined(VGA_ppc64le)
1852    DebugInfo* si;
1853    Word       sno;
1854    search_all_symtabs ( guest_code_addr,
1855                         &si, &sno,
1856                         True/*consider text symbols only*/ );
1857    if (si == NULL)
1858       return 0;
1859    else
1860       return GET_TOCPTR_AVMA(si->symtab[sno].avmas);
1861 #else
1862    return 0;
1863 #endif
1864 }
1865 
1866 /* This is available to tools... always demangle C++ names,
1867    match anywhere in function, but don't show offsets.
1868    NOTE: See IMPORTANT COMMENT above about persistence and ownership
1869    in pub_tool_debuginfo.h */
VG_(get_fnname)1870 Bool VG_(get_fnname) ( Addr a, const HChar** buf )
1871 {
1872    return get_sym_name ( /*C++-demangle*/True, /*Z-demangle*/True,
1873                          /*below-main-renaming*/True,
1874                          a, buf,
1875                          /*match_anywhere_in_fun*/True,
1876                          /*show offset?*/False,
1877                          /*text sym*/True,
1878                          /*offsetP*/NULL );
1879 }
1880 
1881 /* This is available to tools... always demangle C++ names,
1882    match anywhere in function, and show offset if nonzero.
1883    NOTE: See IMPORTANT COMMENT above about persistence and ownership
1884    in pub_tool_debuginfo.h */
VG_(get_fnname_w_offset)1885 Bool VG_(get_fnname_w_offset) ( Addr a, const HChar** buf )
1886 {
1887    return get_sym_name ( /*C++-demangle*/True, /*Z-demangle*/True,
1888                          /*below-main-renaming*/True,
1889                          a, buf,
1890                          /*match_anywhere_in_fun*/True,
1891                          /*show offset?*/True,
1892                          /*text sym*/True,
1893                          /*offsetP*/NULL );
1894 }
1895 
1896 /* This is available to tools... always demangle C++ names,
1897    only succeed if 'a' matches first instruction of function,
1898    and don't show offsets.
1899    NOTE: See IMPORTANT COMMENT above about persistence and ownership
1900    in pub_tool_debuginfo.h */
VG_(get_fnname_if_entry)1901 Bool VG_(get_fnname_if_entry) ( Addr a, const HChar** buf )
1902 {
1903    const HChar *tmp;
1904    Bool res;
1905 
1906    res =  get_sym_name ( /*C++-demangle*/True, /*Z-demangle*/True,
1907                          /*below-main-renaming*/True,
1908                          a, &tmp,
1909                          /*match_anywhere_in_fun*/False,
1910                          /*show offset?*/False,
1911                          /*text sym*/True,
1912                          /*offsetP*/NULL );
1913    if (res)
1914       *buf = tmp;
1915    return res;
1916 }
1917 
1918 /* This is only available to core... don't C++-demangle, don't Z-demangle,
1919    don't rename below-main, match anywhere in function, and don't show
1920    offsets.
1921    NOTE: See IMPORTANT COMMENT above about persistence and ownership
1922    in pub_tool_debuginfo.h  */
VG_(get_fnname_raw)1923 Bool VG_(get_fnname_raw) ( Addr a, const HChar** buf )
1924 {
1925    return get_sym_name ( /*C++-demangle*/False, /*Z-demangle*/False,
1926                          /*below-main-renaming*/False,
1927                          a, buf,
1928                          /*match_anywhere_in_fun*/True,
1929                          /*show offset?*/False,
1930                          /*text sym*/True,
1931                          /*offsetP*/NULL );
1932 }
1933 
1934 /* This is only available to core... don't demangle C++ names, but do
1935    do Z-demangling and below-main-renaming, match anywhere in function, and
1936    don't show offsets.
1937    NOTE: See IMPORTANT COMMENT above about persistence and ownership
1938    in pub_tool_debuginfo.h */
VG_(get_fnname_no_cxx_demangle)1939 Bool VG_(get_fnname_no_cxx_demangle) ( Addr a, const HChar** buf,
1940                                        const InlIPCursor* iipc )
1941 {
1942    if (is_bottom(iipc)) {
1943       // At the bottom (towards main), we describe the fn at eip.
1944       return get_sym_name ( /*C++-demangle*/False, /*Z-demangle*/True,
1945                             /*below-main-renaming*/True,
1946                             a, buf,
1947                             /*match_anywhere_in_fun*/True,
1948                             /*show offset?*/False,
1949                             /*text sym*/True,
1950                             /*offsetP*/NULL );
1951    } else {
1952       const DiInlLoc *next_inl = iipc && iipc->next_inltab >= 0
1953          ? & iipc->di->inltab[iipc->next_inltab]
1954          : NULL;
1955       vg_assert (next_inl);
1956       // The function we are in is called by next_inl.
1957       *buf = next_inl->inlinedfn;
1958       return True;
1959    }
1960 }
1961 
1962 /* mips-linux only: find the offset of current address. This is needed for
1963    stack unwinding for MIPS.
1964 */
VG_(get_inst_offset_in_function)1965 Bool VG_(get_inst_offset_in_function)( Addr a,
1966                                        /*OUT*/PtrdiffT* offset )
1967 {
1968    const HChar *fnname;
1969    return get_sym_name ( /*C++-demangle*/False, /*Z-demangle*/False,
1970                          /*below-main-renaming*/False,
1971                          a, &fnname,
1972                          /*match_anywhere_in_sym*/True,
1973                          /*show offset?*/False,
1974                          /*text sym*/True,
1975                          offset );
1976 }
1977 
VG_(get_fnname_kind)1978 Vg_FnNameKind VG_(get_fnname_kind) ( const HChar* name )
1979 {
1980    if (VG_STREQ("main", name)) {
1981       return Vg_FnNameMain;
1982 
1983    } else if (
1984 #      if defined(VGO_linux)
1985        VG_STREQ("__libc_start_main",  name) ||  // glibc glibness
1986        VG_STREQ("generic_start_main", name) ||  // Yellow Dog doggedness
1987 #      elif defined(VGO_darwin)
1988        // See readmacho.c for an explanation of this.
1989        VG_STREQ("start_according_to_valgrind", name) ||  // Darwin, darling
1990 #      elif defined(VGO_solaris)
1991        VG_STREQ("_start", name) || // main() is called directly from _start
1992 #      else
1993 #        error "Unknown OS"
1994 #      endif
1995        0) {
1996       return Vg_FnNameBelowMain;
1997 
1998    } else {
1999       return Vg_FnNameNormal;
2000    }
2001 }
2002 
VG_(get_fnname_kind_from_IP)2003 Vg_FnNameKind VG_(get_fnname_kind_from_IP) ( Addr ip )
2004 {
2005    const HChar *buf;
2006 
2007    // We don't demangle, because it's faster not to, and the special names
2008    // we're looking for won't be mangled.
2009    if (VG_(get_fnname_raw) ( ip, &buf )) {
2010 
2011       return VG_(get_fnname_kind)(buf);
2012    } else {
2013       return Vg_FnNameNormal;    // Don't know the name, treat it as normal.
2014    }
2015 }
2016 
2017 /* Looks up data_addr in the collection of data symbols, and if found
2018    puts a pointer to its name into dname. The name is zero terminated.
2019    Also data_addr's offset from the symbol start is put into *offset.
2020    NOTE: See IMPORTANT COMMENT above about persistence and ownership
2021    in pub_tool_debuginfo.h  */
VG_(get_datasym_and_offset)2022 Bool VG_(get_datasym_and_offset)( Addr data_addr,
2023                                   /*OUT*/const HChar** dname,
2024                                   /*OUT*/PtrdiffT* offset )
2025 {
2026    return get_sym_name ( /*C++-demangle*/False, /*Z-demangle*/False,
2027                        /*below-main-renaming*/False,
2028                        data_addr, dname,
2029                        /*match_anywhere_in_sym*/True,
2030                        /*show offset?*/False,
2031                        /*text sym*/False,
2032                        offset );
2033 }
2034 
2035 /* Map a code address to the name of a shared object file or the
2036    executable.  Returns False if no idea; otherwise True.
2037    Note: the string returned in *BUF is persistent as long as
2038    (1) the DebugInfo it belongs to is not discarded
2039    (2) the segment containing the address is not merged with another segment
2040 */
VG_(get_objname)2041 Bool VG_(get_objname) ( Addr a, const HChar** objname )
2042 {
2043    DebugInfo* di;
2044    const NSegment *seg;
2045    const HChar* filename;
2046 
2047    /* Look in the debugInfo_list to find the name.  In most cases we
2048       expect this to produce a result. */
2049    for (di = debugInfo_list; di != NULL; di = di->next) {
2050       if (di->text_present
2051           && di->text_size > 0
2052           && di->text_avma <= a
2053           && a < di->text_avma + di->text_size) {
2054          *objname = di->fsm.filename;
2055          return True;
2056       }
2057    }
2058    /* Last-ditch fallback position: if we don't find the address in
2059       the debugInfo_list, ask the address space manager whether it
2060       knows the name of the file associated with this mapping.  This
2061       allows us to print the names of exe/dll files in the stack trace
2062       when running programs under wine. */
2063    if ( (seg = VG_(am_find_nsegment)(a)) != NULL
2064         && (filename = VG_(am_get_filename)(seg)) != NULL ) {
2065       *objname = filename;
2066       return True;
2067    }
2068    return False;
2069 }
2070 
2071 /* Map a code address to its DebugInfo.  Returns NULL if not found.  Doesn't
2072    require debug info. */
VG_(find_DebugInfo)2073 DebugInfo* VG_(find_DebugInfo) ( Addr a )
2074 {
2075    static UWord n_search = 0;
2076    DebugInfo* di;
2077    n_search++;
2078    for (di = debugInfo_list; di != NULL; di = di->next) {
2079       if (di->text_present
2080           && di->text_size > 0
2081           && di->text_avma <= a
2082           && a < di->text_avma + di->text_size) {
2083          if (0 == (n_search & 0xF))
2084             move_DebugInfo_one_step_forward( di );
2085          return di;
2086       }
2087    }
2088    return NULL;
2089 }
2090 
2091 /* Map a code address to a filename.  Returns True if successful. The
2092    returned string is persistent as long as the DebugInfo to which it
2093    belongs is not discarded. */
VG_(get_filename)2094 Bool VG_(get_filename)( Addr a, const HChar** filename )
2095 {
2096    DebugInfo* si;
2097    Word       locno;
2098    UInt       fndn_ix;
2099 
2100    search_all_loctabs ( a, &si, &locno );
2101    if (si == NULL)
2102       return False;
2103    fndn_ix = ML_(fndn_ix) (si, locno);
2104    *filename = ML_(fndn_ix2filename) (si, fndn_ix);
2105    return True;
2106 }
2107 
2108 /* Map a code address to a line number.  Returns True if successful. */
VG_(get_linenum)2109 Bool VG_(get_linenum)( Addr a, UInt* lineno )
2110 {
2111    DebugInfo* si;
2112    Word       locno;
2113    search_all_loctabs ( a, &si, &locno );
2114    if (si == NULL)
2115       return False;
2116    *lineno = si->loctab[locno].lineno;
2117 
2118    return True;
2119 }
2120 
2121 /* Map a code address to a filename/line number/dir name info.
2122    See prototype for detailed description of behaviour.
2123 */
VG_(get_filename_linenum)2124 Bool VG_(get_filename_linenum) ( Addr a,
2125                                  /*OUT*/const HChar** filename,
2126                                  /*OUT*/const HChar** dirname,
2127                                  /*OUT*/UInt* lineno )
2128 {
2129    DebugInfo* si;
2130    Word       locno;
2131    UInt       fndn_ix;
2132 
2133    search_all_loctabs ( a, &si, &locno );
2134    if (si == NULL) {
2135       if (dirname) {
2136          *dirname = "";
2137       }
2138       *filename = "";      // this used to be not initialised....
2139       return False;
2140    }
2141 
2142    fndn_ix = ML_(fndn_ix)(si, locno);
2143    *filename = ML_(fndn_ix2filename) (si, fndn_ix);
2144    *lineno = si->loctab[locno].lineno;
2145 
2146    if (dirname) {
2147       /* caller wants directory info too .. */
2148       *dirname = ML_(fndn_ix2dirname) (si, fndn_ix);
2149    }
2150 
2151    return True;
2152 }
2153 
2154 
2155 /* Map a function name to its entry point and toc pointer.  Is done by
2156    sequential search of all symbol tables, so is very slow.  To
2157    mitigate the worst performance effects, you may specify a soname
2158    pattern, and only objects matching that pattern are searched.
2159    Therefore specify "*" to search all the objects.  On TOC-afflicted
2160    platforms, a symbol is deemed to be found only if it has a nonzero
2161    TOC pointer.  */
VG_(lookup_symbol_SLOW)2162 Bool VG_(lookup_symbol_SLOW)(const HChar* sopatt, const HChar* name,
2163                              SymAVMAs* avmas)
2164 {
2165    Bool     require_pToc = False;
2166    Int      i;
2167    const DebugInfo* si;
2168    Bool     debug = False;
2169 #  if defined(VG_PLAT_USES_PPCTOC)
2170    require_pToc = True;
2171 #  endif
2172    for (si = debugInfo_list; si; si = si->next) {
2173       if (debug)
2174          VG_(printf)("lookup_symbol_SLOW: considering %s\n", si->soname);
2175       if (!VG_(string_match)(sopatt, si->soname)) {
2176          if (debug)
2177             VG_(printf)(" ... skip\n");
2178          continue;
2179       }
2180       for (i = 0; i < si->symtab_used; i++) {
2181          const HChar* pri_name = si->symtab[i].pri_name;
2182          vg_assert(pri_name);
2183          if (0==VG_(strcmp)(name, pri_name)
2184              && (require_pToc ? GET_TOCPTR_AVMA(si->symtab[i].avmas) : True)) {
2185             *avmas = si->symtab[i].avmas;
2186             return True;
2187          }
2188          const HChar** sec_names = si->symtab[i].sec_names;
2189          if (sec_names) {
2190             vg_assert(sec_names[0]);
2191             while (*sec_names) {
2192                if (0==VG_(strcmp)(name, *sec_names)
2193                    && (require_pToc
2194                        ? GET_TOCPTR_AVMA(si->symtab[i].avmas) : True)) {
2195                   *avmas = si->symtab[i].avmas;
2196                   return True;
2197                }
2198                sec_names++;
2199             }
2200          }
2201       }
2202    }
2203    return False;
2204 }
2205 
2206 
2207 /* VG_(describe_IP): return info on code address, function name and
2208    filename. The returned string is allocated in a static buffer and will
2209    be overwritten in the next invocation. */
2210 
2211 /* Copy str into *buf starting at n, ensuring that buf is zero-terminated.
2212    Return the index of the terminating null character. */
2213 static SizeT
putStr(SizeT n,HChar ** buf,SizeT * bufsiz,const HChar * str)2214 putStr( SizeT n, HChar** buf, SizeT *bufsiz, const HChar* str )
2215 {
2216    SizeT slen = VG_(strlen)(str);
2217    SizeT need = n + slen + 1;
2218 
2219    if (need > *bufsiz) {
2220       if (need < 256) need = 256;
2221       *bufsiz = need;
2222       *buf = ML_(dinfo_realloc)("putStr", *buf, *bufsiz);
2223    }
2224 
2225    VG_(strcpy)(*buf + n, str);
2226 
2227    return n + slen;
2228 }
2229 
2230 /* Same as putStr, but escaping chars for XML output. */
2231 static SizeT
putStrEsc(SizeT n,HChar ** buf,SizeT * bufsiz,const HChar * str)2232 putStrEsc( SizeT n, HChar** buf, SizeT *bufsiz, const HChar* str )
2233 {
2234    HChar alt[2];
2235 
2236    for (; *str != 0; str++) {
2237       switch (*str) {
2238          case '&':
2239             n = putStr( n, buf, bufsiz, "&amp;");
2240             break;
2241          case '<':
2242             n = putStr( n, buf, bufsiz, "&lt;");
2243             break;
2244          case '>':
2245             n = putStr( n, buf, bufsiz, "&gt;");
2246             break;
2247          default:
2248             alt[0] = *str;
2249             alt[1] = 0;
2250             n = putStr( n, buf, bufsiz, alt );
2251             break;
2252       }
2253    }
2254    return n;
2255 }
2256 
VG_(describe_IP)2257 const HChar* VG_(describe_IP)(Addr eip, const InlIPCursor *iipc)
2258 {
2259    static HChar *buf = NULL;
2260    static SizeT bufsiz = 0;
2261 #  define APPEND(_str) \
2262       n = putStr(n, &buf, &bufsiz, _str)
2263 #  define APPEND_ESC(_str) \
2264       n = putStrEsc(n, &buf, &bufsiz, _str)
2265 
2266    UInt  lineno;
2267    HChar ibuf[50];   // large enough
2268    SizeT n = 0;
2269 
2270    vg_assert (!iipc || iipc->eip == eip);
2271 
2272    const HChar *buf_fn;
2273    const HChar *buf_obj;
2274    const HChar *buf_srcloc;
2275    const HChar *buf_dirname;
2276 
2277    Bool  know_dirinfo;
2278    Bool  know_fnname;
2279    Bool  know_objname;
2280    Bool  know_srcloc;
2281 
2282    if (is_bottom(iipc)) {
2283       // At the bottom (towards main), we describe the fn at eip.
2284       know_fnname = VG_(clo_sym_offsets)
2285                     ? VG_(get_fnname_w_offset) (eip, &buf_fn)
2286                     : VG_(get_fnname) (eip, &buf_fn);
2287    } else {
2288       const DiInlLoc *next_inl = iipc && iipc->next_inltab >= 0
2289          ? & iipc->di->inltab[iipc->next_inltab]
2290          : NULL;
2291       vg_assert (next_inl);
2292       // The function we are in is called by next_inl.
2293       buf_fn = next_inl->inlinedfn;
2294       know_fnname = True;
2295 
2296       // INLINED????
2297       // ??? Can we compute an offset for an inlined fn call ?
2298       // ??? Offset from what ? The beginning of the inl info ?
2299       // ??? But that is not necessarily the beginning of the fn
2300       // ??? as e.g. an inlined fn call can be in several ranges.
2301       // ??? Currently never showing an offset.
2302    }
2303 
2304    know_objname = VG_(get_objname)(eip, &buf_obj);
2305 
2306    if (is_top(iipc)) {
2307       // The source for the highest level is in the loctab entry.
2308       know_srcloc  = VG_(get_filename_linenum)(
2309                         eip,
2310                         &buf_srcloc,
2311                         &buf_dirname,
2312                         &lineno
2313                      );
2314       know_dirinfo = buf_dirname[0] != '\0';
2315    } else {
2316       const DiInlLoc *cur_inl = iipc && iipc->cur_inltab >= 0
2317          ? & iipc->di->inltab[iipc->cur_inltab]
2318          : NULL;
2319       vg_assert (cur_inl);
2320 
2321       know_dirinfo = False;
2322       buf_dirname  = "";
2323       // The fndn_ix and lineno for the caller of the inlined fn is in cur_inl.
2324       if (cur_inl->fndn_ix == 0) {
2325          buf_srcloc = "???";
2326       } else {
2327          FnDn *fndn = VG_(indexEltNumber) (iipc->di->fndnpool,
2328                                            cur_inl->fndn_ix);
2329          if (fndn->dirname) {
2330             buf_dirname = fndn->dirname;
2331             know_dirinfo = True;
2332          }
2333          buf_srcloc = fndn->filename;
2334       }
2335       lineno = cur_inl->lineno;
2336       know_srcloc = True;
2337    }
2338 
2339    if (VG_(clo_xml)) {
2340 
2341       Bool   human_readable = True;
2342       const HChar* maybe_newline  = human_readable ? "\n      " : "";
2343       const HChar* maybe_newline2 = human_readable ? "\n    "   : "";
2344 
2345       /* Print in XML format, dumping in as much info as we know.
2346          Ensure all tags are balanced. */
2347       APPEND("<frame>");
2348       VG_(sprintf)(ibuf,"<ip>0x%lX</ip>", eip);
2349       APPEND(maybe_newline);
2350       APPEND(ibuf);
2351       if (know_objname) {
2352          APPEND(maybe_newline);
2353          APPEND("<obj>");
2354          APPEND_ESC(buf_obj);
2355          APPEND("</obj>");
2356       }
2357       if (know_fnname) {
2358          APPEND(maybe_newline);
2359          APPEND("<fn>");
2360          APPEND_ESC(buf_fn);
2361          APPEND("</fn>");
2362       }
2363       if (know_srcloc) {
2364          if (know_dirinfo) {
2365             APPEND(maybe_newline);
2366             APPEND("<dir>");
2367             APPEND_ESC(buf_dirname);
2368             APPEND("</dir>");
2369          }
2370          APPEND(maybe_newline);
2371          APPEND("<file>");
2372          APPEND_ESC(buf_srcloc);
2373          APPEND("</file>");
2374          APPEND(maybe_newline);
2375          APPEND("<line>");
2376          VG_(sprintf)(ibuf,"%u",lineno);
2377          APPEND(ibuf);
2378          APPEND("</line>");
2379       }
2380       APPEND(maybe_newline2);
2381       APPEND("</frame>");
2382 
2383    } else {
2384 
2385       /* Print for humans to read */
2386       //
2387       // Possible forms:
2388       //
2389       //   0x80483BF: really (a.c:20)
2390       //   0x80483BF: really (in /foo/a.out)
2391       //   0x80483BF: really (in ???)
2392       //   0x80483BF: ??? (in /foo/a.out)
2393       //   0x80483BF: ??? (a.c:20)
2394       //   0x80483BF: ???
2395       //
2396       VG_(sprintf)(ibuf,"0x%lX: ", eip);
2397       APPEND(ibuf);
2398       if (know_fnname) {
2399          APPEND(buf_fn);
2400       } else {
2401          APPEND("???");
2402       }
2403       if (know_srcloc) {
2404          APPEND(" (");
2405          // Get the directory name, if any, possibly pruned, into dirname.
2406          const HChar* dirname = NULL;
2407          if (know_dirinfo && VG_(sizeXA)(VG_(clo_fullpath_after)) > 0) {
2408             Int i;
2409             dirname = buf_dirname;
2410             // Remove leading prefixes from the dirname.
2411             // If user supplied --fullpath-after=foo, this will remove
2412             // a leading string which matches '.*foo' (not greedy).
2413             for (i = 0; i < VG_(sizeXA)(VG_(clo_fullpath_after)); i++) {
2414                const HChar* prefix =
2415                   *(HChar**) VG_(indexXA)( VG_(clo_fullpath_after), i );
2416                HChar* str    = VG_(strstr)(dirname, prefix);
2417                if (str) {
2418                   dirname = str + VG_(strlen)(prefix);
2419                   break;
2420                }
2421             }
2422             /* remove leading "./" */
2423             if (dirname[0] == '.' && dirname[1] == '/')
2424                dirname += 2;
2425          }
2426          // do we have any interesting directory name to show?  If so
2427          // add it in.
2428          if (dirname && dirname[0] != 0) {
2429             APPEND(dirname);
2430             APPEND("/");
2431          }
2432          APPEND(buf_srcloc);
2433          APPEND(":");
2434          VG_(sprintf)(ibuf,"%u",lineno);
2435          APPEND(ibuf);
2436          APPEND(")");
2437       } else if (know_objname) {
2438          APPEND(" (in ");
2439          APPEND(buf_obj);
2440          APPEND(")");
2441       } else if (know_fnname) {
2442          // Nb: do this in two steps because "??)" is a trigraph!
2443          APPEND(" (in ???");
2444          APPEND(")");
2445       }
2446 
2447    }
2448    return buf;
2449 
2450 #  undef APPEND
2451 #  undef APPEND_ESC
2452 }
2453 
2454 
2455 /*--------------------------------------------------------------*/
2456 /*---                                                        ---*/
2457 /*--- TOP LEVEL: FOR UNWINDING THE STACK USING               ---*/
2458 /*---            DWARF3 .eh_frame INFO                       ---*/
2459 /*---                                                        ---*/
2460 /*--------------------------------------------------------------*/
2461 
/* Gather up all the constant pieces of info needed to evaluate
   a CfiExpr into one convenient struct.  evalCfiExpr passes the same
   (const) context down to every sub-expression it evaluates. */
typedef
   struct {
      /* Register values read by Cex_CfiReg nodes. */
      const D3UnwindRegs* uregs;
      /* Bounds checked by evalCfiExpr before a Cex_Deref node reads
         memory: addresses below min_accessible, or above
         max_accessible - sizeof(UWord) + 1, make the evaluation fail. */
      Addr          min_accessible;
      Addr          max_accessible;
   }
   CfiExprEvalContext;
2471 
2472 /* Evaluate the CfiExpr rooted at ix in exprs given the context eec.
2473    *ok is set to False on failure, but not to True on success.  The
2474    caller must set it to True before calling. */
2475 __attribute__((noinline))
2476 static
evalCfiExpr(const XArray * exprs,Int ix,const CfiExprEvalContext * eec,Bool * ok)2477 UWord evalCfiExpr ( const XArray* exprs, Int ix,
2478                     const CfiExprEvalContext* eec, Bool* ok )
2479 {
2480    UWord w, wL, wR;
2481    Addr  a;
2482    const CfiExpr* e;
2483    vg_assert(sizeof(Addr) == sizeof(UWord));
2484    e = VG_(indexXA)( exprs, ix );
2485    switch (e->tag) {
2486       case Cex_Unop:
2487          w = evalCfiExpr( exprs, e->Cex.Unop.ix, eec, ok );
2488          if (!(*ok)) return 0;
2489          switch (e->Cex.Unop.op) {
2490             case Cunop_Abs: return (Word) w < 0 ? - w : w;
2491             case Cunop_Neg: return - (Word) w;
2492             case Cunop_Not: return ~ w;
2493             default: goto unhandled;
2494          }
2495          /*NOTREACHED*/
2496       case Cex_Binop:
2497          wL = evalCfiExpr( exprs, e->Cex.Binop.ixL, eec, ok );
2498          if (!(*ok)) return 0;
2499          wR = evalCfiExpr( exprs, e->Cex.Binop.ixR, eec, ok );
2500          if (!(*ok)) return 0;
2501          switch (e->Cex.Binop.op) {
2502             case Cbinop_Add: return wL + wR;
2503             case Cbinop_Sub: return wL - wR;
2504             case Cbinop_And: return wL & wR;
2505             case Cbinop_Mul: return wL * wR;
2506             case Cbinop_Shl: return wL << wR;
2507             case Cbinop_Shr: return wL >> wR;
2508             case Cbinop_Eq: return wL == wR ? 1 : 0;
2509             case Cbinop_Ge: return (Word) wL >= (Word) wR ? 1 : 0;
2510             case Cbinop_Gt: return (Word) wL > (Word) wR ? 1 : 0;
2511             case Cbinop_Le: return (Word) wL <= (Word) wR ? 1 : 0;
2512             case Cbinop_Lt: return (Word) wL < (Word) wR ? 1 : 0;
2513             case Cbinop_Ne: return wL != wR ? 1 : 0;
2514             default: goto unhandled;
2515          }
2516          /*NOTREACHED*/
2517       case Cex_CfiReg:
2518          switch (e->Cex.CfiReg.reg) {
2519 #           if defined(VGA_x86) || defined(VGA_amd64)
2520             case Creg_IA_IP: return eec->uregs->xip;
2521             case Creg_IA_SP: return eec->uregs->xsp;
2522             case Creg_IA_BP: return eec->uregs->xbp;
2523 #           elif defined(VGA_arm)
2524             case Creg_ARM_R15: return eec->uregs->r15;
2525             case Creg_ARM_R14: return eec->uregs->r14;
2526             case Creg_ARM_R13: return eec->uregs->r13;
2527             case Creg_ARM_R12: return eec->uregs->r12;
2528             case Creg_ARM_R7:  return eec->uregs->r7;
2529 #           elif defined(VGA_s390x)
2530             case Creg_S390_IA: return eec->uregs->ia;
2531             case Creg_S390_SP: return eec->uregs->sp;
2532             case Creg_S390_FP: return eec->uregs->fp;
2533             case Creg_S390_LR: return eec->uregs->lr;
2534 #           elif defined(VGA_mips32) || defined(VGA_mips64)
2535             case Creg_IA_IP: return eec->uregs->pc;
2536             case Creg_IA_SP: return eec->uregs->sp;
2537             case Creg_IA_BP: return eec->uregs->fp;
2538             case Creg_MIPS_RA: return eec->uregs->ra;
2539 #           elif defined(VGA_ppc32) || defined(VGA_ppc64be) \
2540                || defined(VGA_ppc64le)
2541 #           elif defined(VGP_arm64_linux)
2542             case Creg_ARM64_X30: return eec->uregs->x30;
2543 #           else
2544 #             error "Unsupported arch"
2545 #           endif
2546             default: goto unhandled;
2547          }
2548          /*NOTREACHED*/
2549       case Cex_Const:
2550          return e->Cex.Const.con;
2551       case Cex_Deref:
2552          a = evalCfiExpr( exprs, e->Cex.Deref.ixAddr, eec, ok );
2553          if (!(*ok)) return 0;
2554          if (a < eec->min_accessible
2555              || a > eec->max_accessible - sizeof(UWord) + 1) {
2556             *ok = False;
2557             return 0;
2558          }
2559          /* let's hope it doesn't trap! */
2560          return ML_(read_UWord)((void *)a);
2561       default:
2562          goto unhandled;
2563    }
2564    /*NOTREACHED*/
2565   unhandled:
2566    VG_(printf)("\n\nevalCfiExpr: unhandled\n");
2567    ML_(ppCfiExpr)( exprs, ix );
2568    VG_(printf)("\n");
2569    vg_assert(0);
2570    /*NOTREACHED*/
2571    return 0;
2572 }
2573 
2574 
2575 /* Search all the DebugInfos in the entire system, to find the DiCfSI_m
2576    that pertains to 'ip'.
2577 
2578    If found, set *diP to the DebugInfo in which it resides, and
2579    *cfsi_mP to the cfsi_m pointer in that DebugInfo's cfsi_m_pool.
2580 
2581    If not found, set *diP to (DebugInfo*)1 and *cfsi_mP to zero.
2582 */
2583 __attribute__((noinline))
find_DiCfSI(DebugInfo ** diP,DiCfSI_m ** cfsi_mP,Addr ip)2584 static void find_DiCfSI ( /*OUT*/DebugInfo** diP,
2585                           /*OUT*/DiCfSI_m** cfsi_mP,
2586                           Addr ip )
2587 {
2588    DebugInfo* di;
2589    Word       i = -1;
2590 
2591    static UWord n_search = 0;
2592    static UWord n_steps = 0;
2593    n_search++;
2594 
2595    if (0) VG_(printf)("search for %#lx\n", ip);
2596 
2597    for (di = debugInfo_list; di != NULL; di = di->next) {
2598       Word j;
2599       n_steps++;
2600 
2601       /* Use the per-DebugInfo summary address ranges to skip
2602          inapplicable DebugInfos quickly. */
2603       if (di->cfsi_used == 0)
2604          continue;
2605       if (ip < di->cfsi_minavma || ip > di->cfsi_maxavma)
2606          continue;
2607 
2608       /* It might be in this DebugInfo.  Search it. */
2609       j = ML_(search_one_cfitab)( di, ip );
2610       vg_assert(j >= -1 && j < (Word)di->cfsi_used);
2611 
2612       if (j != -1) {
2613          i = j;
2614          break; /* found it */
2615       }
2616    }
2617 
2618    if (i == -1) {
2619 
2620       /* we didn't find it. */
2621       *diP = (DebugInfo*)1;
2622       *cfsi_mP = 0;
2623 
2624    } else {
2625 
2626       /* found a di corresponding to ip. */
2627       /* ensure that di is 4-aligned (at least), so it can't possibly
2628          be equal to (DebugInfo*)1. */
2629       vg_assert(di && VG_IS_4_ALIGNED(di));
2630       *cfsi_mP = ML_(get_cfsi_m) (di, i);
2631       if (*cfsi_mP == NULL) {
2632          // This is a cfsi hole. Report no cfi information found.
2633          *diP = (DebugInfo*)1;
2634          // But we will still perform the hack below.
2635       } else {
2636          *diP = di;
2637       }
2638 
2639       /* Start of performance-enhancing hack: once every 64 (chosen
2640          hackily after profiling) successful searches, move the found
2641          DebugInfo one step closer to the start of the list.  This
2642          makes future searches cheaper.  For starting konqueror on
2643          amd64, this in fact reduces the total amount of searching
2644          done by the above find-the-right-DebugInfo loop by more than
2645          a factor of 20. */
2646       if ((n_search & 0xF) == 0) {
2647          /* Move di one step closer to the start of the list. */
2648          move_DebugInfo_one_step_forward( di );
2649       }
2650       /* End of performance-enhancing hack. */
2651 
2652       if (0 && ((n_search & 0x7FFFF) == 0))
2653          VG_(printf)("find_DiCfSI: %lu searches, "
2654                      "%lu DebugInfos looked at\n",
2655                      n_search, n_steps);
2656 
2657    }
2658 
2659 }
2660 
2661 
2662 /* Now follows a mechanism for caching queries to find_DiCfSI, since
2663    they are extremely frequent on amd64-linux, during stack unwinding.
2664 
2665    Each cache entry binds an ip value to a (di, cfsi_m*) pair.  Possible
2666    values:
2667 
2668    di is non-null, cfsi_m* >= 0  ==>  cache slot in use, "cfsi_m*"
2669    di is (DebugInfo*)1           ==>  cache slot in use, no associated di
2670    di is NULL                    ==>  cache slot not in use
2671 
2672    Hence simply zeroing out the entire cache invalidates all
2673    entries.
2674 
2675    We can map an ip value directly to a (di, cfsi_m*) pair as
2676    once a DebugInfo is read, adding new DiCfSI_m* is not possible
2677    anymore, as the cfsi_m_pool is frozen once the reading is terminated.
2678    Also, the cache is invalidated when new debuginfo is read due to
2679    an mmap or some debuginfo is discarded due to an munmap. */
2680 
// Number of slots in cfsi_m_cache.
// Prime number, giving about 6Kbytes cache on 32 bits,
//                           12Kbytes cache on 64 bits.
#define N_CFSI_M_CACHE 509

/* One cache slot: the (di, cfsi_m) pair that find_DiCfSI produced for
   address 'ip'.  di == NULL marks an unused slot and
   di == (DebugInfo*)1 records that no CFI exists for 'ip'. */
typedef
   struct { Addr ip; DebugInfo* di; DiCfSI_m* cfsi_m; }
   CFSI_m_CacheEnt;

/* The cache itself.  The slot for an address is ip % N_CFSI_M_CACHE,
   so each address maps to exactly one slot. */
static CFSI_m_CacheEnt cfsi_m_cache[N_CFSI_M_CACHE];
2690 
cfsi_m_cache__invalidate(void)2691 static void cfsi_m_cache__invalidate ( void ) {
2692    VG_(memset)(&cfsi_m_cache, 0, sizeof(cfsi_m_cache));
2693 }
2694 
cfsi_m_cache__find(Addr ip)2695 static inline CFSI_m_CacheEnt* cfsi_m_cache__find ( Addr ip )
2696 {
2697    UWord         hash = ip % N_CFSI_M_CACHE;
2698    CFSI_m_CacheEnt* ce = &cfsi_m_cache[hash];
2699 #  ifdef N_Q_M_STATS
2700    static UWord  n_q = 0, n_m = 0;
2701    n_q++;
2702    if (0 == (n_q & 0x1FFFFF))
2703       VG_(printf)("QQQ %lu %lu\n", n_q, n_m);
2704 #  endif
2705 
2706    if (LIKELY(ce->ip == ip) && LIKELY(ce->di != NULL)) {
2707       /* found an entry in the cache .. */
2708    } else {
2709       /* not found in cache.  Search and update. */
2710 #     ifdef N_Q_M_STATS
2711       n_m++;
2712 #     endif
2713       ce->ip = ip;
2714       find_DiCfSI( &ce->di, &ce->cfsi_m, ip );
2715    }
2716 
2717    if (UNLIKELY(ce->di == (DebugInfo*)1)) {
2718       /* no DiCfSI for this address */
2719       return NULL;
2720    } else {
2721       /* found a DiCfSI for this address */
2722       return ce;
2723    }
2724 }
2725 
2726 
2727 inline
compute_cfa(const D3UnwindRegs * uregs,Addr min_accessible,Addr max_accessible,const DebugInfo * di,const DiCfSI_m * cfsi_m)2728 static Addr compute_cfa ( const D3UnwindRegs* uregs,
2729                           Addr min_accessible, Addr max_accessible,
2730                           const DebugInfo* di, const DiCfSI_m* cfsi_m )
2731 {
2732    CfiExprEvalContext eec;
2733    Addr               cfa;
2734    Bool               ok;
2735 
2736    /* Compute the CFA. */
2737    cfa = 0;
2738    switch (cfsi_m->cfa_how) {
2739 #     if defined(VGA_x86) || defined(VGA_amd64)
2740       case CFIC_IA_SPREL:
2741          cfa = cfsi_m->cfa_off + uregs->xsp;
2742          break;
2743       case CFIC_IA_BPREL:
2744          cfa = cfsi_m->cfa_off + uregs->xbp;
2745          break;
2746 #     elif defined(VGA_arm)
2747       case CFIC_ARM_R13REL:
2748          cfa = cfsi_m->cfa_off + uregs->r13;
2749          break;
2750       case CFIC_ARM_R12REL:
2751          cfa = cfsi_m->cfa_off + uregs->r12;
2752          break;
2753       case CFIC_ARM_R11REL:
2754          cfa = cfsi_m->cfa_off + uregs->r11;
2755          break;
2756       case CFIC_ARM_R7REL:
2757          cfa = cfsi_m->cfa_off + uregs->r7;
2758          break;
2759 #     elif defined(VGA_s390x)
2760       case CFIC_IA_SPREL:
2761          cfa = cfsi_m->cfa_off + uregs->sp;
2762          break;
2763       case CFIR_MEMCFAREL:
2764       {
2765          Addr a = uregs->sp + cfsi_m->cfa_off;
2766          if (a < min_accessible || a > max_accessible-sizeof(Addr))
2767             break;
2768          cfa = ML_(read_Addr)((void *)a);
2769          break;
2770       }
2771       case CFIR_SAME:
2772          cfa = uregs->fp;
2773          break;
2774       case CFIC_IA_BPREL:
2775          cfa = cfsi_m->cfa_off + uregs->fp;
2776          break;
2777 #     elif defined(VGA_mips32) || defined(VGA_mips64)
2778       case CFIC_IA_SPREL:
2779          cfa = cfsi_m->cfa_off + uregs->sp;
2780          break;
2781       case CFIR_SAME:
2782          cfa = uregs->fp;
2783          break;
2784       case CFIC_IA_BPREL:
2785          cfa = cfsi_m->cfa_off + uregs->fp;
2786          break;
2787 #     elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
2788 #     elif defined(VGP_arm64_linux)
2789       case CFIC_ARM64_SPREL:
2790          cfa = cfsi_m->cfa_off + uregs->sp;
2791          break;
2792       case CFIC_ARM64_X29REL:
2793          cfa = cfsi_m->cfa_off + uregs->x29;
2794          break;
2795 #     else
2796 #       error "Unsupported arch"
2797 #     endif
2798       case CFIC_EXPR: /* available on all archs */
2799          if (0) {
2800             VG_(printf)("CFIC_EXPR: ");
2801             ML_(ppCfiExpr)(di->cfsi_exprs, cfsi_m->cfa_off);
2802             VG_(printf)("\n");
2803          }
2804          eec.uregs          = uregs;
2805          eec.min_accessible = min_accessible;
2806          eec.max_accessible = max_accessible;
2807          ok = True;
2808          cfa = evalCfiExpr(di->cfsi_exprs, cfsi_m->cfa_off, &eec, &ok );
2809          if (!ok) return 0;
2810          break;
2811       default:
2812          vg_assert(0);
2813    }
2814    return cfa;
2815 }
2816 
2817 
2818 /* Get the call frame address (CFA) given an IP/SP/FP triple. */
2819 /* NOTE: This function may rearrange the order of entries in the
2820    DebugInfo list. */
ML_(get_CFA)2821 Addr ML_(get_CFA) ( Addr ip, Addr sp, Addr fp,
2822                     Addr min_accessible, Addr max_accessible )
2823 {
2824    CFSI_m_CacheEnt* ce;
2825 
2826    ce = cfsi_m_cache__find(ip);
2827 
2828    if (UNLIKELY(ce == NULL))
2829       return 0; /* no info.  Nothing we can do. */
2830 
2831    /* Temporary impedance-matching kludge so that this keeps working
2832       on x86-linux and amd64-linux. */
2833 #  if defined(VGA_x86) || defined(VGA_amd64)
2834    { D3UnwindRegs uregs;
2835      uregs.xip = ip;
2836      uregs.xsp = sp;
2837      uregs.xbp = fp;
2838      return compute_cfa(&uregs,
2839                         min_accessible,  max_accessible, ce->di, ce->cfsi_m);
2840    }
2841 #elif defined(VGA_s390x)
2842    { D3UnwindRegs uregs;
2843      uregs.ia = ip;
2844      uregs.sp = sp;
2845      uregs.fp = fp;
2846      return compute_cfa(&uregs,
2847                         min_accessible,  max_accessible, ce->di, ce->cfsi_m);
2848    }
2849 #elif defined(VGA_mips32) || defined(VGA_mips64)
2850    { D3UnwindRegs uregs;
2851      uregs.pc = ip;
2852      uregs.sp = sp;
2853      uregs.fp = fp;
2854      return compute_cfa(&uregs,
2855                         min_accessible,  max_accessible, ce->di, ce->cfsi_m);
2856    }
2857 
2858 #  else
2859    return 0; /* indicates failure */
2860 #  endif
2861 }
2862 
VG_(ppUnwindInfo)2863 void VG_(ppUnwindInfo) (Addr from, Addr to)
2864 {
2865    DebugInfo*         di;
2866    CFSI_m_CacheEnt*   ce;
2867    Addr ce_from;
2868    CFSI_m_CacheEnt*   next_ce;
2869 
2870 
2871    ce = cfsi_m_cache__find(from);
2872    ce_from = from;
2873    while (from <= to) {
2874       from++;
2875       next_ce = cfsi_m_cache__find(from);
2876       if ((ce == NULL && next_ce != NULL)
2877           || (ce != NULL && next_ce == NULL)
2878           || (ce != NULL && next_ce != NULL && ce->cfsi_m != next_ce->cfsi_m)
2879           || from > to) {
2880          if (ce == NULL) {
2881             VG_(printf)("[%#lx .. %#lx]: no CFI info\n", ce_from, from-1);
2882          } else {
2883             di = ce->di;
2884             ML_(ppDiCfSI)(di->cfsi_exprs,
2885                           ce_from, from - ce_from,
2886                           ce->cfsi_m);
2887          }
2888          ce = next_ce;
2889          ce_from = from;
2890       }
2891    }
2892 }
2893 
2894 
/* The main function for DWARF2/3 CFI-based stack unwinding.  Given a
   set of registers in UREGS, modify it to hold the register values
   for the previous frame, if possible.  Returns True if successful.
   If not successful, *UREGS is not changed.

   min_accessible and max_accessible bound the memory that may be read
   while unwinding; a register whose saved value lies outside them
   makes the unwind fail.

   For x86 and amd64, the unwound registers are: {E,R}IP,
   {E,R}SP, {E,R}BP.

   For arm, the unwound registers are: R7 R11 R12 R13 R14 R15.

   For arm64, the unwound registers are: X29(FP) X30(LR) SP PC.
*/
Bool VG_(use_CF_info) ( /*MOD*/D3UnwindRegs* uregsHere,
                        Addr min_accessible,
                        Addr max_accessible )
{
   DebugInfo*         di;
   DiCfSI_m*          cfsi_m = NULL;
   Addr               cfa, ipHere = 0;
   CFSI_m_CacheEnt*   ce;
   /* Only used by the CFIR_EXPR arm of COMPUTE below.  NOTE(review):
      the 'unused' attribute is presumably for the ppc configurations,
      where COMPUTE is never expanded -- confirm. */
   CfiExprEvalContext eec __attribute__((unused));
   /* The previous frame's registers are built up here and copied to
      *uregsHere only once everything has succeeded. */
   D3UnwindRegs       uregsPrev;

   /* Fetch the instruction pointer of the current frame.  On ppc the
      branch is empty, so ipHere keeps its initial value of 0. */
#  if defined(VGA_x86) || defined(VGA_amd64)
   ipHere = uregsHere->xip;
#  elif defined(VGA_arm)
   ipHere = uregsHere->r15;
#  elif defined(VGA_s390x)
   ipHere = uregsHere->ia;
#  elif defined(VGA_mips32) || defined(VGA_mips64)
   ipHere = uregsHere->pc;
#  elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
#  elif defined(VGP_arm64_linux)
   ipHere = uregsHere->pc;
#  else
#    error "Unknown arch"
#  endif
   ce = cfsi_m_cache__find(ipHere);

   if (UNLIKELY(ce == NULL))
      return False; /* no info.  Nothing we can do. */

   di = ce->di;
   cfsi_m = ce->cfsi_m;

   if (0) {
      VG_(printf)("found cfsi_m (but printing fake base/len): ");
      ML_(ppDiCfSI)(di->cfsi_exprs, 0, 0, cfsi_m);
   }

   /* Registers that are not computed below end up as zero. */
   VG_(bzero_inline)(&uregsPrev, sizeof(uregsPrev));

   /* First compute the CFA. */
   cfa = compute_cfa(uregsHere,
                     min_accessible, max_accessible, di, cfsi_m);
   /* compute_cfa returns 0 when it cannot produce a CFA. */
   if (UNLIKELY(cfa == 0))
      return False;

   /* Now we know the CFA, use it to roll back the registers we're
      interested in. */

   /* COMPUTE(_prev, _here, _how, _off) stores into _prev the previous
      frame's value of one register, whose current value is _here,
      following recovery rule _how with parameter _off:
        CFIR_UNKNOWN    cannot be recovered; the whole unwind fails
        CFIR_SAME       unchanged from this frame
        CFIR_MEMCFAREL  loaded from memory at cfa + _off, after a
                        bounds check against min/max_accessible
        CFIR_CFAREL     the value cfa + _off itself
        CFIR_EXPR       value of the CfiExpr whose root index is _off
      On failure the macro returns False from VG_(use_CF_info). */
#  define COMPUTE(_prev, _here, _how, _off)             \
      do {                                              \
         switch (_how) {                                \
            case CFIR_UNKNOWN:                          \
               return False;                            \
            case CFIR_SAME:                             \
               _prev = _here; break;                    \
            case CFIR_MEMCFAREL: {                      \
               Addr a = cfa + (Word)_off;               \
               if (a < min_accessible                   \
                   || a > max_accessible-sizeof(Addr))  \
                  return False;                         \
               _prev = ML_(read_Addr)((void *)a);       \
               break;                                   \
            }                                           \
            case CFIR_CFAREL:                           \
               _prev = cfa + (Word)_off;                \
               break;                                   \
            case CFIR_EXPR:                             \
               if (0)                                   \
                  ML_(ppCfiExpr)(di->cfsi_exprs,_off);  \
               eec.uregs = uregsHere;                   \
               eec.min_accessible = min_accessible;     \
               eec.max_accessible = max_accessible;     \
               Bool ok = True;                          \
               _prev = evalCfiExpr(di->cfsi_exprs, _off, &eec, &ok ); \
               if (!ok) return False;                   \
               break;                                   \
            default:                                    \
               vg_assert(0);                            \
         }                                              \
      } while (0)

#  if defined(VGA_x86) || defined(VGA_amd64)
   COMPUTE(uregsPrev.xip, uregsHere->xip, cfsi_m->ra_how, cfsi_m->ra_off);
   COMPUTE(uregsPrev.xsp, uregsHere->xsp, cfsi_m->sp_how, cfsi_m->sp_off);
   COMPUTE(uregsPrev.xbp, uregsHere->xbp, cfsi_m->bp_how, cfsi_m->bp_off);
#  elif defined(VGA_arm)
   COMPUTE(uregsPrev.r15, uregsHere->r15, cfsi_m->ra_how,  cfsi_m->ra_off);
   COMPUTE(uregsPrev.r14, uregsHere->r14, cfsi_m->r14_how, cfsi_m->r14_off);
   COMPUTE(uregsPrev.r13, uregsHere->r13, cfsi_m->r13_how, cfsi_m->r13_off);
   COMPUTE(uregsPrev.r12, uregsHere->r12, cfsi_m->r12_how, cfsi_m->r12_off);
   COMPUTE(uregsPrev.r11, uregsHere->r11, cfsi_m->r11_how, cfsi_m->r11_off);
   COMPUTE(uregsPrev.r7,  uregsHere->r7,  cfsi_m->r7_how,  cfsi_m->r7_off);
#  elif defined(VGA_s390x)
   COMPUTE(uregsPrev.ia, uregsHere->ia, cfsi_m->ra_how, cfsi_m->ra_off);
   COMPUTE(uregsPrev.sp, uregsHere->sp, cfsi_m->sp_how, cfsi_m->sp_off);
   COMPUTE(uregsPrev.fp, uregsHere->fp, cfsi_m->fp_how, cfsi_m->fp_off);
#  elif defined(VGA_mips32) || defined(VGA_mips64)
   COMPUTE(uregsPrev.pc, uregsHere->pc, cfsi_m->ra_how, cfsi_m->ra_off);
   COMPUTE(uregsPrev.sp, uregsHere->sp, cfsi_m->sp_how, cfsi_m->sp_off);
   COMPUTE(uregsPrev.fp, uregsHere->fp, cfsi_m->fp_how, cfsi_m->fp_off);
#  elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
#  elif defined(VGP_arm64_linux)
   COMPUTE(uregsPrev.pc,  uregsHere->pc,  cfsi_m->ra_how,  cfsi_m->ra_off);
   COMPUTE(uregsPrev.sp,  uregsHere->sp,  cfsi_m->sp_how,  cfsi_m->sp_off);
   COMPUTE(uregsPrev.x30, uregsHere->x30, cfsi_m->x30_how, cfsi_m->x30_off);
   COMPUTE(uregsPrev.x29, uregsHere->x29, cfsi_m->x29_how, cfsi_m->x29_off);
#  else
#    error "Unknown arch"
#  endif

#  undef COMPUTE

   /* Everything succeeded: commit the unwound registers. */
   *uregsHere = uregsPrev;
   return True;
}
3023 
3024 
3025 /*--------------------------------------------------------------*/
3026 /*---                                                        ---*/
3027 /*--- TOP LEVEL: FOR UNWINDING THE STACK USING               ---*/
3028 /*---            MSVC FPO INFO                               ---*/
3029 /*---                                                        ---*/
3030 /*--------------------------------------------------------------*/
3031 
VG_(use_FPO_info)3032 Bool VG_(use_FPO_info) ( /*MOD*/Addr* ipP,
3033                          /*MOD*/Addr* spP,
3034                          /*MOD*/Addr* fpP,
3035                          Addr min_accessible,
3036                          Addr max_accessible )
3037 {
3038    Word       i;
3039    const DebugInfo* di;
3040    FPO_DATA*  fpo = NULL;
3041    Addr       spHere;
3042 
3043    static UWord n_search = 0;
3044    static UWord n_steps = 0;
3045    n_search++;
3046 
3047    if (0) VG_(printf)("search FPO for %#lx\n", *ipP);
3048 
3049    for (di = debugInfo_list; di != NULL; di = di->next) {
3050       n_steps++;
3051 
3052       /* Use the per-DebugInfo summary address ranges to skip
3053          inapplicable DebugInfos quickly. */
3054       if (di->fpo == NULL)
3055          continue;
3056       if (*ipP < di->fpo_minavma || *ipP > di->fpo_maxavma)
3057          continue;
3058 
3059       i = ML_(search_one_fpotab)( di, *ipP );
3060       if (i != -1) {
3061          Word j;
3062          if (0) {
3063             /* debug printing only */
3064             VG_(printf)("look for %#lx  size %lu i %ld\n",
3065                         *ipP, di->fpo_size, i);
3066             for (j = 0; j < di->fpo_size; j++)
3067                VG_(printf)("[%02ld] %#x %u\n",
3068                             j, di->fpo[j].ulOffStart, di->fpo[j].cbProcSize);
3069          }
3070          vg_assert(i >= 0 && i < di->fpo_size);
3071          fpo = &di->fpo[i];
3072          break;
3073       }
3074    }
3075 
3076    if (fpo == NULL)
3077       return False;
3078 
3079    if (0 && ((n_search & 0x7FFFF) == 0))
3080       VG_(printf)("VG_(use_FPO_info): %lu searches, "
3081                   "%lu DebugInfos looked at\n",
3082                   n_search, n_steps);
3083 
3084 
3085    /* Start of performance-enhancing hack: once every 64 (chosen
3086       hackily after profiling) successful searches, move the found
3087       DebugInfo one step closer to the start of the list.  This makes
3088       future searches cheaper.  For starting konqueror on amd64, this
3089       in fact reduces the total amount of searching done by the above
3090       find-the-right-DebugInfo loop by more than a factor of 20. */
3091    if ((n_search & 0x3F) == 0) {
3092       /* Move si one step closer to the start of the list. */
3093       //move_DebugInfo_one_step_forward( di );
3094    }
3095    /* End of performance-enhancing hack. */
3096 
3097    if (0) {
3098       VG_(printf)("found fpo: ");
3099       //ML_(ppFPO)(fpo);
3100    }
3101 
3102    /*
3103    Stack layout is:
3104    %esp->
3105       4*.cbRegs  {%edi, %esi, %ebp, %ebx}
3106       4*.cdwLocals
3107       return_pc
3108       4*.cdwParams
3109    prior_%esp->
3110 
3111    Typical code looks like:
3112       sub $4*.cdwLocals,%esp
3113          Alternative to above for >=4KB (and sometimes for smaller):
3114             mov $size,%eax
3115             call __chkstk  # WinNT performs page-by-page probe!
3116                __chkstk is much like alloc(), except that on return
3117                %eax= 5+ &CALL.  Thus it could be used as part of
3118                Position Independent Code to locate the Global Offset Table.
3119       push %ebx
3120       push %ebp
3121       push %esi
3122          Other once-only instructions often scheduled >here<.
3123       push %edi
3124 
3125    If the pc is within the first .cbProlog bytes of the function,
3126    then you must disassemble to see how many registers have been pushed,
3127    because instructions in the prolog may be scheduled for performance.
3128    The order of PUSH is always %ebx, %ebp, %esi, %edi, with trailing
3129    registers not pushed when .cbRegs < 4.  This seems somewhat strange
3130    because %ebp is the register whose usage you want to minimize,
3131    yet it is in the first half of the PUSH list.
3132 
3133    I don't know what happens when the compiler constructs an outgoing CALL.
3134    %esp could move if outgoing parameters are PUSHed, and this affects
3135    traceback for errors during the PUSHes. */
3136 
3137    spHere = *spP;
3138 
3139    *ipP = ML_(read_Addr)((void *)(spHere + 4*(fpo->cbRegs + fpo->cdwLocals)));
3140    *spP =                         spHere + 4*(fpo->cbRegs + fpo->cdwLocals + 1
3141                                                           + fpo->cdwParams);
3142    *fpP = ML_(read_Addr)((void *)(spHere + 4*2));
3143    return True;
3144 }
3145 
VG_(FPO_info_present)3146 Bool VG_(FPO_info_present)(void)
3147 {
3148    const DebugInfo* di;
3149    for (di = debugInfo_list; di != NULL; di = di->next) {
3150       if (di->fpo != NULL)
3151          return True;
3152    }
3153    return False;
3154 }
3155 
3156 
3157 /*--------------------------------------------------------------*/
3158 /*---                                                        ---*/
3159 /*--- TOP LEVEL: GENERATE DESCRIPTION OF DATA ADDRESSES      ---*/
3160 /*---            FROM DWARF3 DEBUG INFO                      ---*/
3161 /*---                                                        ---*/
3162 /*--------------------------------------------------------------*/
3163 
/* Make p2XA(dst, fmt, args..) turn into
   VG_(xaprintf)(dst, fmt, args) without having to resort to
   vararg macros.  This is done by defining p2XA as a plain alias for
   the function name, so the argument list is passed through
   untouched.  As usual with everything to do with varargs, it's
   an ugly hack.

   The vararg-macro form that is being avoided would be:

   //#define p2XA(dstxa, format, args...)
   //   VG_(xaprintf)(dstxa, format, ##args)
*/
#define  p2XA  VG_(xaprintf)
3173 
3174 /* Add a zero-terminating byte to DST, which must be an XArray* of
3175    HChar. */
zterm_XA(XArray * dst)3176 static void zterm_XA ( XArray* dst )
3177 {
3178    HChar zero = 0;
3179    (void) VG_(addBytesToXA)( dst, &zero, 1 );
3180 }
3181 
3182 
3183 /* Evaluate the location expression/list for var, to see whether or
3184    not data_addr falls within the variable.  If so also return the
3185    offset of data_addr from the start of the variable.  Note that
3186    regs, which supplies ip,sp,fp values, will be NULL for global
3187    variables, and non-NULL for local variables. */
data_address_is_in_var(PtrdiffT * offset,const XArray * tyents,const DiVariable * var,const RegSummary * regs,Addr data_addr,const DebugInfo * di)3188 static Bool data_address_is_in_var ( /*OUT*/PtrdiffT* offset,
3189                                      const XArray* /* TyEnt */ tyents,
3190                                      const DiVariable*   var,
3191                                      const RegSummary*   regs,
3192                                      Addr  data_addr,
3193                                      const DebugInfo* di )
3194 {
3195    MaybeULong mul;
3196    SizeT      var_szB;
3197    GXResult   res;
3198    Bool       show = False;
3199 
3200    vg_assert(var->name);
3201    vg_assert(var->gexpr);
3202 
3203    /* Figure out how big the variable is. */
3204    mul = ML_(sizeOfType)(tyents, var->typeR);
3205    /* If this var has a type whose size is unknown, zero, or
3206       impossibly large, it should never have been added.  ML_(addVar)
3207       should have rejected it. */
3208    vg_assert(mul.b == True);
3209    vg_assert(mul.ul > 0);
3210    if (sizeof(void*) == 4) vg_assert(mul.ul < (1ULL << 32));
3211    /* After this point, we assume we can truncate mul.ul to a host word
3212       safely (without loss of info). */
3213 
3214    var_szB = (SizeT)mul.ul; /* NB: truncate to host word */
3215 
3216    if (show) {
3217       VG_(printf)("VVVV: data_address_%#lx_is_in_var: %s :: ",
3218                   data_addr, var->name );
3219       ML_(pp_TyEnt_C_ishly)( tyents, var->typeR );
3220       VG_(printf)("\n");
3221    }
3222 
3223    /* ignore zero-sized vars; they can never match anything. */
3224    if (var_szB == 0) {
3225       if (show)
3226          VG_(printf)("VVVV: -> Fail (variable is zero sized)\n");
3227       return False;
3228    }
3229 
3230    res = ML_(evaluate_GX)( var->gexpr, var->fbGX, regs, di );
3231 
3232    if (show) {
3233       VG_(printf)("VVVV: -> ");
3234       ML_(pp_GXResult)( res );
3235       VG_(printf)("\n");
3236    }
3237 
3238    if (res.kind == GXR_Addr
3239        && res.word <= data_addr
3240        && data_addr < res.word + var_szB) {
3241       *offset = data_addr - res.word;
3242       return True;
3243    } else {
3244       return False;
3245    }
3246 }
3247 
3248 
3249 /* Format the acquired information into DN(AME)1 and DN(AME)2, which
3250    are XArray*s of HChar, that have been initialised by the caller.
3251    Resulting strings will be zero terminated.  Information is
3252    formatted in an understandable way.  Not so easy.  If frameNo is
3253    -1, this is assumed to be a global variable; else a local
3254    variable. */
format_message(XArray * dn1,XArray * dn2,Addr data_addr,const DebugInfo * di,const DiVariable * var,PtrdiffT var_offset,PtrdiffT residual_offset,const XArray * described,Int frameNo,ThreadId tid)3255 static void format_message ( /*MOD*/XArray* /* of HChar */ dn1,
3256                              /*MOD*/XArray* /* of HChar */ dn2,
3257                              Addr     data_addr,
3258                              const DebugInfo* di,
3259                              const DiVariable* var,
3260                              PtrdiffT var_offset,
3261                              PtrdiffT residual_offset,
3262                              const XArray* /*HChar*/ described,
3263                              Int      frameNo,
3264                              ThreadId tid )
3265 {
3266    Bool   have_descr, have_srcloc;
3267    Bool   xml       = VG_(clo_xml);
3268    const HChar* vo_plural = var_offset == 1 ? "" : "s";
3269    const HChar* ro_plural = residual_offset == 1 ? "" : "s";
3270    const HChar* basetag   = "auxwhat"; /* a constant */
3271    HChar tagL[32], tagR[32], xagL[32], xagR[32];
3272    const HChar *fileName = ML_(fndn_ix2filename)(di, var->fndn_ix);
3273    // fileName will be "???" if var->fndn_ix == 0.
3274    // fileName will only be used if have_descr is True.
3275 
3276    if (frameNo < -1) {
3277       vg_assert(0); /* Not allowed */
3278    }
3279    else if (frameNo == -1) {
3280       vg_assert(tid == VG_INVALID_THREADID);
3281    }
3282    else /* (frameNo >= 0) */ {
3283       vg_assert(tid != VG_INVALID_THREADID);
3284    }
3285 
3286    vg_assert(dn1 && dn2);
3287    vg_assert(described);
3288    vg_assert(var && var->name);
3289    have_descr = VG_(sizeXA)(described) > 0
3290                 && *(HChar*)VG_(indexXA)(described,0) != '\0';
3291    have_srcloc = var->fndn_ix > 0 && var->lineNo > 0;
3292 
3293    tagL[0] = tagR[0] = xagL[0] = xagR[0] = 0;
3294    if (xml) {
3295       VG_(sprintf)(tagL, "<%s>",   basetag); // <auxwhat>
3296       VG_(sprintf)(tagR, "</%s>",  basetag); // </auxwhat>
3297       VG_(sprintf)(xagL, "<x%s>",  basetag); // <xauxwhat>
3298       VG_(sprintf)(xagR, "</x%s>", basetag); // </xauxwhat>
3299    }
3300 
3301 #  define TAGL(_xa) p2XA(_xa, "%s", tagL)
3302 #  define TAGR(_xa) p2XA(_xa, "%s", tagR)
3303 #  define XAGL(_xa) p2XA(_xa, "%s", xagL)
3304 #  define XAGR(_xa) p2XA(_xa, "%s", xagR)
3305 #  define TXTL(_xa) p2XA(_xa, "%s", "<text>")
3306 #  define TXTR(_xa) p2XA(_xa, "%s", "</text>")
3307 
3308    /* ------ local cases ------ */
3309 
3310    if ( frameNo >= 0 && (!have_srcloc) && (!have_descr) ) {
3311       /* no srcloc, no description:
3312          Location 0x7fefff6cf is 543 bytes inside local var "a",
3313          in frame #1 of thread 1
3314       */
3315       if (xml) {
3316          TAGL( dn1 );
3317          p2XA( dn1,
3318                "Location 0x%lx is %ld byte%s inside local var \"%pS\",",
3319                data_addr, var_offset, vo_plural, var->name );
3320          TAGR( dn1 );
3321          TAGL( dn2 );
3322          p2XA( dn2,
3323                "in frame #%d of thread %u", frameNo, tid );
3324          TAGR( dn2 );
3325       } else {
3326          p2XA( dn1,
3327                "Location 0x%lx is %ld byte%s inside local var \"%s\",",
3328                data_addr, var_offset, vo_plural, var->name );
3329          p2XA( dn2,
3330                "in frame #%d of thread %u", frameNo, tid );
3331       }
3332    }
3333    else
3334    if ( frameNo >= 0 && have_srcloc && (!have_descr) ) {
3335       /* no description:
3336          Location 0x7fefff6cf is 543 bytes inside local var "a"
3337          declared at dsyms7.c:17, in frame #1 of thread 1
3338       */
3339       if (xml) {
3340          TAGL( dn1 );
3341          p2XA( dn1,
3342                "Location 0x%lx is %ld byte%s inside local var \"%pS\"",
3343                data_addr, var_offset, vo_plural, var->name );
3344          TAGR( dn1 );
3345          XAGL( dn2 );
3346          TXTL( dn2 );
3347          p2XA( dn2,
3348                "declared at %pS:%d, in frame #%d of thread %u",
3349                fileName, var->lineNo, frameNo, tid );
3350          TXTR( dn2 );
3351          // FIXME: also do <dir>
3352          p2XA( dn2,
3353                " <file>%pS</file> <line>%d</line> ",
3354                fileName, var->lineNo );
3355          XAGR( dn2 );
3356       } else {
3357          p2XA( dn1,
3358                "Location 0x%lx is %ld byte%s inside local var \"%s\"",
3359                data_addr, var_offset, vo_plural, var->name );
3360          p2XA( dn2,
3361                "declared at %s:%d, in frame #%d of thread %u",
3362                fileName, var->lineNo, frameNo, tid );
3363       }
3364    }
3365    else
3366    if ( frameNo >= 0 && (!have_srcloc) && have_descr ) {
3367       /* no srcloc:
3368          Location 0x7fefff6cf is 2 bytes inside a[3].xyzzy[21].c2
3369          in frame #1 of thread 1
3370       */
3371       if (xml) {
3372          TAGL( dn1 );
3373          p2XA( dn1,
3374                "Location 0x%lx is %ld byte%s inside %pS%pS",
3375                data_addr, residual_offset, ro_plural, var->name,
3376                (HChar*)(VG_(indexXA)(described,0)) );
3377          TAGR( dn1 );
3378          TAGL( dn2 );
3379          p2XA( dn2,
3380                "in frame #%d of thread %u", frameNo, tid );
3381          TAGR( dn2 );
3382       } else {
3383          p2XA( dn1,
3384                "Location 0x%lx is %ld byte%s inside %s%s",
3385                data_addr, residual_offset, ro_plural, var->name,
3386                (HChar*)(VG_(indexXA)(described,0)) );
3387          p2XA( dn2,
3388                "in frame #%d of thread %u", frameNo, tid );
3389       }
3390    }
3391    else
3392    if ( frameNo >= 0 && have_srcloc && have_descr ) {
3393       /* Location 0x7fefff6cf is 2 bytes inside a[3].xyzzy[21].c2,
3394          declared at dsyms7.c:17, in frame #1 of thread 1 */
3395       if (xml) {
3396          TAGL( dn1 );
3397          p2XA( dn1,
3398                "Location 0x%lx is %ld byte%s inside %pS%pS,",
3399                data_addr, residual_offset, ro_plural, var->name,
3400                (HChar*)(VG_(indexXA)(described,0)) );
3401          TAGR( dn1 );
3402          XAGL( dn2 );
3403          TXTL( dn2 );
3404          p2XA( dn2,
3405                "declared at %pS:%d, in frame #%d of thread %u",
3406                fileName, var->lineNo, frameNo, tid );
3407          TXTR( dn2 );
3408          // FIXME: also do <dir>
3409          p2XA( dn2,
3410                " <file>%pS</file> <line>%d</line> ",
3411                fileName, var->lineNo );
3412          XAGR( dn2 );
3413       } else {
3414          p2XA( dn1,
3415                "Location 0x%lx is %ld byte%s inside %s%s,",
3416                data_addr, residual_offset, ro_plural, var->name,
3417                (HChar*)(VG_(indexXA)(described,0)) );
3418          p2XA( dn2,
3419                "declared at %s:%d, in frame #%d of thread %u",
3420                fileName, var->lineNo, frameNo, tid );
3421       }
3422    }
3423    else
3424    /* ------ global cases ------ */
3425    if ( frameNo >= -1 && (!have_srcloc) && (!have_descr) ) {
3426       /* no srcloc, no description:
3427          Location 0x7fefff6cf is 543 bytes inside global var "a"
3428       */
3429       if (xml) {
3430          TAGL( dn1 );
3431          p2XA( dn1,
3432                "Location 0x%lx is %ld byte%s inside global var \"%pS\"",
3433                data_addr, var_offset, vo_plural, var->name );
3434          TAGR( dn1 );
3435       } else {
3436          p2XA( dn1,
3437                "Location 0x%lx is %ld byte%s inside global var \"%s\"",
3438                data_addr, var_offset, vo_plural, var->name );
3439       }
3440    }
3441    else
3442    if ( frameNo >= -1 && have_srcloc && (!have_descr) ) {
3443       /* no description:
3444          Location 0x7fefff6cf is 543 bytes inside global var "a"
3445          declared at dsyms7.c:17
3446       */
3447       if (xml) {
3448          TAGL( dn1 );
3449          p2XA( dn1,
3450                "Location 0x%lx is %ld byte%s inside global var \"%pS\"",
3451                data_addr, var_offset, vo_plural, var->name );
3452          TAGR( dn1 );
3453          XAGL( dn2 );
3454          TXTL( dn2 );
3455          p2XA( dn2,
3456                "declared at %pS:%d",
3457                fileName, var->lineNo);
3458          TXTR( dn2 );
3459          // FIXME: also do <dir>
3460          p2XA( dn2,
3461                " <file>%pS</file> <line>%d</line> ",
3462                fileName, var->lineNo );
3463          XAGR( dn2 );
3464       } else {
3465          p2XA( dn1,
3466                "Location 0x%lx is %ld byte%s inside global var \"%s\"",
3467                data_addr, var_offset, vo_plural, var->name );
3468          p2XA( dn2,
3469                "declared at %s:%d",
3470                fileName, var->lineNo);
3471       }
3472    }
3473    else
3474    if ( frameNo >= -1 && (!have_srcloc) && have_descr ) {
3475       /* no srcloc:
3476          Location 0x7fefff6cf is 2 bytes inside a[3].xyzzy[21].c2,
3477          a global variable
3478       */
3479       if (xml) {
3480          TAGL( dn1 );
3481          p2XA( dn1,
3482                "Location 0x%lx is %ld byte%s inside %pS%pS,",
3483                data_addr, residual_offset, ro_plural, var->name,
3484                (HChar*)(VG_(indexXA)(described,0)) );
3485          TAGR( dn1 );
3486          TAGL( dn2 );
3487          p2XA( dn2,
3488                "a global variable");
3489          TAGR( dn2 );
3490       } else {
3491          p2XA( dn1,
3492                "Location 0x%lx is %ld byte%s inside %s%s,",
3493                data_addr, residual_offset, ro_plural, var->name,
3494                (HChar*)(VG_(indexXA)(described,0)) );
3495          p2XA( dn2,
3496                "a global variable");
3497       }
3498    }
3499    else
3500    if ( frameNo >= -1 && have_srcloc && have_descr ) {
3501       /* Location 0x7fefff6cf is 2 bytes inside a[3].xyzzy[21].c2,
3502          a global variable declared at dsyms7.c:17 */
3503       if (xml) {
3504          TAGL( dn1 );
3505          p2XA( dn1,
3506                "Location 0x%lx is %ld byte%s inside %pS%pS,",
3507                data_addr, residual_offset, ro_plural, var->name,
3508                (HChar*)(VG_(indexXA)(described,0)) );
3509          TAGR( dn1 );
3510          XAGL( dn2 );
3511          TXTL( dn2 );
3512          p2XA( dn2,
3513                "a global variable declared at %pS:%d",
3514                fileName, var->lineNo);
3515          TXTR( dn2 );
3516          // FIXME: also do <dir>
3517          p2XA( dn2,
3518                " <file>%pS</file> <line>%d</line> ",
3519                fileName, var->lineNo );
3520          XAGR( dn2 );
3521       } else {
3522          p2XA( dn1,
3523                "Location 0x%lx is %ld byte%s inside %s%s,",
3524                data_addr, residual_offset, ro_plural, var->name,
3525                (HChar*)(VG_(indexXA)(described,0)) );
3526          p2XA( dn2,
3527                "a global variable declared at %s:%d",
3528                fileName, var->lineNo);
3529       }
3530    }
3531    else
3532       vg_assert(0);
3533 
3534    /* Zero terminate both strings */
3535    zterm_XA( dn1 );
3536    zterm_XA( dn2 );
3537 
3538 #  undef TAGL
3539 #  undef TAGR
3540 #  undef XAGL
3541 #  undef XAGR
3542 #  undef TXTL
3543 #  undef TXTR
3544 }
3545 
3546 
3547 /* Determine if data_addr is a local variable in the frame
3548    characterised by (ip,sp,fp), and if so write its description at the
3549    ends of DNAME{1,2}, which are XArray*s of HChar, that have been
3550    initialised by the caller, zero terminate both, and return True.
3551    If it's not a local variable in said frame, return False. */
3552 static
consider_vars_in_frame(XArray * dname1,XArray * dname2,Addr data_addr,Addr ip,Addr sp,Addr fp,ThreadId tid,Int frameNo)3553 Bool consider_vars_in_frame ( /*MOD*/XArray* /* of HChar */ dname1,
3554                               /*MOD*/XArray* /* of HChar */ dname2,
3555                               Addr data_addr,
3556                               Addr ip, Addr sp, Addr fp,
3557                               /* shown to user: */
3558                               ThreadId tid, Int frameNo )
3559 {
3560    Word       i;
3561    DebugInfo* di;
3562    RegSummary regs;
3563    Bool debug = False;
3564 
3565    static UInt n_search = 0;
3566    static UInt n_steps = 0;
3567    n_search++;
3568    if (debug)
3569       VG_(printf)("QQQQ: cvif: ip,sp,fp %#lx,%#lx,%#lx\n", ip,sp,fp);
3570    /* first, find the DebugInfo that pertains to 'ip'. */
3571    for (di = debugInfo_list; di; di = di->next) {
3572       n_steps++;
3573       /* text segment missing? unlikely, but handle it .. */
3574       if (!di->text_present || di->text_size == 0)
3575          continue;
3576       /* Ok.  So does this text mapping bracket the ip? */
3577       if (di->text_avma <= ip && ip < di->text_avma + di->text_size)
3578          break;
3579    }
3580 
3581    /* Didn't find it.  Strange -- means ip is a code address outside
3582       of any mapped text segment.  Unlikely but not impossible -- app
3583       could be generating code to run. */
3584    if (!di)
3585       return False;
3586 
3587    if (0 && ((n_search & 0x1) == 0))
3588       VG_(printf)("consider_vars_in_frame: %u searches, "
3589                   "%u DebugInfos looked at\n",
3590                   n_search, n_steps);
3591    /* Start of performance-enhancing hack: once every ??? (chosen
3592       hackily after profiling) successful searches, move the found
3593       DebugInfo one step closer to the start of the list.  This makes
3594       future searches cheaper. */
3595    if ((n_search & 0xFFFF) == 0) {
3596       /* Move si one step closer to the start of the list. */
3597       move_DebugInfo_one_step_forward( di );
3598    }
3599    /* End of performance-enhancing hack. */
3600 
3601    /* any var info at all? */
3602    if (!di->varinfo)
3603       return False;
3604 
3605    /* Work through the scopes from most deeply nested outwards,
3606       looking for code address ranges that bracket 'ip'.  The
3607       variables on each such address range found are in scope right
3608       now.  Don't descend to level zero as that is the global
3609       scope. */
3610    regs.ip = ip;
3611    regs.sp = sp;
3612    regs.fp = fp;
3613 
3614    /* "for each scope, working outwards ..." */
3615    for (i = VG_(sizeXA)(di->varinfo) - 1; i >= 1; i--) {
3616       XArray*      vars;
3617       Word         j;
3618       DiAddrRange* arange;
3619       OSet*        this_scope
3620          = *(OSet**)VG_(indexXA)( di->varinfo, i );
3621       if (debug)
3622          VG_(printf)("QQQQ:   considering scope %ld\n", (Word)i);
3623       if (!this_scope)
3624          continue;
3625       /* Find the set of variables in this scope that
3626          bracket the program counter. */
3627       arange = VG_(OSetGen_LookupWithCmp)(
3628                   this_scope, &ip,
3629                   ML_(cmp_for_DiAddrRange_range)
3630                );
3631       if (!arange)
3632          continue;
3633       /* stay sane */
3634       vg_assert(arange->aMin <= arange->aMax);
3635       /* It must bracket the ip we asked for, else
3636          ML_(cmp_for_DiAddrRange_range) is somehow broken. */
3637       vg_assert(arange->aMin <= ip && ip <= arange->aMax);
3638       /* It must have an attached XArray of DiVariables. */
3639       vars = arange->vars;
3640       vg_assert(vars);
3641       /* But it mustn't cover the entire address range.  We only
3642          expect that to happen for the global scope (level 0), which
3643          we're not looking at here.  Except, it may cover the entire
3644          address range, but in that case the vars array must be
3645          empty. */
3646       vg_assert(! (arange->aMin == (Addr)0
3647                    && arange->aMax == ~(Addr)0
3648                    && VG_(sizeXA)(vars) > 0) );
3649       for (j = 0; j < VG_(sizeXA)( vars ); j++) {
3650          DiVariable* var = (DiVariable*)VG_(indexXA)( vars, j );
3651          PtrdiffT    offset;
3652          if (debug)
3653             VG_(printf)("QQQQ:    var:name=%s %#lx-%#lx %#lx\n",
3654                         var->name,arange->aMin,arange->aMax,ip);
3655          if (data_address_is_in_var( &offset, di->admin_tyents,
3656                                      var, &regs,
3657                                      data_addr, di )) {
3658             PtrdiffT residual_offset = 0;
3659             XArray* described = ML_(describe_type)( &residual_offset,
3660                                                     di->admin_tyents,
3661                                                     var->typeR, offset );
3662             format_message( dname1, dname2,
3663                             data_addr, di, var, offset, residual_offset,
3664                             described, frameNo, tid );
3665             VG_(deleteXA)( described );
3666             return True;
3667          }
3668       }
3669    }
3670 
3671    return False;
3672 }
3673 
3674 /* Try to form some description of DATA_ADDR by looking at the DWARF3
3675    debug info we have.  This considers all global variables, and 8
3676    frames in the stacks of all threads.  Result is written at the ends
3677    of DNAME{1,2}V, which are XArray*s of HChar, that have been
3678    initialised by the caller, and True is returned.  If no description
3679    is created, False is returned.  Regardless of the return value,
3680    DNAME{1,2}V are guaranteed to be zero terminated after the call.
3681 
3682    Note that after the call, DNAME{1,2} may have more than one
3683    trailing zero, so callers should establish the useful text length
3684    using VG_(strlen) on the contents, rather than VG_(sizeXA) on the
3685    XArray itself.
3686 */
VG_(get_data_description)3687 Bool VG_(get_data_description)(
3688         /*MOD*/ XArray* /* of HChar */ dname1,
3689         /*MOD*/ XArray* /* of HChar */ dname2,
3690         Addr data_addr
3691      )
3692 {
3693 #  define N_FRAMES 8
3694    Addr ips[N_FRAMES], sps[N_FRAMES], fps[N_FRAMES];
3695    UInt n_frames;
3696 
3697    Addr       stack_min, stack_max;
3698    ThreadId   tid;
3699    Bool       found;
3700    DebugInfo* di;
3701    Word       j;
3702 
3703    if (0) VG_(printf)("get_data_description: dataaddr %#lx\n", data_addr);
3704    /* First, see if data_addr is (or is part of) a global variable.
3705       Loop over the DebugInfos we have.  Check data_addr against the
3706       outermost scope of all of them, as that should be a global
3707       scope. */
3708    for (di = debugInfo_list; di != NULL; di = di->next) {
3709       OSet*        global_scope;
3710       Word         gs_size;
3711       Addr         zero;
3712       DiAddrRange* global_arange;
3713       Word         i;
3714       XArray*      vars;
3715 
3716       /* text segment missing? unlikely, but handle it .. */
3717       if (!di->text_present || di->text_size == 0)
3718          continue;
3719       /* any var info at all? */
3720       if (!di->varinfo)
3721          continue;
3722       /* perhaps this object didn't contribute any vars at all? */
3723       if (VG_(sizeXA)( di->varinfo ) == 0)
3724          continue;
3725       global_scope = *(OSet**)VG_(indexXA)( di->varinfo, 0 );
3726       vg_assert(global_scope);
3727       gs_size = VG_(OSetGen_Size)( global_scope );
3728       /* The global scope might be completely empty if this
3729          compilation unit declared locals but nothing global. */
3730       if (gs_size == 0)
3731           continue;
3732       /* But if it isn't empty, then it must contain exactly one
3733          element, which covers the entire address range. */
3734       vg_assert(gs_size == 1);
3735       /* Fish out the global scope and check it is as expected. */
3736       zero = 0;
3737       global_arange
3738          = VG_(OSetGen_Lookup)( global_scope, &zero );
3739       /* The global range from (Addr)0 to ~(Addr)0 must exist */
3740       vg_assert(global_arange);
3741       vg_assert(global_arange->aMin == (Addr)0
3742                 && global_arange->aMax == ~(Addr)0);
3743       /* Any vars in this range? */
3744       if (!global_arange->vars)
3745          continue;
3746       /* Ok, there are some vars in the global scope of this
3747          DebugInfo.  Wade through them and see if the data addresses
3748          of any of them bracket data_addr. */
3749       vars = global_arange->vars;
3750       for (i = 0; i < VG_(sizeXA)( vars ); i++) {
3751          PtrdiffT offset;
3752          DiVariable* var = (DiVariable*)VG_(indexXA)( vars, i );
3753          vg_assert(var->name);
3754          /* Note we use a NULL RegSummary* here.  It can't make any
3755             sense for a global variable to have a location expression
3756             which depends on a SP/FP/IP value.  So don't supply any.
3757             This means, if the evaluation of the location
3758             expression/list requires a register, we have to let it
3759             fail. */
3760          if (data_address_is_in_var( &offset, di->admin_tyents, var,
3761                                      NULL/* RegSummary* */,
3762                                      data_addr, di )) {
3763             PtrdiffT residual_offset = 0;
3764             XArray* described = ML_(describe_type)( &residual_offset,
3765                                                     di->admin_tyents,
3766                                                     var->typeR, offset );
3767             format_message( dname1, dname2,
3768                             data_addr, di, var, offset, residual_offset,
3769                             described, -1/*frameNo*/,
3770                             VG_INVALID_THREADID );
3771             VG_(deleteXA)( described );
3772             zterm_XA( dname1 );
3773             zterm_XA( dname2 );
3774             return True;
3775          }
3776       }
3777    }
3778 
3779    /* Ok, well it's not a global variable.  So now let's snoop around
3780       in the stacks of all the threads.  First try to figure out which
3781       thread's stack data_addr is in. */
3782 
3783    /* Perhaps it's on a thread's stack? */
3784    found = False;
3785    VG_(thread_stack_reset_iter)(&tid);
3786    while ( VG_(thread_stack_next)(&tid, &stack_min, &stack_max) ) {
3787       if (stack_min >= stack_max)
3788          continue; /* ignore obviously stupid cases */
3789       if (stack_min - VG_STACK_REDZONE_SZB <= data_addr
3790           && data_addr <= stack_max) {
3791          found = True;
3792          break;
3793       }
3794    }
3795    if (!found) {
3796       zterm_XA( dname1 );
3797       zterm_XA( dname2 );
3798       return False;
3799    }
3800 
3801    /* We conclude data_addr is in thread tid's stack.  Unwind the
3802       stack to get a bunch of (ip,sp,fp) triples describing the
3803       frames, and for each frame, consider the local variables. */
3804    n_frames = VG_(get_StackTrace)( tid, ips, N_FRAMES,
3805                                    sps, fps, 0/*first_ip_delta*/ );
3806 
3807    vg_assert(n_frames >= 0 && n_frames <= N_FRAMES);
3808    for (j = 0; j < n_frames; j++) {
3809       if (consider_vars_in_frame( dname1, dname2,
3810                                   data_addr,
3811                                   ips[j],
3812                                   sps[j], fps[j], tid, j )) {
3813          zterm_XA( dname1 );
3814          zterm_XA( dname2 );
3815          return True;
3816       }
3817       /* Now, it appears that gcc sometimes appears to produce
3818          location lists whose ranges don't actually cover the call
3819          instruction, even though the address of the variable in
3820          question is passed as a parameter in the call.  AFAICS this
3821          is simply a bug in gcc - how can the variable be claimed not
3822          exist in memory (on the stack) for the duration of a call in
3823          which its address is passed?  But anyway, in the particular
3824          case I investigated (memcheck/tests/varinfo6.c, call to croak
3825          on line 2999, local var budget declared at line 3115
3826          appearing not to exist across the call to mainSort on line
3827          3143, "gcc.orig (GCC) 3.4.4 20050721 (Red Hat 3.4.4-2)" on
3828          amd64), the variable's location list does claim it exists
3829          starting at the first byte of the first instruction after the
3830          call instruction.  So, call consider_vars_in_frame a second
3831          time, but this time add 1 to the IP.  GDB handles this
3832          example with no difficulty, which leads me to believe that
3833          either (1) I misunderstood something, or (2) GDB has an
3834          equivalent kludge. */
3835       if (j > 0 /* this is a non-innermost frame */
3836           && consider_vars_in_frame( dname1, dname2,
3837                                      data_addr,
3838                                      ips[j] + 1,
3839                                      sps[j], fps[j], tid, j )) {
3840          zterm_XA( dname1 );
3841          zterm_XA( dname2 );
3842          return True;
3843       }
3844    }
3845 
3846    /* We didn't find anything useful. */
3847    zterm_XA( dname1 );
3848    zterm_XA( dname2 );
3849    return False;
3850 #  undef N_FRAMES
3851 }
3852 
3853 
3854 //////////////////////////////////////////////////////////////////
3855 //                                                              //
3856 // Support for other kinds of queries to the Dwarf3 var info    //
3857 //                                                              //
3858 //////////////////////////////////////////////////////////////////
3859 
3860 /* Figure out if the variable 'var' has a location that is linearly
3861    dependent on a stack pointer value, or a frame pointer value, and
3862    if it is, add a description of it to 'blocks'.  Otherwise ignore
3863    it.  If 'arrays_only' is True, also ignore it unless it has an
3864    array type. */
3865 
3866 static
analyse_deps(XArray * blocks,const XArray * tyents,Addr ip,const DebugInfo * di,const DiVariable * var,Bool arrays_only)3867 void analyse_deps ( /*MOD*/XArray* /* of FrameBlock */ blocks,
3868                     const XArray* /* TyEnt */ tyents,
3869                     Addr ip, const DebugInfo* di, const DiVariable* var,
3870                     Bool arrays_only )
3871 {
3872    GXResult   res_sp_6k, res_sp_7k, res_fp_6k, res_fp_7k;
3873    RegSummary regs;
3874    MaybeULong mul;
3875    Bool       isVec;
3876    TyEnt*     ty;
3877 
3878    Bool debug = False;
3879    if (0&&debug)
3880       VG_(printf)("adeps: var %s\n", var->name );
3881 
3882    /* Figure out how big the variable is. */
3883    mul = ML_(sizeOfType)(tyents, var->typeR);
3884    /* If this var has a type whose size is unknown, zero, or
3885       impossibly large, it should never have been added.  ML_(addVar)
3886       should have rejected it. */
3887    vg_assert(mul.b == True);
3888    vg_assert(mul.ul > 0);
3889    if (sizeof(void*) == 4) vg_assert(mul.ul < (1ULL << 32));
3890    /* After this point, we assume we can truncate mul.ul to a host word
3891       safely (without loss of info). */
3892 
3893    /* skip if non-array and we're only interested in arrays */
3894    ty = ML_(TyEnts__index_by_cuOff)( tyents, NULL, var->typeR );
3895    vg_assert(ty);
3896    vg_assert(ty->tag == Te_UNKNOWN || ML_(TyEnt__is_type)(ty));
3897    if (ty->tag == Te_UNKNOWN)
3898       return; /* perhaps we should complain in this case? */
3899    isVec = ty->tag == Te_TyArray;
3900    if (arrays_only && !isVec)
3901       return;
3902 
3903    if (0) {ML_(pp_TyEnt_C_ishly)(tyents, var->typeR);
3904            VG_(printf)("  %s\n", var->name);}
3905 
3906    /* Do some test evaluations of the variable's location expression,
3907       in order to guess whether it is sp-relative, fp-relative, or
3908       none.  A crude hack, which can be interpreted roughly as finding
3909       the first derivative of the location expression w.r.t. the
3910       supplied frame and stack pointer values. */
3911    regs.fp   = 0;
3912    regs.ip   = ip;
3913    regs.sp   = 6 * 1024;
3914    res_sp_6k = ML_(evaluate_GX)( var->gexpr, var->fbGX, &regs, di );
3915 
3916    regs.fp   = 0;
3917    regs.ip   = ip;
3918    regs.sp   = 7 * 1024;
3919    res_sp_7k = ML_(evaluate_GX)( var->gexpr, var->fbGX, &regs, di );
3920 
3921    regs.fp   = 6 * 1024;
3922    regs.ip   = ip;
3923    regs.sp   = 0;
3924    res_fp_6k = ML_(evaluate_GX)( var->gexpr, var->fbGX, &regs, di );
3925 
3926    regs.fp   = 7 * 1024;
3927    regs.ip   = ip;
3928    regs.sp   = 0;
3929    res_fp_7k = ML_(evaluate_GX)( var->gexpr, var->fbGX, &regs, di );
3930 
3931    vg_assert(res_sp_6k.kind == res_sp_7k.kind);
3932    vg_assert(res_sp_6k.kind == res_fp_6k.kind);
3933    vg_assert(res_sp_6k.kind == res_fp_7k.kind);
3934 
3935    if (res_sp_6k.kind == GXR_Addr) {
3936       StackBlock block;
3937       GXResult res;
3938       UWord sp_delta = res_sp_7k.word - res_sp_6k.word;
3939       UWord fp_delta = res_fp_7k.word - res_fp_6k.word;
3940       vg_assert(sp_delta == 0 || sp_delta == 1024);
3941       vg_assert(fp_delta == 0 || fp_delta == 1024);
3942 
3943       if (sp_delta == 0 && fp_delta == 0) {
3944          /* depends neither on sp nor fp, so it can't be a stack
3945             local.  Ignore it. */
3946       }
3947       else
3948       if (sp_delta == 1024 && fp_delta == 0) {
3949          regs.sp = regs.fp = 0;
3950          regs.ip = ip;
3951          res = ML_(evaluate_GX)( var->gexpr, var->fbGX, &regs, di );
3952          vg_assert(res.kind == GXR_Addr);
3953          if (debug)
3954          VG_(printf)("   %5lu .. %5llu (sp) %s\n",
3955                      res.word, res.word + mul.ul - 1, var->name);
3956          block.base  = res.word;
3957          block.szB   = (SizeT)mul.ul;
3958          block.spRel = True;
3959          block.isVec = isVec;
3960          VG_(memset)( &block.name[0], 0, sizeof(block.name) );
3961          if (var->name)
3962             VG_(strncpy)( &block.name[0], var->name, sizeof(block.name)-1 );
3963          block.name[ sizeof(block.name)-1 ] = 0;
3964          VG_(addToXA)( blocks, &block );
3965       }
3966       else
3967       if (sp_delta == 0 && fp_delta == 1024) {
3968          regs.sp = regs.fp = 0;
3969          regs.ip = ip;
3970          res = ML_(evaluate_GX)( var->gexpr, var->fbGX, &regs, di );
3971          vg_assert(res.kind == GXR_Addr);
3972          if (debug)
3973          VG_(printf)("   %5lu .. %5llu (FP) %s\n",
3974                      res.word, res.word + mul.ul - 1, var->name);
3975          block.base  = res.word;
3976          block.szB   = (SizeT)mul.ul;
3977          block.spRel = False;
3978          block.isVec = isVec;
3979          VG_(memset)( &block.name[0], 0, sizeof(block.name) );
3980          if (var->name)
3981             VG_(strncpy)( &block.name[0], var->name, sizeof(block.name)-1 );
3982          block.name[ sizeof(block.name)-1 ] = 0;
3983          VG_(addToXA)( blocks, &block );
3984       }
3985       else {
3986          vg_assert(0);
3987       }
3988    }
3989 }
3990 
3991 
3992 /* Get an XArray of StackBlock which describe the stack (auto) blocks
3993    for this ip.  The caller is expected to free the XArray at some
3994    point.  If 'arrays_only' is True, only array-typed blocks are
3995    returned; otherwise blocks of all types are returned. */
3996 
3997 XArray* /* of StackBlock */
VG_(di_get_stack_blocks_at_ip)3998 VG_(di_get_stack_blocks_at_ip)( Addr ip, Bool arrays_only )
3999 {
4000    /* This is a derivation of consider_vars_in_frame() above. */
4001    Word       i;
4002    DebugInfo* di;
4003    Bool debug = False;
4004 
4005    XArray* res = VG_(newXA)( ML_(dinfo_zalloc), "di.debuginfo.dgsbai.1",
4006                              ML_(dinfo_free),
4007                              sizeof(StackBlock) );
4008 
4009    static UInt n_search = 0;
4010    static UInt n_steps = 0;
4011    n_search++;
4012    if (debug)
4013       VG_(printf)("QQQQ: dgsbai: ip %#lx\n", ip);
4014    /* first, find the DebugInfo that pertains to 'ip'. */
4015    for (di = debugInfo_list; di; di = di->next) {
4016       n_steps++;
4017       /* text segment missing? unlikely, but handle it .. */
4018       if (!di->text_present || di->text_size == 0)
4019          continue;
4020       /* Ok.  So does this text mapping bracket the ip? */
4021       if (di->text_avma <= ip && ip < di->text_avma + di->text_size)
4022          break;
4023    }
4024 
4025    /* Didn't find it.  Strange -- means ip is a code address outside
4026       of any mapped text segment.  Unlikely but not impossible -- app
4027       could be generating code to run. */
4028    if (!di)
4029       return res; /* currently empty */
4030 
4031    if (0 && ((n_search & 0x1) == 0))
4032       VG_(printf)("VG_(di_get_stack_blocks_at_ip): %u searches, "
4033                   "%u DebugInfos looked at\n",
4034                   n_search, n_steps);
4035    /* Start of performance-enhancing hack: once every ??? (chosen
4036       hackily after profiling) successful searches, move the found
4037       DebugInfo one step closer to the start of the list.  This makes
4038       future searches cheaper. */
4039    if ((n_search & 0xFFFF) == 0) {
4040       /* Move si one step closer to the start of the list. */
4041       move_DebugInfo_one_step_forward( di );
4042    }
4043    /* End of performance-enhancing hack. */
4044 
4045    /* any var info at all? */
4046    if (!di->varinfo)
4047       return res; /* currently empty */
4048 
4049    /* Work through the scopes from most deeply nested outwards,
4050       looking for code address ranges that bracket 'ip'.  The
4051       variables on each such address range found are in scope right
4052       now.  Don't descend to level zero as that is the global
4053       scope. */
4054 
4055    /* "for each scope, working outwards ..." */
4056    for (i = VG_(sizeXA)(di->varinfo) - 1; i >= 1; i--) {
4057       XArray*      vars;
4058       Word         j;
4059       DiAddrRange* arange;
4060       OSet*        this_scope
4061          = *(OSet**)VG_(indexXA)( di->varinfo, i );
4062       if (debug)
4063          VG_(printf)("QQQQ:   considering scope %ld\n", (Word)i);
4064       if (!this_scope)
4065          continue;
4066       /* Find the set of variables in this scope that
4067          bracket the program counter. */
4068       arange = VG_(OSetGen_LookupWithCmp)(
4069                   this_scope, &ip,
4070                   ML_(cmp_for_DiAddrRange_range)
4071                );
4072       if (!arange)
4073          continue;
4074       /* stay sane */
4075       vg_assert(arange->aMin <= arange->aMax);
4076       /* It must bracket the ip we asked for, else
4077          ML_(cmp_for_DiAddrRange_range) is somehow broken. */
4078       vg_assert(arange->aMin <= ip && ip <= arange->aMax);
4079       /* It must have an attached XArray of DiVariables. */
4080       vars = arange->vars;
4081       vg_assert(vars);
4082       /* But it mustn't cover the entire address range.  We only
4083          expect that to happen for the global scope (level 0), which
4084          we're not looking at here.  Except, it may cover the entire
4085          address range, but in that case the vars array must be
4086          empty. */
4087       vg_assert(! (arange->aMin == (Addr)0
4088                    && arange->aMax == ~(Addr)0
4089                    && VG_(sizeXA)(vars) > 0) );
4090       for (j = 0; j < VG_(sizeXA)( vars ); j++) {
4091          DiVariable* var = (DiVariable*)VG_(indexXA)( vars, j );
4092          if (debug)
4093             VG_(printf)("QQQQ:    var:name=%s %#lx-%#lx %#lx\n",
4094                         var->name,arange->aMin,arange->aMax,ip);
4095          analyse_deps( res, di->admin_tyents, ip,
4096                        di, var, arrays_only );
4097       }
4098    }
4099 
4100    return res;
4101 }
4102 
4103 
4104 /* Get an array of GlobalBlock which describe the global blocks owned
4105    by the shared object characterised by the given di_handle.  Asserts
4106    if the handle is invalid.  The caller is responsible for freeing
4107    the array at some point.  If 'arrays_only' is True, only
4108    array-typed blocks are returned; otherwise blocks of all types are
4109    returned. */
4110 
4111 XArray* /* of GlobalBlock */
VG_(di_get_global_blocks_from_dihandle)4112 VG_(di_get_global_blocks_from_dihandle) ( ULong di_handle, Bool  arrays_only )
4113 {
4114    /* This is a derivation of consider_vars_in_frame() above. */
4115 
4116    DebugInfo* di;
4117    XArray* gvars; /* XArray* of GlobalBlock */
4118    Word nScopes, scopeIx;
4119 
4120    /* The first thing to do is find the DebugInfo that
4121       pertains to 'di_handle'. */
4122    vg_assert(di_handle > 0);
4123    for (di = debugInfo_list; di; di = di->next) {
4124       if (di->handle == di_handle)
4125          break;
4126    }
4127 
4128    /* If this fails, we were unable to find any DebugInfo with the
4129       given handle.  This is considered an error on the part of the
4130       caller. */
4131    vg_assert(di != NULL);
4132 
4133    /* we'll put the collected variables in here. */
4134    gvars = VG_(newXA)( ML_(dinfo_zalloc), "di.debuginfo.dggbfd.1",
4135                        ML_(dinfo_free), sizeof(GlobalBlock) );
4136 
4137    /* any var info at all? */
4138    if (!di->varinfo)
4139       return gvars;
4140 
4141    /* we'll iterate over all the variables we can find, even if
4142       it seems senseless to visit stack-allocated variables */
4143    /* Iterate over all scopes */
4144    nScopes = VG_(sizeXA)( di->varinfo );
4145    for (scopeIx = 0; scopeIx < nScopes; scopeIx++) {
4146 
4147       /* Iterate over each (code) address range at the current scope */
4148       DiAddrRange* range;
4149       OSet* /* of DiAddrInfo */ scope
4150          = *(OSet**)VG_(indexXA)( di->varinfo, scopeIx );
4151       vg_assert(scope);
4152       VG_(OSetGen_ResetIter)(scope);
4153       while ( (range = VG_(OSetGen_Next)(scope)) ) {
4154 
4155          /* Iterate over each variable in the current address range */
4156          Word nVars, varIx;
4157          vg_assert(range->vars);
4158          nVars = VG_(sizeXA)( range->vars );
4159          for (varIx = 0; varIx < nVars; varIx++) {
4160 
4161             Bool        isVec;
4162             GXResult    res;
4163             MaybeULong  mul;
4164             GlobalBlock gb;
4165             TyEnt*      ty;
4166             DiVariable* var = VG_(indexXA)( range->vars, varIx );
4167             vg_assert(var->name);
4168             if (0) VG_(printf)("at depth %ld var %s ", scopeIx, var->name );
4169 
4170             /* Now figure out if this variable has a constant address
4171                (that is, independent of FP, SP, phase of moon, etc),
4172                and if so, what the address is.  Any variable with a
4173                constant address is deemed to be a global so we collect
4174                it. */
4175             if (0) { VG_(printf)("EVAL: "); ML_(pp_GX)(var->gexpr);
4176                      VG_(printf)("\n"); }
4177             res = ML_(evaluate_trivial_GX)( var->gexpr, di );
4178 
4179             /* Not a constant address => not interesting */
4180             if (res.kind != GXR_Addr) {
4181                if (0) VG_(printf)("FAIL\n");
4182                continue;
4183             }
4184 
4185             /* Ok, it's a constant address.  See if we want to collect
4186                it. */
4187             if (0) VG_(printf)("%#lx\n", res.word);
4188 
4189             /* Figure out how big the variable is. */
4190             mul = ML_(sizeOfType)(di->admin_tyents, var->typeR);
4191 
4192             /* If this var has a type whose size is unknown, zero, or
4193                impossibly large, it should never have been added.
4194                ML_(addVar) should have rejected it. */
4195             vg_assert(mul.b == True);
4196             vg_assert(mul.ul > 0);
4197             if (sizeof(void*) == 4) vg_assert(mul.ul < (1ULL << 32));
4198             /* After this point, we assume we can truncate mul.ul to a
4199                host word safely (without loss of info). */
4200 
4201             /* skip if non-array and we're only interested in
4202                arrays */
4203             ty = ML_(TyEnts__index_by_cuOff)( di->admin_tyents, NULL,
4204                                               var->typeR );
4205             vg_assert(ty);
4206             vg_assert(ty->tag == Te_UNKNOWN || ML_(TyEnt__is_type)(ty));
4207             if (ty->tag == Te_UNKNOWN)
4208                continue; /* perhaps we should complain in this case? */
4209 
4210             isVec = ty->tag == Te_TyArray;
4211             if (arrays_only && !isVec) continue;
4212 
4213             /* Ok, so collect it! */
4214             vg_assert(var->name);
4215             vg_assert(di->soname);
4216             if (0) VG_(printf)("XXXX %s %s %d\n", var->name,
4217                                ML_(fndn_ix2filename)(di, var->fndn_ix),
4218                                var->lineNo);
4219             VG_(memset)(&gb, 0, sizeof(gb));
4220             gb.addr  = res.word;
4221             gb.szB   = (SizeT)mul.ul;
4222             gb.isVec = isVec;
4223             VG_(strncpy)(&gb.name[0], var->name, sizeof(gb.name)-1);
4224             VG_(strncpy)(&gb.soname[0], di->soname, sizeof(gb.soname)-1);
4225             vg_assert(gb.name[ sizeof(gb.name)-1 ] == 0);
4226             vg_assert(gb.soname[ sizeof(gb.soname)-1 ] == 0);
4227 
4228             VG_(addToXA)( gvars, &gb );
4229 
4230          } /* for (varIx = 0; varIx < nVars; varIx++) */
4231 
4232       } /* while ( (range = VG_(OSetGen_Next)(scope)) ) */
4233 
4234    } /* for (scopeIx = 0; scopeIx < nScopes; scopeIx++) */
4235 
4236    return gvars;
4237 }
4238 
4239 
4240 /*------------------------------------------------------------*/
4241 /*--- DebugInfo accessor functions                         ---*/
4242 /*------------------------------------------------------------*/
4243 
VG_(next_DebugInfo)4244 const DebugInfo* VG_(next_DebugInfo)(const DebugInfo* di)
4245 {
4246    if (di == NULL)
4247       return debugInfo_list;
4248    return di->next;
4249 }
4250 
VG_(DebugInfo_get_text_avma)4251 Addr VG_(DebugInfo_get_text_avma)(const DebugInfo* di)
4252 {
4253    return di->text_present ? di->text_avma : 0;
4254 }
4255 
VG_(DebugInfo_get_text_size)4256 SizeT VG_(DebugInfo_get_text_size)(const DebugInfo* di)
4257 {
4258    return di->text_present ? di->text_size : 0;
4259 }
4260 
VG_(DebugInfo_get_bss_avma)4261 Addr VG_(DebugInfo_get_bss_avma)(const DebugInfo* di)
4262 {
4263    return di->bss_present ? di->bss_avma : 0;
4264 }
4265 
VG_(DebugInfo_get_bss_size)4266 SizeT VG_(DebugInfo_get_bss_size)(const DebugInfo* di)
4267 {
4268    return di->bss_present ? di->bss_size : 0;
4269 }
4270 
VG_(DebugInfo_get_plt_avma)4271 Addr VG_(DebugInfo_get_plt_avma)(const DebugInfo* di)
4272 {
4273    return di->plt_present ? di->plt_avma : 0;
4274 }
4275 
VG_(DebugInfo_get_plt_size)4276 SizeT VG_(DebugInfo_get_plt_size)(const DebugInfo* di)
4277 {
4278    return di->plt_present ? di->plt_size : 0;
4279 }
4280 
VG_(DebugInfo_get_gotplt_avma)4281 Addr VG_(DebugInfo_get_gotplt_avma)(const DebugInfo* di)
4282 {
4283    return di->gotplt_present ? di->gotplt_avma : 0;
4284 }
4285 
VG_(DebugInfo_get_gotplt_size)4286 SizeT VG_(DebugInfo_get_gotplt_size)(const DebugInfo* di)
4287 {
4288    return di->gotplt_present ? di->gotplt_size : 0;
4289 }
4290 
VG_(DebugInfo_get_got_avma)4291 Addr VG_(DebugInfo_get_got_avma)(const DebugInfo* di)
4292 {
4293    return di->got_present ? di->got_avma : 0;
4294 }
4295 
VG_(DebugInfo_get_got_size)4296 SizeT VG_(DebugInfo_get_got_size)(const DebugInfo* di)
4297 {
4298    return di->got_present ? di->got_size : 0;
4299 }
4300 
VG_(DebugInfo_get_soname)4301 const HChar* VG_(DebugInfo_get_soname)(const DebugInfo* di)
4302 {
4303    return di->soname;
4304 }
4305 
VG_(DebugInfo_get_filename)4306 const HChar* VG_(DebugInfo_get_filename)(const DebugInfo* di)
4307 {
4308    return di->fsm.filename;
4309 }
4310 
VG_(DebugInfo_get_text_bias)4311 PtrdiffT VG_(DebugInfo_get_text_bias)(const DebugInfo* di)
4312 {
4313    return di->text_present ? di->text_bias : 0;
4314 }
4315 
VG_(DebugInfo_syms_howmany)4316 Int VG_(DebugInfo_syms_howmany) ( const DebugInfo *si )
4317 {
4318    return si->symtab_used;
4319 }
4320 
VG_(DebugInfo_syms_getidx)4321 void VG_(DebugInfo_syms_getidx) ( const DebugInfo *si,
4322                                         Int idx,
4323                                   /*OUT*/SymAVMAs* avmas,
4324                                   /*OUT*/UInt*     size,
4325                                   /*OUT*/const HChar**   pri_name,
4326                                   /*OUT*/const HChar***  sec_names,
4327                                   /*OUT*/Bool*     isText,
4328                                   /*OUT*/Bool*     isIFunc,
4329                                   /*OUT*/Bool*     isGlobal )
4330 {
4331    vg_assert(idx >= 0 && idx < si->symtab_used);
4332    if (avmas)     *avmas     = si->symtab[idx].avmas;
4333    if (size)      *size      = si->symtab[idx].size;
4334    if (pri_name)  *pri_name  = si->symtab[idx].pri_name;
4335    if (sec_names) *sec_names = si->symtab[idx].sec_names;
4336    if (isText)    *isText    = si->symtab[idx].isText;
4337    if (isIFunc)   *isIFunc   = si->symtab[idx].isIFunc;
4338    if (isGlobal)  *isGlobal  = si->symtab[idx].isGlobal;
4339 }
4340 
4341 
4342 /*------------------------------------------------------------*/
4343 /*--- SectKind query functions                             ---*/
4344 /*------------------------------------------------------------*/
4345 
4346 /* Convert a VgSectKind to a string, which must be copied if you want
4347    to change it. */
VG_(pp_SectKind)4348 const HChar* VG_(pp_SectKind)( VgSectKind kind )
4349 {
4350    switch (kind) {
4351       case Vg_SectUnknown: return "Unknown";
4352       case Vg_SectText:    return "Text";
4353       case Vg_SectData:    return "Data";
4354       case Vg_SectBSS:     return "BSS";
4355       case Vg_SectGOT:     return "GOT";
4356       case Vg_SectPLT:     return "PLT";
4357       case Vg_SectOPD:     return "OPD";
4358       case Vg_SectGOTPLT:  return "GOTPLT";
4359       default:             vg_assert(0);
4360    }
4361 }
4362 
4363 /* Given an address 'a', make a guess of which section of which object
4364    it comes from.  If name is non-NULL, then the object's name is put
4365    in *name. The returned name, if any, should be saved away, if there is
4366    a chance that a debug-info will be discarded and the name is being
4367    used later on. */
VG_(DebugInfo_sect_kind)4368 VgSectKind VG_(DebugInfo_sect_kind)( /*OUT*/const HChar** objname, Addr a)
4369 {
4370    DebugInfo* di;
4371    VgSectKind res = Vg_SectUnknown;
4372 
4373    for (di = debugInfo_list; di != NULL; di = di->next) {
4374 
4375       if (0)
4376          VG_(printf)(
4377             "addr=%#lx di=%p %s got=%#lx,%lu plt=%#lx,%lu "
4378             "data=%#lx,%lu bss=%#lx,%lu\n",
4379             a, di, di->fsm.filename,
4380             di->got_avma,  di->got_size,
4381             di->plt_avma,  di->plt_size,
4382             di->data_avma, di->data_size,
4383             di->bss_avma,  di->bss_size);
4384 
4385       if (di->text_present
4386           && di->text_size > 0
4387           && a >= di->text_avma && a < di->text_avma + di->text_size) {
4388          res = Vg_SectText;
4389          break;
4390       }
4391       if (di->data_present
4392           && di->data_size > 0
4393           && a >= di->data_avma && a < di->data_avma + di->data_size) {
4394          res = Vg_SectData;
4395          break;
4396       }
4397       if (di->sdata_present
4398           && di->sdata_size > 0
4399           && a >= di->sdata_avma && a < di->sdata_avma + di->sdata_size) {
4400          res = Vg_SectData;
4401          break;
4402       }
4403       if (di->bss_present
4404           && di->bss_size > 0
4405           && a >= di->bss_avma && a < di->bss_avma + di->bss_size) {
4406          res = Vg_SectBSS;
4407          break;
4408       }
4409       if (di->sbss_present
4410           && di->sbss_size > 0
4411           && a >= di->sbss_avma && a < di->sbss_avma + di->sbss_size) {
4412          res = Vg_SectBSS;
4413          break;
4414       }
4415       if (di->plt_present
4416           && di->plt_size > 0
4417           && a >= di->plt_avma && a < di->plt_avma + di->plt_size) {
4418          res = Vg_SectPLT;
4419          break;
4420       }
4421       if (di->got_present
4422           && di->got_size > 0
4423           && a >= di->got_avma && a < di->got_avma + di->got_size) {
4424          res = Vg_SectGOT;
4425          break;
4426       }
4427       if (di->gotplt_present
4428           && di->gotplt_size > 0
4429           && a >= di->gotplt_avma && a < di->gotplt_avma + di->gotplt_size) {
4430          res = Vg_SectGOTPLT;
4431          break;
4432       }
4433       if (di->opd_present
4434           && di->opd_size > 0
4435           && a >= di->opd_avma && a < di->opd_avma + di->opd_size) {
4436          res = Vg_SectOPD;
4437          break;
4438       }
4439       /* we could also check for .eh_frame, if anyone really cares */
4440    }
4441 
4442    vg_assert( (di == NULL && res == Vg_SectUnknown)
4443               || (di != NULL && res != Vg_SectUnknown) );
4444 
4445    if (objname) {
4446       if (di && di->fsm.filename) {
4447          *objname = di->fsm.filename;
4448       } else {
4449          *objname = "???";
4450       }
4451    }
4452 
4453    return res;
4454 
4455 }
4456 
4457 static UInt debuginfo_generation = 0;
4458 
VG_(debuginfo_generation)4459 UInt VG_(debuginfo_generation) (void)
4460 {
4461    return debuginfo_generation;
4462 }
4463 
caches__invalidate(void)4464 static void caches__invalidate ( void ) {
4465    cfsi_m_cache__invalidate();
4466    sym_name_cache__invalidate();
4467    debuginfo_generation++;
4468 }
4469 
4470 /*--------------------------------------------------------------------*/
4471 /*--- end                                                          ---*/
4472 /*--------------------------------------------------------------------*/
4473