1
2 /*--------------------------------------------------------------------*/
3 /*--- Format-neutral storage of and querying of info acquired from ---*/
4 /*--- ELF/XCOFF stabs/dwarf1/dwarf2/dwarf3 debug info. ---*/
5 /*--- storage.c ---*/
6 /*--------------------------------------------------------------------*/
7
8 /*
9 This file is part of Valgrind, a dynamic binary instrumentation
10 framework.
11
12 Copyright (C) 2000-2010 Julian Seward
13 jseward@acm.org
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31 */
32
33 /* This file manages the data structures built by the debuginfo
34 system. These are: the top level SegInfo list. For each SegInfo,
35 there are tables for for address-to-symbol mappings,
36 address-to-src-file/line mappings, and address-to-CFI-info
37 mappings.
38 */
39
40 #include "pub_core_basics.h"
41 #include "pub_core_options.h" /* VG_(clo_verbosity) */
42 #include "pub_core_debuginfo.h"
43 #include "pub_core_libcassert.h"
44 #include "pub_core_libcbase.h"
45 #include "pub_core_libcprint.h"
46 #include "pub_core_xarray.h"
47 #include "pub_core_oset.h"
48
49 #include "priv_misc.h" /* dinfo_zalloc/free/strdup */
50 #include "priv_d3basics.h" /* ML_(pp_GX) */
51 #include "priv_tytypes.h"
52 #include "priv_storage.h" /* self */
53
54
55 /*------------------------------------------------------------*/
56 /*--- Misc (printing, errors) ---*/
57 /*------------------------------------------------------------*/
58
59 /* Show a non-fatal debug info reading error. Use vg_panic if
60 terminal. 'serious' errors are shown regardless of the
61 verbosity setting. */
ML_(symerr)62 void ML_(symerr) ( struct _DebugInfo* di, Bool serious, HChar* msg )
63 {
64 /* XML mode hides everything :-( */
65 if (VG_(clo_xml))
66 return;
67
68 if (serious) {
69
70 VG_(message)(Vg_DebugMsg, "WARNING: Serious error when "
71 "reading debug info\n");
72 if (True || VG_(clo_verbosity) < 2) {
73 /* Need to show what the file name is, at verbosity levels 2
74 or below, since that won't already have been shown */
75 VG_(message)(Vg_DebugMsg,
76 "When reading debug info from %s:\n",
77 (di && di->filename) ? di->filename : (UChar*)"???");
78 }
79 VG_(message)(Vg_DebugMsg, "%s\n", msg);
80
81 } else { /* !serious */
82
83 if (VG_(clo_verbosity) >= 2)
84 VG_(message)(Vg_DebugMsg, "%s\n", msg);
85
86 }
87 }
88
89
90 /* Print a symbol. */
ML_(ppSym)91 void ML_(ppSym) ( Int idx, DiSym* sym )
92 {
93 VG_(printf)( "%5d: %#8lx .. %#8lx (%d) %s\n",
94 idx,
95 sym->addr,
96 sym->addr + sym->size - 1, sym->size,
97 sym->name );
98 }
99
100 /* Print a call-frame-info summary. */
ML_(ppDiCfSI)101 void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs, DiCfSI* si )
102 {
103 # define SHOW_HOW(_how, _off) \
104 do { \
105 if (_how == CFIR_UNKNOWN) { \
106 VG_(printf)("Unknown"); \
107 } else \
108 if (_how == CFIR_SAME) { \
109 VG_(printf)("Same"); \
110 } else \
111 if (_how == CFIR_CFAREL) { \
112 VG_(printf)("cfa+%d", _off); \
113 } else \
114 if (_how == CFIR_MEMCFAREL) { \
115 VG_(printf)("*(cfa+%d)", _off); \
116 } else \
117 if (_how == CFIR_EXPR) { \
118 VG_(printf)("{"); \
119 ML_(ppCfiExpr)(exprs, _off); \
120 VG_(printf)("}"); \
121 } else { \
122 vg_assert(0+0); \
123 } \
124 } while (0)
125
126 VG_(printf)("[%#lx .. %#lx]: ", si->base,
127 si->base + (UWord)si->len - 1);
128 switch (si->cfa_how) {
129 case CFIC_IA_SPREL:
130 VG_(printf)("let cfa=oldSP+%d", si->cfa_off);
131 break;
132 case CFIC_IA_BPREL:
133 VG_(printf)("let cfa=oldBP+%d", si->cfa_off);
134 break;
135 case CFIC_ARM_R13REL:
136 VG_(printf)("let cfa=oldR13+%d", si->cfa_off);
137 break;
138 case CFIC_ARM_R12REL:
139 VG_(printf)("let cfa=oldR12+%d", si->cfa_off);
140 break;
141 case CFIC_ARM_R11REL:
142 VG_(printf)("let cfa=oldR11+%d", si->cfa_off);
143 break;
144 case CFIC_ARM_R7REL:
145 VG_(printf)("let cfa=oldR7+%d", si->cfa_off);
146 break;
147 case CFIC_EXPR:
148 VG_(printf)("let cfa={");
149 ML_(ppCfiExpr)(exprs, si->cfa_off);
150 VG_(printf)("}");
151 break;
152 default:
153 vg_assert(0);
154 }
155
156 VG_(printf)(" in RA=");
157 SHOW_HOW(si->ra_how, si->ra_off);
158 # if defined(VGA_x86) || defined(VGA_amd64)
159 VG_(printf)(" SP=");
160 SHOW_HOW(si->sp_how, si->sp_off);
161 VG_(printf)(" BP=");
162 SHOW_HOW(si->bp_how, si->bp_off);
163 # elif defined(VGA_arm)
164 VG_(printf)(" R14=");
165 SHOW_HOW(si->r14_how, si->r14_off);
166 VG_(printf)(" R13=");
167 SHOW_HOW(si->r13_how, si->r13_off);
168 VG_(printf)(" R12=");
169 SHOW_HOW(si->r12_how, si->r12_off);
170 VG_(printf)(" R11=");
171 SHOW_HOW(si->r11_how, si->r11_off);
172 VG_(printf)(" R7=");
173 SHOW_HOW(si->r7_how, si->r7_off);
174 # elif defined(VGA_ppc32) || defined(VGA_ppc64)
175 # else
176 # error "Unknown arch"
177 # endif
178 VG_(printf)("\n");
179 # undef SHOW_HOW
180 }
181
182
183 /*------------------------------------------------------------*/
184 /*--- Adding stuff ---*/
185 /*------------------------------------------------------------*/
186
187 /* Add a str to the string table, including terminating zero, and
188 return pointer to the string in vg_strtab. Unless it's been seen
189 recently, in which case we find the old pointer and return that.
190 This avoids the most egregious duplications.
191
192 JSGF: changed from returning an index to a pointer, and changed to
193 a chunking memory allocator rather than reallocating, so the
194 pointers are stable.
195 */
ML_(addStr)196 UChar* ML_(addStr) ( struct _DebugInfo* di, UChar* str, Int len )
197 {
198 struct strchunk *chunk;
199 Int space_needed;
200 UChar* p;
201
202 if (len == -1) {
203 len = VG_(strlen)(str);
204 } else {
205 vg_assert(len >= 0);
206 }
207
208 space_needed = 1 + len;
209
210 // Allocate a new strtab chunk if necessary
211 if (di->strchunks == NULL ||
212 (di->strchunks->strtab_used
213 + space_needed) > SEGINFO_STRCHUNKSIZE) {
214 chunk = ML_(dinfo_zalloc)("di.storage.addStr.1", sizeof(*chunk));
215 chunk->strtab_used = 0;
216 chunk->next = di->strchunks;
217 di->strchunks = chunk;
218 }
219 chunk = di->strchunks;
220
221 p = &chunk->strtab[chunk->strtab_used];
222 VG_(memcpy)(p, str, len);
223 chunk->strtab[chunk->strtab_used+len] = '\0';
224 chunk->strtab_used += space_needed;
225
226 return p;
227 }
228
229
230 /* Add a symbol to the symbol table.
231 */
ML_(addSym)232 void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym )
233 {
234 UInt new_sz, i;
235 DiSym* new_tab;
236
237 /* Ignore zero-sized syms. */
238 if (sym->size == 0) return;
239
240 if (di->symtab_used == di->symtab_size) {
241 new_sz = 2 * di->symtab_size;
242 if (new_sz == 0) new_sz = 500;
243 new_tab = ML_(dinfo_zalloc)( "di.storage.addSym.1",
244 new_sz * sizeof(DiSym) );
245 if (di->symtab != NULL) {
246 for (i = 0; i < di->symtab_used; i++)
247 new_tab[i] = di->symtab[i];
248 ML_(dinfo_free)(di->symtab);
249 }
250 di->symtab = new_tab;
251 di->symtab_size = new_sz;
252 }
253
254 di->symtab[di->symtab_used] = *sym;
255 di->symtab_used++;
256 vg_assert(di->symtab_used <= di->symtab_size);
257 }
258
259
260 /* Resize the symbol table to save memory.
261 */
ML_(shrinkSym)262 void ML_(shrinkSym)( struct _DebugInfo* di )
263 {
264 DiSym* new_tab;
265 UInt new_sz = di->symtab_used;
266 if (new_sz == di->symtab_size) return;
267
268 new_tab = ML_(dinfo_zalloc)( "di.storage.shrinkSym",
269 new_sz * sizeof(DiSym) );
270 VG_(memcpy)(new_tab, di->symtab, new_sz * sizeof(DiSym));
271
272 ML_(dinfo_free)(di->symtab);
273 di->symtab = new_tab;
274 di->symtab_size = new_sz;
275 }
276
277
278 /* Add a location to the location table.
279 */
addLoc(struct _DebugInfo * di,DiLoc * loc)280 static void addLoc ( struct _DebugInfo* di, DiLoc* loc )
281 {
282 UInt new_sz, i;
283 DiLoc* new_tab;
284
285 /* Zero-sized locs should have been ignored earlier */
286 vg_assert(loc->size > 0);
287
288 if (di->loctab_used == di->loctab_size) {
289 new_sz = 2 * di->loctab_size;
290 if (new_sz == 0) new_sz = 500;
291 new_tab = ML_(dinfo_zalloc)( "di.storage.addLoc.1",
292 new_sz * sizeof(DiLoc) );
293 if (di->loctab != NULL) {
294 for (i = 0; i < di->loctab_used; i++)
295 new_tab[i] = di->loctab[i];
296 ML_(dinfo_free)(di->loctab);
297 }
298 di->loctab = new_tab;
299 di->loctab_size = new_sz;
300 }
301
302 di->loctab[di->loctab_used] = *loc;
303 di->loctab_used++;
304 vg_assert(di->loctab_used <= di->loctab_size);
305 }
306
307
308 /* Resize the lineinfo table to save memory.
309 */
ML_(shrinkLineInfo)310 void ML_(shrinkLineInfo)( struct _DebugInfo* di )
311 {
312 DiLoc* new_tab;
313 UInt new_sz = di->loctab_used;
314 if (new_sz == di->loctab_size) return;
315
316 new_tab = ML_(dinfo_zalloc)( "di.storage.shrinkLineInfo",
317 new_sz * sizeof(DiLoc) );
318 VG_(memcpy)(new_tab, di->loctab, new_sz * sizeof(DiLoc));
319
320 ML_(dinfo_free)(di->loctab);
321 di->loctab = new_tab;
322 di->loctab_size = new_sz;
323 }
324
325
326 /* Top-level place to call to add a source-location mapping entry.
327 */
ML_(addLineInfo)328 void ML_(addLineInfo) ( struct _DebugInfo* di,
329 UChar* filename,
330 UChar* dirname, /* NULL == directory is unknown */
331 Addr this,
332 Addr next,
333 Int lineno,
334 Int entry /* only needed for debug printing */
335 )
336 {
337 static const Bool debug = False;
338 DiLoc loc;
339 Int size = next - this;
340
341 /* Ignore zero-sized locs */
342 if (this == next) return;
343
344 if (debug)
345 VG_(printf)( " src %s %s line %d %#lx-%#lx\n",
346 dirname ? dirname : (UChar*)"(unknown)",
347 filename, lineno, this, next );
348
349 /* Maximum sanity checking. Some versions of GNU as do a shabby
350 * job with stabs entries; if anything looks suspicious, revert to
351 * a size of 1. This should catch the instruction of interest
352 * (since if using asm-level debug info, one instruction will
353 * correspond to one line, unlike with C-level debug info where
354 * multiple instructions can map to the one line), but avoid
355 * catching any other instructions bogusly. */
356 if (this > next) {
357 if (VG_(clo_verbosity) > 2) {
358 VG_(message)(Vg_DebugMsg,
359 "warning: line info addresses out of order "
360 "at entry %d: 0x%lx 0x%lx\n", entry, this, next);
361 }
362 size = 1;
363 }
364
365 if (size > MAX_LOC_SIZE) {
366 if (0)
367 VG_(message)(Vg_DebugMsg,
368 "warning: line info address range too large "
369 "at entry %d: %d\n", entry, size);
370 size = 1;
371 }
372
373 /* Rule out ones which are completely outside the r-x mapped area.
374 See "Comment_Regarding_Text_Range_Checks" elsewhere in this file
375 for background and rationale. */
376 vg_assert(di->have_rx_map && di->have_rw_map);
377 if (next-1 < di->rx_map_avma
378 || this >= di->rx_map_avma + di->rx_map_size ) {
379 if (0)
380 VG_(message)(Vg_DebugMsg,
381 "warning: ignoring line info entry falling "
382 "outside current DebugInfo: %#lx %#lx %#lx %#lx\n",
383 di->text_avma,
384 di->text_avma + di->text_size,
385 this, next-1);
386 return;
387 }
388
389 vg_assert(lineno >= 0);
390 if (lineno > MAX_LINENO) {
391 static Bool complained = False;
392 if (!complained) {
393 complained = True;
394 VG_(message)(Vg_UserMsg,
395 "warning: ignoring line info entry with "
396 "huge line number (%d)\n", lineno);
397 VG_(message)(Vg_UserMsg,
398 " Can't handle line numbers "
399 "greater than %d, sorry\n", MAX_LINENO);
400 VG_(message)(Vg_UserMsg,
401 "(Nb: this message is only shown once)\n");
402 }
403 return;
404 }
405
406 loc.addr = this;
407 loc.size = (UShort)size;
408 loc.lineno = lineno;
409 loc.filename = filename;
410 loc.dirname = dirname;
411
412 if (0) VG_(message)(Vg_DebugMsg,
413 "addLoc: addr %#lx, size %d, line %d, file %s\n",
414 this,size,lineno,filename);
415
416 addLoc ( di, &loc );
417 }
418
419
420 /* Top-level place to call to add a CFI summary record. The supplied
421 DiCfSI is copied. */
ML_(addDiCfSI)422 void ML_(addDiCfSI) ( struct _DebugInfo* di, DiCfSI* cfsi_orig )
423 {
424 static const Bool debug = False;
425 UInt new_sz, i;
426 DiCfSI* new_tab;
427 SSizeT delta;
428
429 /* copy the original, so we can mess with it */
430 DiCfSI cfsi = *cfsi_orig;
431
432 if (debug) {
433 VG_(printf)("adding DiCfSI: ");
434 ML_(ppDiCfSI)(di->cfsi_exprs, &cfsi);
435 }
436
437 /* sanity */
438 vg_assert(cfsi.len > 0);
439 /* If this fails, the implication is you have a single procedure
440 with more than 5 million bytes of code. Which is pretty
441 unlikely. Either that, or the debuginfo reader is somehow
442 broken. 5 million is of course arbitrary; but it's big enough
443 to be bigger than the size of any plausible piece of code that
444 would fall within a single procedure. */
445 vg_assert(cfsi.len < 5000000);
446
447 vg_assert(di->have_rx_map && di->have_rw_map);
448 /* If we have an empty r-x mapping (is that possible?) then the
449 DiCfSI can't possibly fall inside it. In which case skip. */
450 if (di->rx_map_size == 0)
451 return;
452
453 /* Rule out ones which are completely outside the r-x mapped area.
454 See "Comment_Regarding_Text_Range_Checks" elsewhere in this file
455 for background and rationale. */
456 if (cfsi.base + cfsi.len - 1 < di->rx_map_avma
457 || cfsi.base >= di->rx_map_avma + di->rx_map_size) {
458 static Int complaints = 10;
459 if (VG_(clo_trace_cfi) || complaints > 0) {
460 complaints--;
461 if (VG_(clo_verbosity) > 1) {
462 VG_(message)(
463 Vg_DebugMsg,
464 "warning: DiCfSI %#lx .. %#lx outside segment %#lx .. %#lx\n",
465 cfsi.base,
466 cfsi.base + cfsi.len - 1,
467 di->text_avma,
468 di->text_avma + di->text_size - 1
469 );
470 }
471 if (VG_(clo_trace_cfi))
472 ML_(ppDiCfSI)(di->cfsi_exprs, &cfsi);
473 }
474 return;
475 }
476
477 /* Now we know the range is at least partially inside the r-x
478 mapped area. That implies that at least one of the ends of the
479 range falls inside the area. If necessary, clip it so it is
480 completely within the area. If we don't do this,
481 check_CFSI_related_invariants() in debuginfo.c (invariant #2)
482 will fail. See
483 "Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS" in
484 priv_storage.h for background. */
485 if (cfsi.base < di->rx_map_avma) {
486 /* Lower end is outside the mapped area. Hence upper end must
487 be inside it. */
488 if (0) VG_(printf)("XXX truncate lower\n");
489 vg_assert(cfsi.base + cfsi.len - 1 >= di->rx_map_avma);
490 delta = (SSizeT)(di->rx_map_avma - cfsi.base);
491 vg_assert(delta > 0);
492 vg_assert(delta < (SSizeT)cfsi.len);
493 cfsi.base += delta;
494 cfsi.len -= delta;
495 }
496 else
497 if (cfsi.base + cfsi.len - 1 > di->rx_map_avma + di->rx_map_size - 1) {
498 /* Upper end is outside the mapped area. Hence lower end must be
499 inside it. */
500 if (0) VG_(printf)("XXX truncate upper\n");
501 vg_assert(cfsi.base <= di->rx_map_avma + di->rx_map_size - 1);
502 delta = (SSizeT)( (cfsi.base + cfsi.len - 1)
503 - (di->rx_map_avma + di->rx_map_size - 1) );
504 vg_assert(delta > 0); vg_assert(delta < (SSizeT)cfsi.len);
505 cfsi.len -= delta;
506 }
507
508 /* Final checks */
509
510 /* Because: either cfsi was entirely inside the range, in which
511 case we asserted that len > 0 at the start, OR it fell partially
512 inside the range, in which case we reduced it by some size
513 (delta) which is < its original size. */
514 vg_assert(cfsi.len > 0);
515
516 /* Similar logic applies for the next two assertions. */
517 vg_assert(cfsi.base >= di->rx_map_avma);
518 vg_assert(cfsi.base + cfsi.len - 1
519 <= di->rx_map_avma + di->rx_map_size - 1);
520
521 if (di->cfsi_used == di->cfsi_size) {
522 new_sz = 2 * di->cfsi_size;
523 if (new_sz == 0) new_sz = 20;
524 new_tab = ML_(dinfo_zalloc)( "di.storage.addDiCfSI.1",
525 new_sz * sizeof(DiCfSI) );
526 if (di->cfsi != NULL) {
527 for (i = 0; i < di->cfsi_used; i++)
528 new_tab[i] = di->cfsi[i];
529 ML_(dinfo_free)(di->cfsi);
530 }
531 di->cfsi = new_tab;
532 di->cfsi_size = new_sz;
533 }
534
535 di->cfsi[di->cfsi_used] = cfsi;
536 di->cfsi_used++;
537 vg_assert(di->cfsi_used <= di->cfsi_size);
538 }
539
540
ML_(CfiExpr_Undef)541 Int ML_(CfiExpr_Undef)( XArray* dst )
542 {
543 CfiExpr e;
544 VG_(memset)( &e, 0, sizeof(e) );
545 e.tag = Cex_Undef;
546 return (Int)VG_(addToXA)( dst, &e );
547 }
ML_(CfiExpr_Deref)548 Int ML_(CfiExpr_Deref)( XArray* dst, Int ixAddr )
549 {
550 CfiExpr e;
551 VG_(memset)( &e, 0, sizeof(e) );
552 e.tag = Cex_Deref;
553 e.Cex.Deref.ixAddr = ixAddr;
554 return (Int)VG_(addToXA)( dst, &e );
555 }
ML_(CfiExpr_Const)556 Int ML_(CfiExpr_Const)( XArray* dst, UWord con )
557 {
558 CfiExpr e;
559 VG_(memset)( &e, 0, sizeof(e) );
560 e.tag = Cex_Const;
561 e.Cex.Const.con = con;
562 return (Int)VG_(addToXA)( dst, &e );
563 }
ML_(CfiExpr_Binop)564 Int ML_(CfiExpr_Binop)( XArray* dst, CfiOp op, Int ixL, Int ixR )
565 {
566 CfiExpr e;
567 VG_(memset)( &e, 0, sizeof(e) );
568 e.tag = Cex_Binop;
569 e.Cex.Binop.op = op;
570 e.Cex.Binop.ixL = ixL;
571 e.Cex.Binop.ixR = ixR;
572 return (Int)VG_(addToXA)( dst, &e );
573 }
ML_(CfiExpr_CfiReg)574 Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg )
575 {
576 CfiExpr e;
577 VG_(memset)( &e, 0, sizeof(e) );
578 e.tag = Cex_CfiReg;
579 e.Cex.CfiReg.reg = reg;
580 return (Int)VG_(addToXA)( dst, &e );
581 }
ML_(CfiExpr_DwReg)582 Int ML_(CfiExpr_DwReg)( XArray* dst, Int reg )
583 {
584 CfiExpr e;
585 VG_(memset)( &e, 0, sizeof(e) );
586 e.tag = Cex_DwReg;
587 e.Cex.DwReg.reg = reg;
588 return (Int)VG_(addToXA)( dst, &e );
589 }
590
ppCfiOp(CfiOp op)591 static void ppCfiOp ( CfiOp op )
592 {
593 switch (op) {
594 case Cop_Add: VG_(printf)("+"); break;
595 case Cop_Sub: VG_(printf)("-"); break;
596 case Cop_And: VG_(printf)("&"); break;
597 case Cop_Mul: VG_(printf)("*"); break;
598 default: vg_assert(0);
599 }
600 }
601
ppCfiReg(CfiReg reg)602 static void ppCfiReg ( CfiReg reg )
603 {
604 switch (reg) {
605 case Creg_IA_SP: VG_(printf)("xSP"); break;
606 case Creg_IA_BP: VG_(printf)("xBP"); break;
607 case Creg_IA_IP: VG_(printf)("xIP"); break;
608 case Creg_ARM_R13: VG_(printf)("R13"); break;
609 case Creg_ARM_R12: VG_(printf)("R12"); break;
610 case Creg_ARM_R15: VG_(printf)("R15"); break;
611 case Creg_ARM_R14: VG_(printf)("R14"); break;
612 default: vg_assert(0);
613 }
614 }
615
ML_(ppCfiExpr)616 void ML_(ppCfiExpr)( XArray* src, Int ix )
617 {
618 /* VG_(indexXA) checks for invalid src/ix values, so we can
619 use it indiscriminately. */
620 CfiExpr* e = (CfiExpr*) VG_(indexXA)( src, ix );
621 switch (e->tag) {
622 case Cex_Undef:
623 VG_(printf)("Undef");
624 break;
625 case Cex_Deref:
626 VG_(printf)("*(");
627 ML_(ppCfiExpr)(src, e->Cex.Deref.ixAddr);
628 VG_(printf)(")");
629 break;
630 case Cex_Const:
631 VG_(printf)("0x%lx", e->Cex.Const.con);
632 break;
633 case Cex_Binop:
634 VG_(printf)("(");
635 ML_(ppCfiExpr)(src, e->Cex.Binop.ixL);
636 VG_(printf)(")");
637 ppCfiOp(e->Cex.Binop.op);
638 VG_(printf)("(");
639 ML_(ppCfiExpr)(src, e->Cex.Binop.ixR);
640 VG_(printf)(")");
641 break;
642 case Cex_CfiReg:
643 ppCfiReg(e->Cex.CfiReg.reg);
644 break;
645 case Cex_DwReg:
646 VG_(printf)("dwr%d", e->Cex.DwReg.reg);
647 break;
648 default:
649 VG_(core_panic)("ML_(ppCfiExpr)");
650 /*NOTREACHED*/
651 break;
652 }
653 }
654
655
ML_(cmp_for_DiAddrRange_range)656 Word ML_(cmp_for_DiAddrRange_range) ( const void* keyV,
657 const void* elemV ) {
658 const Addr* key = (const Addr*)keyV;
659 const DiAddrRange* elem = (const DiAddrRange*)elemV;
660 if (0)
661 VG_(printf)("cmp_for_DiAddrRange_range: %#lx vs %#lx\n",
662 *key, elem->aMin);
663 if ((*key) < elem->aMin) return -1;
664 if ((*key) > elem->aMax) return 1;
665 return 0;
666 }
667
668 static
show_scope(OSet * scope,HChar * who)669 void show_scope ( OSet* /* of DiAddrRange */ scope, HChar* who )
670 {
671 DiAddrRange* range;
672 VG_(printf)("Scope \"%s\" = {\n", who);
673 VG_(OSetGen_ResetIter)( scope );
674 while (True) {
675 range = VG_(OSetGen_Next)( scope );
676 if (!range) break;
677 VG_(printf)(" %#lx .. %#lx: %lu vars\n", range->aMin, range->aMax,
678 range->vars ? VG_(sizeXA)(range->vars) : 0);
679 }
680 VG_(printf)("}\n");
681 }
682
683 /* Add the variable 'var' to 'scope' for the address range [aMin,aMax]
684 (inclusive of aMin and aMax). Split existing ranges as required if
685 aMin or aMax or both don't match existing range boundaries, and add
686 'var' to all required ranges. Take great care to preserve the
687 invariant that the ranges in 'scope' cover the entire address range
688 exactly once, with no overlaps and no holes. */
add_var_to_arange(OSet * scope,Addr aMin,Addr aMax,DiVariable * var)689 static void add_var_to_arange (
690 /*MOD*/OSet* /* of DiAddrRange */ scope,
691 Addr aMin,
692 Addr aMax,
693 DiVariable* var
694 )
695 {
696 DiAddrRange *first, *last, *range;
697 /* These xx variables are for assertion checking only; they don't
698 contribute anything to the actual work of this function. */
699 DiAddrRange *xxRangep, *xxFirst, *xxLast;
700 UWord xxIters;
701
702 vg_assert(aMin <= aMax);
703
704 if (0) VG_(printf)("add_var_to_arange: %#lx .. %#lx\n", aMin, aMax);
705 if (0) show_scope( scope, "add_var_to_arange(1)" );
706
707 /* See if the lower end of the range (aMin) falls exactly on an
708 existing range boundary. If not, find the range it does fall
709 into, and split it (copying the variables in the process), so
710 that aMin does exactly fall on a range boundary. */
711 first = VG_(OSetGen_Lookup)( scope, &aMin );
712 /* It must be present, since the presented OSet must cover
713 the entire address range. */
714 vg_assert(first);
715 vg_assert(first->aMin <= first->aMax);
716 vg_assert(first->aMin <= aMin && aMin <= first->aMax);
717
718 /* Fast track common case, which is that the range specified for
719 the variable exactly coincides with one already-existing
720 range. */
721 if (first->aMin == aMin && first->aMax == aMax) {
722 vg_assert(first->vars);
723 VG_(addToXA)( first->vars, var );
724 return;
725 }
726
727 /* We have to get into splitting ranges, which is complex
728 and slow. */
729 if (first->aMin < aMin) {
730 DiAddrRange* nyu;
731 /* Ok. We'll have to split 'first'. */
732 /* truncate the upper end of 'first' */
733 Addr tmp = first->aMax;
734 first->aMax = aMin-1;
735 vg_assert(first->aMin <= first->aMax);
736 /* create a new range */
737 nyu = VG_(OSetGen_AllocNode)( scope, sizeof(DiAddrRange) );
738 vg_assert(nyu);
739 nyu->aMin = aMin;
740 nyu->aMax = tmp;
741 vg_assert(nyu->aMin <= nyu->aMax);
742 /* copy vars into it */
743 vg_assert(first->vars);
744 nyu->vars = VG_(cloneXA)( "di.storage.avta.1", first->vars );
745 vg_assert(nyu->vars);
746 VG_(OSetGen_Insert)( scope, nyu );
747 first = nyu;
748 }
749
750 vg_assert(first->aMin == aMin);
751
752 /* Now do exactly the same for the upper end (aMax): if it doesn't
753 fall on a boundary, cause it to do so by splitting the range it
754 does currently fall into. */
755 last = VG_(OSetGen_Lookup)( scope, &aMax );
756 vg_assert(last->aMin <= last->aMax);
757 vg_assert(last->aMin <= aMax && aMax <= last->aMax);
758
759 if (aMax < last->aMax) {
760 DiAddrRange* nyu;
761 /* We have to split 'last'. */
762 /* truncate the lower end of 'last' */
763 Addr tmp = last->aMin;
764 last->aMin = aMax+1;
765 vg_assert(last->aMin <= last->aMax);
766 /* create a new range */
767 nyu = VG_(OSetGen_AllocNode)( scope, sizeof(DiAddrRange) );
768 vg_assert(nyu);
769 nyu->aMin = tmp;
770 nyu->aMax = aMax;
771 vg_assert(nyu->aMin <= nyu->aMax);
772 /* copy vars into it */
773 vg_assert(last->vars);
774 nyu->vars = VG_(cloneXA)( "di.storage.avta.2", last->vars );
775 vg_assert(nyu->vars);
776 VG_(OSetGen_Insert)( scope, nyu );
777 last = nyu;
778 }
779
780 vg_assert(aMax == last->aMax);
781
782 xxFirst = (DiAddrRange*)VG_(OSetGen_Lookup)(scope, &aMin);
783 xxLast = (DiAddrRange*)VG_(OSetGen_Lookup)(scope, &aMax);
784 vg_assert(xxFirst);
785 vg_assert(xxLast);
786 vg_assert(xxFirst->aMin == aMin);
787 vg_assert(xxLast->aMax == aMax);
788 if (xxFirst != xxLast)
789 vg_assert(xxFirst->aMax < xxLast->aMin);
790
791 /* Great. Now we merely need to iterate over the segments from
792 'first' to 'last' inclusive, and add 'var' to the variable set
793 of each of them. */
794 if (0) {
795 static UWord ctr = 0;
796 ctr++;
797 VG_(printf)("ctr = %lu\n", ctr);
798 if (ctr >= 33263) show_scope( scope, "add_var_to_arange(2)" );
799 }
800
801 xxIters = 0;
802 range = xxRangep = NULL;
803 VG_(OSetGen_ResetIterAt)( scope, &aMin );
804 while (True) {
805 xxRangep = range;
806 range = VG_(OSetGen_Next)( scope );
807 if (!range) break;
808 if (range->aMin > aMax) break;
809 xxIters++;
810 if (0) VG_(printf)("have range %#lx %#lx\n",
811 range->aMin, range->aMax);
812
813 /* Sanity checks */
814 if (!xxRangep) {
815 /* This is the first in the range */
816 vg_assert(range->aMin == aMin);
817 } else {
818 vg_assert(xxRangep->aMax + 1 == range->aMin);
819 }
820
821 vg_assert(range->vars);
822 VG_(addToXA)( range->vars, var );
823 }
824 /* Done. We should have seen at least one range. */
825 vg_assert(xxIters >= 1);
826 if (xxIters == 1) vg_assert(xxFirst == xxLast);
827 if (xxFirst == xxLast) vg_assert(xxIters == 1);
828 vg_assert(xxRangep);
829 vg_assert(xxRangep->aMax == aMax);
830 vg_assert(xxRangep == xxLast);
831 }
832
833
834 /* Top-level place to call to add a variable description (as extracted
835 from a DWARF3 .debug_info section. */
ML_(addVar)836 void ML_(addVar)( struct _DebugInfo* di,
837 Int level,
838 Addr aMin,
839 Addr aMax,
840 UChar* name, /* in di's .strchunks */
841 UWord typeR, /* a cuOff */
842 GExpr* gexpr,
843 GExpr* fbGX,
844 UChar* fileName, /* where decl'd - may be NULL.
845 in di's .strchunks */
846 Int lineNo, /* where decl'd - may be zero */
847 Bool show )
848 {
849 OSet* /* of DiAddrRange */ scope;
850 DiVariable var;
851 Bool all;
852 TyEnt* ent;
853 MaybeULong mul;
854 HChar* badness;
855
856 tl_assert(di && di->admin_tyents);
857
858 if (0) {
859 VG_(printf)(" ML_(addVar): level %d %#lx-%#lx %s :: ",
860 level, aMin, aMax, name );
861 ML_(pp_TyEnt_C_ishly)( di->admin_tyents, typeR );
862 VG_(printf)("\n Var=");
863 ML_(pp_GX)(gexpr);
864 VG_(printf)("\n");
865 if (fbGX) {
866 VG_(printf)(" FrB=");
867 ML_(pp_GX)( fbGX );
868 VG_(printf)("\n");
869 } else {
870 VG_(printf)(" FrB=none\n");
871 }
872 VG_(printf)("\n");
873 }
874
875 vg_assert(level >= 0);
876 vg_assert(aMin <= aMax);
877 vg_assert(name);
878 vg_assert(gexpr);
879
880 ent = ML_(TyEnts__index_by_cuOff)( di->admin_tyents, NULL, typeR);
881 tl_assert(ent);
882 vg_assert(ML_(TyEnt__is_type)(ent));
883
884 /* "Comment_Regarding_Text_Range_Checks" (is referred to elsewhere)
885 ----------------------------------------------------------------
886 Ignore any variables whose aMin .. aMax (that is, range of text
887 addresses for which they actually exist) falls outside the text
888 segment. Is this indicative of a bug in the reader? Maybe.
889 (LATER): instead of restricting strictly to the .text segment,
890 be a bit more relaxed, and accept any variable whose text range
891 falls inside the r-x mapped area. This is useful because .text
892 is not always the only instruction-carrying segment: others are:
893 .init .plt __libc_freeres_fn and .fini. This implicitly assumes
894 that those extra sections have the same bias as .text, but that
895 seems a reasonable assumption to me. */
896 /* This is assured us by top level steering logic in debuginfo.c,
897 and it is re-checked at the start of
898 ML_(read_elf_debug_info). */
899 vg_assert(di->have_rx_map && di->have_rw_map);
900 if (level > 0
901 && (aMax < di->rx_map_avma
902 || aMin >= di->rx_map_avma + di->rx_map_size)) {
903 if (VG_(clo_verbosity) >= 0) {
904 VG_(message)(Vg_DebugMsg,
905 "warning: addVar: in range %#lx .. %#lx outside "
906 "segment %#lx .. %#lx (%s)\n",
907 aMin, aMax,
908 di->text_avma, di->text_avma + di->text_size -1,
909 name
910 );
911 }
912 return;
913 }
914
915 /* If the type's size is zero (which can mean unknown size), ignore
916 it. We will never be able to actually relate a data address to
917 a data object with zero size, so there's no point in storing
918 info on it. On 32-bit platforms, also reject types whose size
919 is 2^32 bytes or large. (It's amazing what junk shows up ..) */
920 mul = ML_(sizeOfType)(di->admin_tyents, typeR);
921
922 badness = NULL;
923 if (mul.b != True)
924 badness = "unknown size";
925 else if (mul.ul == 0)
926 badness = "zero size ";
927 else if (sizeof(void*) == 4 && mul.ul >= (1ULL<<32))
928 badness = "implausibly large";
929
930 if (badness) {
931 static Int complaints = 10;
932 if (VG_(clo_verbosity) >= 2 && complaints > 0) {
933 VG_(message)(Vg_DebugMsg, "warning: addVar: %s (%s)\n",
934 badness, name );
935 complaints--;
936 }
937 return;
938 }
939
940 if (!di->varinfo) {
941 di->varinfo = VG_(newXA)( ML_(dinfo_zalloc),
942 "di.storage.addVar.1",
943 ML_(dinfo_free),
944 sizeof(OSet*) );
945 }
946
947 vg_assert(level < 256); /* arbitrary; stay sane */
948 /* Expand the top level array enough to map this level */
949 while ( VG_(sizeXA)(di->varinfo) <= level ) {
950 DiAddrRange* nyu;
951 scope = VG_(OSetGen_Create)( offsetof(DiAddrRange,aMin),
952 ML_(cmp_for_DiAddrRange_range),
953 ML_(dinfo_zalloc), "di.storage.addVar.2",
954 ML_(dinfo_free) );
955 vg_assert(scope);
956 if (0) VG_(printf)("create: scope = %p, adding at %ld\n",
957 scope, VG_(sizeXA)(di->varinfo));
958 VG_(addToXA)( di->varinfo, &scope );
959 /* Add a single range covering the entire address space. At
960 level 0 we require this doesn't get split. At levels above 0
961 we require that any additions to it cause it to get split.
962 All of these invariants get checked both add_var_to_arange
963 and after reading is complete, in canonicaliseVarInfo. */
964 nyu = VG_(OSetGen_AllocNode)( scope, sizeof(DiAddrRange) );
965 vg_assert(nyu);
966 nyu->aMin = (Addr)0;
967 nyu->aMax = ~(Addr)0;
968 nyu->vars = VG_(newXA)( ML_(dinfo_zalloc), "di.storage.addVar.3",
969 ML_(dinfo_free),
970 sizeof(DiVariable) );
971 vg_assert(nyu->vars);
972 VG_(OSetGen_Insert)( scope, nyu );
973 }
974
975 vg_assert( VG_(sizeXA)(di->varinfo) > level );
976 scope = *(OSet**)VG_(indexXA)( di->varinfo, level );
977 vg_assert(scope);
978
979 var.name = name;
980 var.typeR = typeR;
981 var.gexpr = gexpr;
982 var.fbGX = fbGX;
983 var.fileName = fileName;
984 var.lineNo = lineNo;
985
986 all = aMin == (Addr)0 && aMax == ~(Addr)0;
987 vg_assert(level == 0 ? all : !all);
988
989 add_var_to_arange( /*MOD*/scope, aMin, aMax, &var );
990 }
991
992
993 /* This really just checks the constructed data structure, as there is
994 no canonicalisation to do. */
canonicaliseVarInfo(struct _DebugInfo * di)995 static void canonicaliseVarInfo ( struct _DebugInfo* di )
996 {
997 Word i, nInThisScope;
998
999 if (!di->varinfo)
1000 return;
1001
1002 for (i = 0; i < VG_(sizeXA)(di->varinfo); i++) {
1003
1004 DiAddrRange *range, *rangep;
1005 OSet* scope = *(OSet**)VG_(indexXA)(di->varinfo, i);
1006 if (!scope) continue;
1007
1008 /* Deal with the global-scope case. */
1009 if (i == 0) {
1010 Addr zero = 0;
1011 vg_assert(VG_(OSetGen_Size)( scope ) == 1);
1012 range = VG_(OSetGen_Lookup)( scope, &zero );
1013 vg_assert(range);
1014 vg_assert(range->aMin == (Addr)0);
1015 vg_assert(range->aMax == ~(Addr)0);
1016 continue;
1017 }
1018
1019 /* All the rest of this is for the local-scope case. */
1020 /* iterate over all entries in 'scope' */
1021 nInThisScope = 0;
1022 rangep = NULL;
1023 VG_(OSetGen_ResetIter)(scope);
1024 while (True) {
1025 range = VG_(OSetGen_Next)(scope);
1026 if (!range) {
1027 /* We just saw the last one. There must have been at
1028 least one entry in the range. */
1029 vg_assert(rangep);
1030 vg_assert(rangep->aMax == ~(Addr)0);
1031 break;
1032 }
1033
1034 vg_assert(range->aMin <= range->aMax);
1035 vg_assert(range->vars);
1036
1037 if (!rangep) {
1038 /* This is the first entry in the range. */
1039 vg_assert(range->aMin == 0);
1040 } else {
1041 vg_assert(rangep->aMax + 1 == range->aMin);
1042 }
1043
1044 rangep = range;
1045 nInThisScope++;
1046 } /* iterating over ranges in a given scope */
1047
1048 /* If there's only one entry in this (local) scope, it must
1049 cover the entire address space (obviously), but it must not
1050 contain any vars. */
1051
1052 vg_assert(nInThisScope > 0);
1053 if (nInThisScope == 1) {
1054 Addr zero = 0;
1055 vg_assert(VG_(OSetGen_Size)( scope ) == 1);
1056 range = VG_(OSetGen_Lookup)( scope, &zero );
1057 vg_assert(range);
1058 vg_assert(range->aMin == (Addr)0);
1059 vg_assert(range->aMax == ~(Addr)0);
1060 vg_assert(range->vars);
1061 vg_assert(VG_(sizeXA)(range->vars) == 0);
1062 }
1063
1064 } /* iterate over scopes */
1065 }
1066
1067
1068 /*------------------------------------------------------------*/
1069 /*--- Canonicalisers ---*/
1070 /*------------------------------------------------------------*/
1071
1072 /* Sort the symtab by starting address, and emit warnings if any
1073 symbols have overlapping address ranges. We use that old chestnut,
1074 shellsort. Mash the table around so as to establish the property
1075 that addresses are in order and the ranges to not overlap. This
1076 facilitates using binary search to map addresses to symbols when we
1077 come to query the table.
1078 */
compare_DiSym(void * va,void * vb)1079 static Int compare_DiSym ( void* va, void* vb )
1080 {
1081 DiSym* a = (DiSym*)va;
1082 DiSym* b = (DiSym*)vb;
1083 if (a->addr < b->addr) return -1;
1084 if (a->addr > b->addr) return 1;
1085 return 0;
1086 }
1087
1088
1089 /* Two symbols have the same address. Which name do we prefer? In order:
1090
1091 - Prefer "PMPI_<foo>" over "MPI_<foo>".
1092
1093 - Else, prefer a non-NULL name over a NULL one.
1094
1095 - Else, prefer a non-whitespace name over an all-whitespace name.
1096
1097 - Else, prefer the shorter symbol name. If the symbol contains a
1098 version symbol ('@' on Linux, other platforms may differ), which means it
1099 is versioned, then the length up to the version symbol is used for length
1100 comparison purposes (so "foo@GLIBC_2.4.2" is considered shorter than
1101 "foobar").
1102
1103 - Else, if two symbols have the same length, prefer a versioned symbol over
1104 a non-versioned symbol.
1105
1106 - Else, use alphabetical ordering.
1107
1108 - Otherwise, they must be the same; use the symbol with the lower address.
1109
1110 Very occasionally this goes wrong (eg. 'memcmp' and 'bcmp' are
1111 aliases in glibc, we choose the 'bcmp' symbol because it's shorter,
1112 so we can misdescribe memcmp() as bcmp()). This is hard to avoid.
1113 It's mentioned in the FAQ file.
1114 */
prefersym(struct _DebugInfo * di,DiSym * a,DiSym * b)1115 static DiSym* prefersym ( struct _DebugInfo* di, DiSym* a, DiSym* b )
1116 {
1117 Word cmp;
1118 Word vlena, vlenb; /* length without version */
1119 const UChar *vpa, *vpb;
1120
1121 Bool preferA = False;
1122 Bool preferB = False;
1123
1124 vg_assert(a->addr == b->addr);
1125
1126 vlena = VG_(strlen)(a->name);
1127 vlenb = VG_(strlen)(b->name);
1128
1129 #if defined(VGO_linux) || defined(VGO_aix5)
1130 # define VERSION_CHAR '@'
1131 #elif defined(VGO_darwin)
1132 # define VERSION_CHAR '$'
1133 #else
1134 # error Unknown OS
1135 #endif
1136
1137 vpa = VG_(strchr)(a->name, VERSION_CHAR);
1138 vpb = VG_(strchr)(b->name, VERSION_CHAR);
1139
1140 if (vpa)
1141 vlena = vpa - a->name;
1142 if (vpb)
1143 vlenb = vpb - b->name;
1144
1145 /* MPI hack: prefer PMPI_Foo over MPI_Foo */
1146 if (0==VG_(strncmp)(a->name, "MPI_", 4)
1147 && 0==VG_(strncmp)(b->name, "PMPI_", 5)
1148 && 0==VG_(strcmp)(a->name, 1+b->name)) {
1149 preferB = True; goto out;
1150 }
1151 if (0==VG_(strncmp)(b->name, "MPI_", 4)
1152 && 0==VG_(strncmp)(a->name, "PMPI_", 5)
1153 && 0==VG_(strcmp)(b->name, 1+a->name)) {
1154 preferA = True; goto out;
1155 }
1156
1157 /* Prefer non-empty name. */
1158 if (vlena && !vlenb) {
1159 preferA = True; goto out;
1160 }
1161 if (vlenb && !vlena) {
1162 preferB = True; goto out;
1163 }
1164
1165 /* Prefer non-whitespace name. */
1166 {
1167 Bool blankA = True;
1168 Bool blankB = True;
1169 Char *s;
1170 s = a->name;
1171 while (*s) {
1172 if (!VG_(isspace)(*s++)) {
1173 blankA = False;
1174 break;
1175 }
1176 }
1177 s = b->name;
1178 while (*s) {
1179 if (!VG_(isspace)(*s++)) {
1180 blankB = False;
1181 break;
1182 }
1183 }
1184
1185 if (!blankA && blankB) {
1186 preferA = True; goto out;
1187 }
1188 if (!blankB && blankA) {
1189 preferB = True; goto out;
1190 }
1191 }
1192
1193 /* Select the shortest unversioned name */
1194 if (vlena < vlenb) {
1195 preferA = True; goto out;
1196 }
1197 if (vlenb < vlena) {
1198 preferB = True; goto out;
1199 }
1200
1201 /* Equal lengths; select the versioned name */
1202 if (vpa && !vpb) {
1203 preferA = True; goto out;
1204 }
1205 if (vpb && !vpa) {
1206 preferB = True; goto out;
1207 }
1208
1209 /* Either both versioned or neither is versioned; select them
1210 alphabetically */
1211 cmp = VG_(strcmp)(a->name, b->name);
1212 if (cmp < 0) {
1213 preferA = True; goto out;
1214 }
1215 if (cmp > 0) {
1216 preferB = True; goto out;
1217 }
1218
1219 /* If we get here, they are the same name. */
1220
1221 /* In this case we could choose either (arbitrarily), but might as
1222 well choose the one with the lowest DiSym* address, so as to try
1223 and make the comparison mechanism more stable (a la sorting
1224 parlance). Also, skip the diagnostic printing in this case. */
1225 return a <= b ? a : b;
1226
1227 /*NOTREACHED*/
1228 vg_assert(0);
1229 out:
1230 if (preferA && !preferB) {
1231 TRACE_SYMTAB("sym at %#lx: prefer '%s' to '%s'\n",
1232 a->addr, a->name, b->name );
1233 return a;
1234 }
1235 if (preferB && !preferA) {
1236 TRACE_SYMTAB("sym at %#lx: prefer '%s' to '%s'\n",
1237 b->addr, b->name, a->name );
1238 return b;
1239 }
1240 /*NOTREACHED*/
1241 vg_assert(0);
1242 }
1243
canonicaliseSymtab(struct _DebugInfo * di)1244 static void canonicaliseSymtab ( struct _DebugInfo* di )
1245 {
1246 Word i, j, n_merged, n_truncated;
1247 Addr s1, s2, e1, e2, p1, p2;
1248 UChar *n1, *n2;
1249 Bool t1, t2, f1, f2;
1250
1251 # define SWAP(ty,aa,bb) \
1252 do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0)
1253
1254 if (di->symtab_used == 0)
1255 return;
1256
1257 VG_(ssort)(di->symtab, di->symtab_used,
1258 sizeof(*di->symtab), compare_DiSym);
1259
1260 cleanup_more:
1261
1262 /* If two symbols have identical address ranges, we pick one
1263 using prefersym() (see it for details). */
1264 do {
1265 n_merged = 0;
1266 j = di->symtab_used;
1267 di->symtab_used = 0;
1268 for (i = 0; i < j; i++) {
1269 if (i < j-1
1270 && di->symtab[i].addr == di->symtab[i+1].addr
1271 && di->symtab[i].size == di->symtab[i+1].size
1272 ) {
1273 n_merged++;
1274 /* merge the two into one */
1275 di->symtab[di->symtab_used++]
1276 = *prefersym(di, &di->symtab[i], &di->symtab[i+1]);
1277 i++;
1278 } else {
1279 di->symtab[di->symtab_used++] = di->symtab[i];
1280 }
1281 }
1282 TRACE_SYMTAB( "canonicaliseSymtab: %ld symbols merged\n", n_merged);
1283 }
1284 while (n_merged > 0);
1285
1286 /* Detect and "fix" overlapping address ranges. */
1287 n_truncated = 0;
1288
1289 for (i = 0; i < ((Word)di->symtab_used) -1; i++) {
1290
1291 vg_assert(di->symtab[i].addr <= di->symtab[i+1].addr);
1292
1293 /* Check for common (no overlap) case. */
1294 if (di->symtab[i].addr + di->symtab[i].size
1295 <= di->symtab[i+1].addr)
1296 continue;
1297
1298 /* There's an overlap. Truncate one or the other. */
1299 if (di->trace_symtab) {
1300 VG_(printf)("overlapping address ranges in symbol table\n\t");
1301 ML_(ppSym)( i, &di->symtab[i] );
1302 VG_(printf)("\t");
1303 ML_(ppSym)( i+1, &di->symtab[i+1] );
1304 VG_(printf)("\n");
1305 }
1306
1307 /* Truncate one or the other. */
1308 s1 = di->symtab[i].addr;
1309 e1 = s1 + di->symtab[i].size - 1;
1310 p1 = di->symtab[i].tocptr;
1311 n1 = di->symtab[i].name;
1312 t1 = di->symtab[i].isText;
1313 f1 = di->symtab[i].isIFunc;
1314 s2 = di->symtab[i+1].addr;
1315 e2 = s2 + di->symtab[i+1].size - 1;
1316 p2 = di->symtab[i+1].tocptr;
1317 n2 = di->symtab[i+1].name;
1318 t2 = di->symtab[i+1].isText;
1319 f2 = di->symtab[i+1].isIFunc;
1320 if (s1 < s2) {
1321 e1 = s2-1;
1322 } else {
1323 vg_assert(s1 == s2);
1324 if (e1 > e2) {
1325 s1 = e2+1; SWAP(Addr,s1,s2); SWAP(Addr,e1,e2); SWAP(Addr,p1,p2);
1326 SWAP(UChar *,n1,n2); SWAP(Bool,t1,t2);
1327 } else
1328 if (e1 < e2) {
1329 s2 = e1+1;
1330 } else {
1331 /* e1 == e2. Identical addr ranges. We'll eventually wind
1332 up back at cleanup_more, which will take care of it. */
1333 }
1334 }
1335 di->symtab[i].addr = s1;
1336 di->symtab[i].size = e1 - s1 + 1;
1337 di->symtab[i].tocptr = p1;
1338 di->symtab[i].name = n1;
1339 di->symtab[i].isText = t1;
1340 di->symtab[i].isIFunc = f1;
1341 di->symtab[i+1].addr = s2;
1342 di->symtab[i+1].size = e2 - s2 + 1;
1343 di->symtab[i+1].tocptr = p2;
1344 di->symtab[i+1].name = n2;
1345 di->symtab[i+1].isText = t2;
1346 di->symtab[i+1].isIFunc = f2;
1347 vg_assert(s1 <= s2);
1348 vg_assert(di->symtab[i].size > 0);
1349 vg_assert(di->symtab[i+1].size > 0);
1350 /* It may be that the i+1 entry now needs to be moved further
1351 along to maintain the address order requirement. */
1352 j = i+1;
1353 while (j < ((Word)di->symtab_used)-1
1354 && di->symtab[j].addr > di->symtab[j+1].addr) {
1355 SWAP(DiSym,di->symtab[j],di->symtab[j+1]);
1356 j++;
1357 }
1358 n_truncated++;
1359 }
1360
1361 if (n_truncated > 0) goto cleanup_more;
1362
1363 /* Ensure relevant postconditions hold. */
1364 for (i = 0; i < ((Word)di->symtab_used)-1; i++) {
1365 /* No zero-sized symbols. */
1366 vg_assert(di->symtab[i].size > 0);
1367 /* In order. */
1368 vg_assert(di->symtab[i].addr < di->symtab[i+1].addr);
1369 /* No overlaps. */
1370 vg_assert(di->symtab[i].addr + di->symtab[i].size - 1
1371 < di->symtab[i+1].addr);
1372 }
1373 # undef SWAP
1374 }
1375
1376
1377 /* Sort the location table by starting address. Mash the table around
1378 so as to establish the property that addresses are in order and the
1379 ranges do not overlap. This facilitates using binary search to map
1380 addresses to locations when we come to query the table.
1381 */
compare_DiLoc(void * va,void * vb)1382 static Int compare_DiLoc ( void* va, void* vb )
1383 {
1384 DiLoc* a = (DiLoc*)va;
1385 DiLoc* b = (DiLoc*)vb;
1386 if (a->addr < b->addr) return -1;
1387 if (a->addr > b->addr) return 1;
1388 return 0;
1389 }
1390
canonicaliseLoctab(struct _DebugInfo * di)1391 static void canonicaliseLoctab ( struct _DebugInfo* di )
1392 {
1393 Word i, j;
1394
1395 # define SWAP(ty,aa,bb) \
1396 do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0);
1397
1398 if (di->loctab_used == 0)
1399 return;
1400
1401 /* Sort by start address. */
1402 VG_(ssort)(di->loctab, di->loctab_used,
1403 sizeof(*di->loctab), compare_DiLoc);
1404
1405 /* If two adjacent entries overlap, truncate the first. */
1406 for (i = 0; i < ((Word)di->loctab_used)-1; i++) {
1407 vg_assert(di->loctab[i].size < 10000);
1408 if (di->loctab[i].addr + di->loctab[i].size > di->loctab[i+1].addr) {
1409 /* Do this in signed int32 because the actual .size fields
1410 are only 12 bits. */
1411 Int new_size = di->loctab[i+1].addr - di->loctab[i].addr;
1412 if (new_size < 0) {
1413 di->loctab[i].size = 0;
1414 } else
1415 if (new_size > MAX_LOC_SIZE) {
1416 di->loctab[i].size = MAX_LOC_SIZE;
1417 } else {
1418 di->loctab[i].size = (UShort)new_size;
1419 }
1420 }
1421 }
1422
1423 /* Zap any zero-sized entries resulting from the truncation
1424 process. */
1425 j = 0;
1426 for (i = 0; i < (Word)di->loctab_used; i++) {
1427 if (di->loctab[i].size > 0) {
1428 if (j != i)
1429 di->loctab[j] = di->loctab[i];
1430 j++;
1431 }
1432 }
1433 di->loctab_used = j;
1434
1435 /* Ensure relevant postconditions hold. */
1436 for (i = 0; i < ((Word)di->loctab_used)-1; i++) {
1437 /*
1438 VG_(printf)("%d (%d) %d 0x%x\n",
1439 i, di->loctab[i+1].confident,
1440 di->loctab[i+1].size, di->loctab[i+1].addr );
1441 */
1442 /* No zero-sized symbols. */
1443 vg_assert(di->loctab[i].size > 0);
1444 /* In order. */
1445 vg_assert(di->loctab[i].addr < di->loctab[i+1].addr);
1446 /* No overlaps. */
1447 vg_assert(di->loctab[i].addr + di->loctab[i].size - 1
1448 < di->loctab[i+1].addr);
1449 }
1450 # undef SWAP
1451 }
1452
1453
1454 /* Sort the call-frame-info table by starting address. Mash the table
1455 around so as to establish the property that addresses are in order
1456 and the ranges do not overlap. This facilitates using binary
1457 search to map addresses to locations when we come to query the
1458 table.
1459
1460 Also, set cfisi_minaddr and cfisi_maxaddr to be the min and max of
1461 any of the address ranges contained in cfisi[0 .. cfisi_used-1], so
1462 as to facilitate rapidly skipping this SegInfo when looking for an
1463 address which falls outside that range.
1464 */
compare_DiCfSI(void * va,void * vb)1465 static Int compare_DiCfSI ( void* va, void* vb )
1466 {
1467 DiCfSI* a = (DiCfSI*)va;
1468 DiCfSI* b = (DiCfSI*)vb;
1469 if (a->base < b->base) return -1;
1470 if (a->base > b->base) return 1;
1471 return 0;
1472 }
1473
ML_(canonicaliseCFI)1474 void ML_(canonicaliseCFI) ( struct _DebugInfo* di )
1475 {
1476 Word i, j;
1477 const Addr minAvma = 0;
1478 const Addr maxAvma = ~minAvma;
1479
1480 /* Note: take care in here. di->cfsi can be NULL, in which
1481 case _used and _size fields will be zero. */
1482 if (di->cfsi == NULL) {
1483 vg_assert(di->cfsi_used == 0);
1484 vg_assert(di->cfsi_size == 0);
1485 }
1486
1487 /* Set cfsi_minavma and cfsi_maxavma to summarise the entire
1488 address range contained in cfsi[0 .. cfsi_used-1]. */
1489 di->cfsi_minavma = maxAvma;
1490 di->cfsi_maxavma = minAvma;
1491 for (i = 0; i < (Word)di->cfsi_used; i++) {
1492 Addr here_min = di->cfsi[i].base;
1493 Addr here_max = di->cfsi[i].base + di->cfsi[i].len - 1;
1494 if (here_min < di->cfsi_minavma)
1495 di->cfsi_minavma = here_min;
1496 if (here_max > di->cfsi_maxavma)
1497 di->cfsi_maxavma = here_max;
1498 }
1499
1500 if (di->trace_cfi)
1501 VG_(printf)("canonicaliseCfiSI: %ld entries, %#lx .. %#lx\n",
1502 di->cfsi_used,
1503 di->cfsi_minavma, di->cfsi_maxavma);
1504
1505 /* Sort the cfsi array by base address. */
1506 VG_(ssort)(di->cfsi, di->cfsi_used, sizeof(*di->cfsi), compare_DiCfSI);
1507
1508 /* If two adjacent entries overlap, truncate the first. */
1509 for (i = 0; i < (Word)di->cfsi_used-1; i++) {
1510 if (di->cfsi[i].base + di->cfsi[i].len > di->cfsi[i+1].base) {
1511 Word new_len = di->cfsi[i+1].base - di->cfsi[i].base;
1512 /* how could it be otherwise? The entries are sorted by the
1513 .base field. */
1514 vg_assert(new_len >= 0);
1515 vg_assert(new_len <= di->cfsi[i].len);
1516 di->cfsi[i].len = new_len;
1517 }
1518 }
1519
1520 /* Zap any zero-sized entries resulting from the truncation
1521 process. */
1522 j = 0;
1523 for (i = 0; i < (Word)di->cfsi_used; i++) {
1524 if (di->cfsi[i].len > 0) {
1525 if (j != i)
1526 di->cfsi[j] = di->cfsi[i];
1527 j++;
1528 }
1529 }
1530 /* VG_(printf)("XXXXXXXXXXXXX %d %d\n", di->cfsi_used, j); */
1531 di->cfsi_used = j;
1532
1533 /* Ensure relevant postconditions hold. */
1534 for (i = 0; i < (Word)di->cfsi_used; i++) {
1535 /* No zero-length ranges. */
1536 vg_assert(di->cfsi[i].len > 0);
1537 /* Makes sense w.r.t. summary address range */
1538 vg_assert(di->cfsi[i].base >= di->cfsi_minavma);
1539 vg_assert(di->cfsi[i].base + di->cfsi[i].len - 1
1540 <= di->cfsi_maxavma);
1541
1542 if (i < di->cfsi_used - 1) {
1543 /*
1544 if (!(di->cfsi[i].base < di->cfsi[i+1].base)) {
1545 VG_(printf)("\nOOO cfsis:\n");
1546 ML_(ppCfiSI)(&di->cfsi[i]);
1547 ML_(ppCfiSI)(&di->cfsi[i+1]);
1548 }
1549 */
1550 /* In order. */
1551 vg_assert(di->cfsi[i].base < di->cfsi[i+1].base);
1552 /* No overlaps. */
1553 vg_assert(di->cfsi[i].base + di->cfsi[i].len - 1
1554 < di->cfsi[i+1].base);
1555 }
1556 }
1557
1558 }
1559
1560
1561 /* Canonicalise the tables held by 'di', in preparation for use. Call
1562 this after finishing adding entries to these tables. */
ML_(canonicaliseTables)1563 void ML_(canonicaliseTables) ( struct _DebugInfo* di )
1564 {
1565 canonicaliseSymtab ( di );
1566 canonicaliseLoctab ( di );
1567 ML_(canonicaliseCFI) ( di );
1568 canonicaliseVarInfo ( di );
1569 }
1570
1571
1572 /*------------------------------------------------------------*/
1573 /*--- Searching the tables ---*/
1574 /*------------------------------------------------------------*/
1575
1576 /* Find a symbol-table index containing the specified pointer, or -1
1577 if not found. Binary search. */
1578
ML_(search_one_symtab)1579 Word ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr,
1580 Bool match_anywhere_in_sym,
1581 Bool findText )
1582 {
1583 Addr a_mid_lo, a_mid_hi;
1584 Word mid, size,
1585 lo = 0,
1586 hi = di->symtab_used-1;
1587 while (True) {
1588 /* current unsearched space is from lo to hi, inclusive. */
1589 if (lo > hi) return -1; /* not found */
1590 mid = (lo + hi) / 2;
1591 a_mid_lo = di->symtab[mid].addr;
1592 size = ( match_anywhere_in_sym
1593 ? di->symtab[mid].size
1594 : 1);
1595 a_mid_hi = ((Addr)di->symtab[mid].addr) + size - 1;
1596
1597 if (ptr < a_mid_lo) { hi = mid-1; continue; }
1598 if (ptr > a_mid_hi) { lo = mid+1; continue; }
1599 vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1600 /* Found a symbol with the correct address range. But is it
1601 of the right kind (text vs data) ? */
1602 if ( findText && di->symtab[mid].isText ) return mid;
1603 if ( (!findText) && (!di->symtab[mid].isText) ) return mid;
1604 return -1;
1605 }
1606 }
1607
1608
1609 /* Find a location-table index containing the specified pointer, or -1
1610 if not found. Binary search. */
1611
ML_(search_one_loctab)1612 Word ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr )
1613 {
1614 Addr a_mid_lo, a_mid_hi;
1615 Word mid,
1616 lo = 0,
1617 hi = di->loctab_used-1;
1618 while (True) {
1619 /* current unsearched space is from lo to hi, inclusive. */
1620 if (lo > hi) return -1; /* not found */
1621 mid = (lo + hi) / 2;
1622 a_mid_lo = di->loctab[mid].addr;
1623 a_mid_hi = ((Addr)di->loctab[mid].addr) + di->loctab[mid].size - 1;
1624
1625 if (ptr < a_mid_lo) { hi = mid-1; continue; }
1626 if (ptr > a_mid_hi) { lo = mid+1; continue; }
1627 vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1628 return mid;
1629 }
1630 }
1631
1632
1633 /* Find a CFI-table index containing the specified pointer, or -1
1634 if not found. Binary search. */
1635
ML_(search_one_cfitab)1636 Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr )
1637 {
1638 Addr a_mid_lo, a_mid_hi;
1639 Word mid, size,
1640 lo = 0,
1641 hi = di->cfsi_used-1;
1642 while (True) {
1643 /* current unsearched space is from lo to hi, inclusive. */
1644 if (lo > hi) return -1; /* not found */
1645 mid = (lo + hi) / 2;
1646 a_mid_lo = di->cfsi[mid].base;
1647 size = di->cfsi[mid].len;
1648 a_mid_hi = a_mid_lo + size - 1;
1649 vg_assert(a_mid_hi >= a_mid_lo);
1650 if (ptr < a_mid_lo) { hi = mid-1; continue; }
1651 if (ptr > a_mid_hi) { lo = mid+1; continue; }
1652 vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1653 return mid;
1654 }
1655 }
1656
1657
1658 /* Find a FPO-table index containing the specified pointer, or -1
1659 if not found. Binary search. */
1660
ML_(search_one_fpotab)1661 Word ML_(search_one_fpotab) ( struct _DebugInfo* di, Addr ptr )
1662 {
1663 Addr const addr = ptr - di->rx_map_avma;
1664 Addr a_mid_lo, a_mid_hi;
1665 Word mid, size,
1666 lo = 0,
1667 hi = di->fpo_size-1;
1668 while (True) {
1669 /* current unsearched space is from lo to hi, inclusive. */
1670 if (lo > hi) return -1; /* not found */
1671 mid = (lo + hi) / 2;
1672 a_mid_lo = di->fpo[mid].ulOffStart;
1673 size = di->fpo[mid].cbProcSize;
1674 a_mid_hi = a_mid_lo + size - 1;
1675 vg_assert(a_mid_hi >= a_mid_lo);
1676 if (addr < a_mid_lo) { hi = mid-1; continue; }
1677 if (addr > a_mid_hi) { lo = mid+1; continue; }
1678 vg_assert(addr >= a_mid_lo && addr <= a_mid_hi);
1679 return mid;
1680 }
1681 }
1682
1683 /*--------------------------------------------------------------------*/
1684 /*--- end ---*/
1685 /*--------------------------------------------------------------------*/
1686