• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /*--------------------------------------------------------------------*/
3 /*--- Format-neutral storage of and querying of info acquired from ---*/
4 /*--- ELF/XCOFF stabs/dwarf1/dwarf2/dwarf3 debug info.             ---*/
5 /*---                                                    storage.c ---*/
6 /*--------------------------------------------------------------------*/
7 
8 /*
9    This file is part of Valgrind, a dynamic binary instrumentation
10    framework.
11 
12    Copyright (C) 2000-2010 Julian Seward
13       jseward@acm.org
14 
15    This program is free software; you can redistribute it and/or
16    modify it under the terms of the GNU General Public License as
17    published by the Free Software Foundation; either version 2 of the
18    License, or (at your option) any later version.
19 
20    This program is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received a copy of the GNU General Public License
26    along with this program; if not, write to the Free Software
27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28    02111-1307, USA.
29 
30    The GNU General Public License is contained in the file COPYING.
31 */
32 
33 /* This file manages the data structures built by the debuginfo
34    system.  These are: the top level SegInfo list.  For each SegInfo,
35    there are tables for for address-to-symbol mappings,
36    address-to-src-file/line mappings, and address-to-CFI-info
37    mappings.
38 */
39 
40 #include "pub_core_basics.h"
41 #include "pub_core_options.h"      /* VG_(clo_verbosity) */
42 #include "pub_core_debuginfo.h"
43 #include "pub_core_libcassert.h"
44 #include "pub_core_libcbase.h"
45 #include "pub_core_libcprint.h"
46 #include "pub_core_xarray.h"
47 #include "pub_core_oset.h"
48 
49 #include "priv_misc.h"         /* dinfo_zalloc/free/strdup */
50 #include "priv_d3basics.h"     /* ML_(pp_GX) */
51 #include "priv_tytypes.h"
52 #include "priv_storage.h"      /* self */
53 
54 
55 /*------------------------------------------------------------*/
56 /*--- Misc (printing, errors)                              ---*/
57 /*------------------------------------------------------------*/
58 
59 /* Show a non-fatal debug info reading error.  Use vg_panic if
60    terminal.  'serious' errors are shown regardless of the
61    verbosity setting. */
ML_(symerr)62 void ML_(symerr) ( struct _DebugInfo* di, Bool serious, HChar* msg )
63 {
64    /* XML mode hides everything :-( */
65    if (VG_(clo_xml))
66       return;
67 
68    if (serious) {
69 
70       VG_(message)(Vg_DebugMsg, "WARNING: Serious error when "
71                                 "reading debug info\n");
72       if (True || VG_(clo_verbosity) < 2) {
73          /* Need to show what the file name is, at verbosity levels 2
74             or below, since that won't already have been shown */
75          VG_(message)(Vg_DebugMsg,
76                       "When reading debug info from %s:\n",
77                       (di && di->filename) ? di->filename : (UChar*)"???");
78       }
79       VG_(message)(Vg_DebugMsg, "%s\n", msg);
80 
81    } else { /* !serious */
82 
83       if (VG_(clo_verbosity) >= 2)
84          VG_(message)(Vg_DebugMsg, "%s\n", msg);
85 
86    }
87 }
88 
89 
90 /* Print a symbol. */
ML_(ppSym)91 void ML_(ppSym) ( Int idx, DiSym* sym )
92 {
93    VG_(printf)( "%5d:  %#8lx .. %#8lx (%d)      %s\n",
94                 idx,
95                 sym->addr,
96                 sym->addr + sym->size - 1, sym->size,
97 	        sym->name );
98 }
99 
100 /* Print a call-frame-info summary. */
ML_(ppDiCfSI)101 void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs, DiCfSI* si )
102 {
103 #  define SHOW_HOW(_how, _off)                   \
104       do {                                       \
105          if (_how == CFIR_UNKNOWN) {             \
106             VG_(printf)("Unknown");              \
107          } else                                  \
108          if (_how == CFIR_SAME) {                \
109             VG_(printf)("Same");                 \
110          } else                                  \
111          if (_how == CFIR_CFAREL) {              \
112             VG_(printf)("cfa+%d", _off);         \
113          } else                                  \
114          if (_how == CFIR_MEMCFAREL) {           \
115             VG_(printf)("*(cfa+%d)", _off);      \
116          } else                                  \
117          if (_how == CFIR_EXPR) {                \
118             VG_(printf)("{");                    \
119             ML_(ppCfiExpr)(exprs, _off);         \
120             VG_(printf)("}");                    \
121          } else {                                \
122             vg_assert(0+0);                      \
123          }                                       \
124       } while (0)
125 
126    VG_(printf)("[%#lx .. %#lx]: ", si->base,
127                                si->base + (UWord)si->len - 1);
128    switch (si->cfa_how) {
129       case CFIC_IA_SPREL:
130          VG_(printf)("let cfa=oldSP+%d", si->cfa_off);
131          break;
132       case CFIC_IA_BPREL:
133          VG_(printf)("let cfa=oldBP+%d", si->cfa_off);
134          break;
135       case CFIC_ARM_R13REL:
136          VG_(printf)("let cfa=oldR13+%d", si->cfa_off);
137          break;
138       case CFIC_ARM_R12REL:
139          VG_(printf)("let cfa=oldR12+%d", si->cfa_off);
140          break;
141       case CFIC_ARM_R11REL:
142          VG_(printf)("let cfa=oldR11+%d", si->cfa_off);
143          break;
144       case CFIC_ARM_R7REL:
145          VG_(printf)("let cfa=oldR7+%d", si->cfa_off);
146          break;
147       case CFIC_EXPR:
148          VG_(printf)("let cfa={");
149          ML_(ppCfiExpr)(exprs, si->cfa_off);
150          VG_(printf)("}");
151          break;
152       default:
153          vg_assert(0);
154    }
155 
156    VG_(printf)(" in RA=");
157    SHOW_HOW(si->ra_how, si->ra_off);
158 #  if defined(VGA_x86) || defined(VGA_amd64)
159    VG_(printf)(" SP=");
160    SHOW_HOW(si->sp_how, si->sp_off);
161    VG_(printf)(" BP=");
162    SHOW_HOW(si->bp_how, si->bp_off);
163 #  elif defined(VGA_arm)
164    VG_(printf)(" R14=");
165    SHOW_HOW(si->r14_how, si->r14_off);
166    VG_(printf)(" R13=");
167    SHOW_HOW(si->r13_how, si->r13_off);
168    VG_(printf)(" R12=");
169    SHOW_HOW(si->r12_how, si->r12_off);
170    VG_(printf)(" R11=");
171    SHOW_HOW(si->r11_how, si->r11_off);
172    VG_(printf)(" R7=");
173    SHOW_HOW(si->r7_how, si->r7_off);
174 #  elif defined(VGA_ppc32) || defined(VGA_ppc64)
175 #  else
176 #    error "Unknown arch"
177 #  endif
178    VG_(printf)("\n");
179 #  undef SHOW_HOW
180 }
181 
182 
183 /*------------------------------------------------------------*/
184 /*--- Adding stuff                                         ---*/
185 /*------------------------------------------------------------*/
186 
187 /* Add a str to the string table, including terminating zero, and
188    return pointer to the string in vg_strtab.  Unless it's been seen
189    recently, in which case we find the old pointer and return that.
190    This avoids the most egregious duplications.
191 
192    JSGF: changed from returning an index to a pointer, and changed to
193    a chunking memory allocator rather than reallocating, so the
194    pointers are stable.
195 */
ML_(addStr)196 UChar* ML_(addStr) ( struct _DebugInfo* di, UChar* str, Int len )
197 {
198    struct strchunk *chunk;
199    Int    space_needed;
200    UChar* p;
201 
202    if (len == -1) {
203       len = VG_(strlen)(str);
204    } else {
205       vg_assert(len >= 0);
206    }
207 
208    space_needed = 1 + len;
209 
210    // Allocate a new strtab chunk if necessary
211    if (di->strchunks == NULL ||
212        (di->strchunks->strtab_used
213         + space_needed) > SEGINFO_STRCHUNKSIZE) {
214       chunk = ML_(dinfo_zalloc)("di.storage.addStr.1", sizeof(*chunk));
215       chunk->strtab_used = 0;
216       chunk->next = di->strchunks;
217       di->strchunks = chunk;
218    }
219    chunk = di->strchunks;
220 
221    p = &chunk->strtab[chunk->strtab_used];
222    VG_(memcpy)(p, str, len);
223    chunk->strtab[chunk->strtab_used+len] = '\0';
224    chunk->strtab_used += space_needed;
225 
226    return p;
227 }
228 
229 
230 /* Add a symbol to the symbol table.
231 */
ML_(addSym)232 void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym )
233 {
234    UInt   new_sz, i;
235    DiSym* new_tab;
236 
237    /* Ignore zero-sized syms. */
238    if (sym->size == 0) return;
239 
240    if (di->symtab_used == di->symtab_size) {
241       new_sz = 2 * di->symtab_size;
242       if (new_sz == 0) new_sz = 500;
243       new_tab = ML_(dinfo_zalloc)( "di.storage.addSym.1",
244                                    new_sz * sizeof(DiSym) );
245       if (di->symtab != NULL) {
246          for (i = 0; i < di->symtab_used; i++)
247             new_tab[i] = di->symtab[i];
248          ML_(dinfo_free)(di->symtab);
249       }
250       di->symtab = new_tab;
251       di->symtab_size = new_sz;
252    }
253 
254    di->symtab[di->symtab_used] = *sym;
255    di->symtab_used++;
256    vg_assert(di->symtab_used <= di->symtab_size);
257 }
258 
259 
260 /* Resize the symbol table to save memory.
261 */
ML_(shrinkSym)262 void ML_(shrinkSym)( struct _DebugInfo* di )
263 {
264    DiSym* new_tab;
265    UInt new_sz = di->symtab_used;
266    if (new_sz == di->symtab_size) return;
267 
268    new_tab = ML_(dinfo_zalloc)( "di.storage.shrinkSym",
269                                 new_sz * sizeof(DiSym) );
270    VG_(memcpy)(new_tab, di->symtab, new_sz * sizeof(DiSym));
271 
272    ML_(dinfo_free)(di->symtab);
273    di->symtab = new_tab;
274    di->symtab_size = new_sz;
275 }
276 
277 
278 /* Add a location to the location table.
279 */
addLoc(struct _DebugInfo * di,DiLoc * loc)280 static void addLoc ( struct _DebugInfo* di, DiLoc* loc )
281 {
282    UInt   new_sz, i;
283    DiLoc* new_tab;
284 
285    /* Zero-sized locs should have been ignored earlier */
286    vg_assert(loc->size > 0);
287 
288    if (di->loctab_used == di->loctab_size) {
289       new_sz = 2 * di->loctab_size;
290       if (new_sz == 0) new_sz = 500;
291       new_tab = ML_(dinfo_zalloc)( "di.storage.addLoc.1",
292                                    new_sz * sizeof(DiLoc) );
293       if (di->loctab != NULL) {
294          for (i = 0; i < di->loctab_used; i++)
295             new_tab[i] = di->loctab[i];
296          ML_(dinfo_free)(di->loctab);
297       }
298       di->loctab = new_tab;
299       di->loctab_size = new_sz;
300    }
301 
302    di->loctab[di->loctab_used] = *loc;
303    di->loctab_used++;
304    vg_assert(di->loctab_used <= di->loctab_size);
305 }
306 
307 
308 /* Resize the lineinfo table to save memory.
309 */
ML_(shrinkLineInfo)310 void ML_(shrinkLineInfo)( struct _DebugInfo* di )
311 {
312    DiLoc* new_tab;
313    UInt new_sz = di->loctab_used;
314    if (new_sz == di->loctab_size) return;
315 
316    new_tab = ML_(dinfo_zalloc)( "di.storage.shrinkLineInfo",
317                                 new_sz * sizeof(DiLoc) );
318    VG_(memcpy)(new_tab, di->loctab, new_sz * sizeof(DiLoc));
319 
320    ML_(dinfo_free)(di->loctab);
321    di->loctab = new_tab;
322    di->loctab_size = new_sz;
323 }
324 
325 
326 /* Top-level place to call to add a source-location mapping entry.
327 */
ML_(addLineInfo)328 void ML_(addLineInfo) ( struct _DebugInfo* di,
329                         UChar*   filename,
330                         UChar*   dirname, /* NULL == directory is unknown */
331                         Addr     this,
332                         Addr     next,
333                         Int      lineno,
334                         Int      entry /* only needed for debug printing */
335      )
336 {
337    static const Bool debug = False;
338    DiLoc loc;
339    Int size = next - this;
340 
341    /* Ignore zero-sized locs */
342    if (this == next) return;
343 
344    if (debug)
345       VG_(printf)( "  src %s %s line %d %#lx-%#lx\n",
346                    dirname ? dirname : (UChar*)"(unknown)",
347                    filename, lineno, this, next );
348 
349    /* Maximum sanity checking.  Some versions of GNU as do a shabby
350     * job with stabs entries; if anything looks suspicious, revert to
351     * a size of 1.  This should catch the instruction of interest
352     * (since if using asm-level debug info, one instruction will
353     * correspond to one line, unlike with C-level debug info where
354     * multiple instructions can map to the one line), but avoid
355     * catching any other instructions bogusly. */
356    if (this > next) {
357        if (VG_(clo_verbosity) > 2) {
358            VG_(message)(Vg_DebugMsg,
359                         "warning: line info addresses out of order "
360                         "at entry %d: 0x%lx 0x%lx\n", entry, this, next);
361        }
362        size = 1;
363    }
364 
365    if (size > MAX_LOC_SIZE) {
366        if (0)
367        VG_(message)(Vg_DebugMsg,
368                     "warning: line info address range too large "
369                     "at entry %d: %d\n", entry, size);
370        size = 1;
371    }
372 
373    /* Rule out ones which are completely outside the r-x mapped area.
374       See "Comment_Regarding_Text_Range_Checks" elsewhere in this file
375       for background and rationale. */
376    vg_assert(di->have_rx_map && di->have_rw_map);
377    if (next-1 < di->rx_map_avma
378        || this >= di->rx_map_avma + di->rx_map_size ) {
379        if (0)
380           VG_(message)(Vg_DebugMsg,
381                        "warning: ignoring line info entry falling "
382                        "outside current DebugInfo: %#lx %#lx %#lx %#lx\n",
383                        di->text_avma,
384                        di->text_avma + di->text_size,
385                        this, next-1);
386        return;
387    }
388 
389    vg_assert(lineno >= 0);
390    if (lineno > MAX_LINENO) {
391       static Bool complained = False;
392       if (!complained) {
393          complained = True;
394          VG_(message)(Vg_UserMsg,
395                       "warning: ignoring line info entry with "
396                       "huge line number (%d)\n", lineno);
397          VG_(message)(Vg_UserMsg,
398                       "         Can't handle line numbers "
399                       "greater than %d, sorry\n", MAX_LINENO);
400          VG_(message)(Vg_UserMsg,
401                       "(Nb: this message is only shown once)\n");
402       }
403       return;
404    }
405 
406    loc.addr      = this;
407    loc.size      = (UShort)size;
408    loc.lineno    = lineno;
409    loc.filename  = filename;
410    loc.dirname   = dirname;
411 
412    if (0) VG_(message)(Vg_DebugMsg,
413 		       "addLoc: addr %#lx, size %d, line %d, file %s\n",
414 		       this,size,lineno,filename);
415 
416    addLoc ( di, &loc );
417 }
418 
419 
420 /* Top-level place to call to add a CFI summary record.  The supplied
421    DiCfSI is copied. */
ML_(addDiCfSI)422 void ML_(addDiCfSI) ( struct _DebugInfo* di, DiCfSI* cfsi_orig )
423 {
424    static const Bool debug = False;
425    UInt    new_sz, i;
426    DiCfSI* new_tab;
427    SSizeT  delta;
428 
429    /* copy the original, so we can mess with it */
430    DiCfSI cfsi = *cfsi_orig;
431 
432    if (debug) {
433       VG_(printf)("adding DiCfSI: ");
434       ML_(ppDiCfSI)(di->cfsi_exprs, &cfsi);
435    }
436 
437    /* sanity */
438    vg_assert(cfsi.len > 0);
439    /* If this fails, the implication is you have a single procedure
440       with more than 5 million bytes of code.  Which is pretty
441       unlikely.  Either that, or the debuginfo reader is somehow
442       broken.  5 million is of course arbitrary; but it's big enough
443       to be bigger than the size of any plausible piece of code that
444       would fall within a single procedure. */
445    vg_assert(cfsi.len < 5000000);
446 
447    vg_assert(di->have_rx_map && di->have_rw_map);
448    /* If we have an empty r-x mapping (is that possible?) then the
449       DiCfSI can't possibly fall inside it.  In which case skip. */
450    if (di->rx_map_size == 0)
451       return;
452 
453    /* Rule out ones which are completely outside the r-x mapped area.
454       See "Comment_Regarding_Text_Range_Checks" elsewhere in this file
455       for background and rationale. */
456    if (cfsi.base + cfsi.len - 1 < di->rx_map_avma
457        || cfsi.base >= di->rx_map_avma + di->rx_map_size) {
458       static Int complaints = 10;
459       if (VG_(clo_trace_cfi) || complaints > 0) {
460          complaints--;
461          if (VG_(clo_verbosity) > 1) {
462             VG_(message)(
463                Vg_DebugMsg,
464                "warning: DiCfSI %#lx .. %#lx outside segment %#lx .. %#lx\n",
465                cfsi.base,
466                cfsi.base + cfsi.len - 1,
467                di->text_avma,
468                di->text_avma + di->text_size - 1
469             );
470          }
471          if (VG_(clo_trace_cfi))
472             ML_(ppDiCfSI)(di->cfsi_exprs, &cfsi);
473       }
474       return;
475    }
476 
477    /* Now we know the range is at least partially inside the r-x
478       mapped area.  That implies that at least one of the ends of the
479       range falls inside the area.  If necessary, clip it so it is
480       completely within the area.  If we don't do this,
481       check_CFSI_related_invariants() in debuginfo.c (invariant #2)
482       will fail.  See
483       "Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS" in
484       priv_storage.h for background. */
485    if (cfsi.base < di->rx_map_avma) {
486       /* Lower end is outside the mapped area.  Hence upper end must
487          be inside it. */
488       if (0) VG_(printf)("XXX truncate lower\n");
489       vg_assert(cfsi.base + cfsi.len - 1 >= di->rx_map_avma);
490       delta = (SSizeT)(di->rx_map_avma - cfsi.base);
491       vg_assert(delta > 0);
492       vg_assert(delta < (SSizeT)cfsi.len);
493       cfsi.base += delta;
494       cfsi.len -= delta;
495    }
496    else
497    if (cfsi.base + cfsi.len - 1 > di->rx_map_avma + di->rx_map_size - 1) {
498       /* Upper end is outside the mapped area.  Hence lower end must be
499          inside it. */
500       if (0) VG_(printf)("XXX truncate upper\n");
501       vg_assert(cfsi.base <= di->rx_map_avma + di->rx_map_size - 1);
502       delta = (SSizeT)( (cfsi.base + cfsi.len - 1)
503                         - (di->rx_map_avma + di->rx_map_size - 1) );
504       vg_assert(delta > 0); vg_assert(delta < (SSizeT)cfsi.len);
505       cfsi.len -= delta;
506    }
507 
508    /* Final checks */
509 
510    /* Because: either cfsi was entirely inside the range, in which
511       case we asserted that len > 0 at the start, OR it fell partially
512       inside the range, in which case we reduced it by some size
513       (delta) which is < its original size. */
514    vg_assert(cfsi.len > 0);
515 
516    /* Similar logic applies for the next two assertions. */
517    vg_assert(cfsi.base >= di->rx_map_avma);
518    vg_assert(cfsi.base + cfsi.len - 1
519              <= di->rx_map_avma + di->rx_map_size - 1);
520 
521    if (di->cfsi_used == di->cfsi_size) {
522       new_sz = 2 * di->cfsi_size;
523       if (new_sz == 0) new_sz = 20;
524       new_tab = ML_(dinfo_zalloc)( "di.storage.addDiCfSI.1",
525                                    new_sz * sizeof(DiCfSI) );
526       if (di->cfsi != NULL) {
527          for (i = 0; i < di->cfsi_used; i++)
528             new_tab[i] = di->cfsi[i];
529          ML_(dinfo_free)(di->cfsi);
530       }
531       di->cfsi = new_tab;
532       di->cfsi_size = new_sz;
533    }
534 
535    di->cfsi[di->cfsi_used] = cfsi;
536    di->cfsi_used++;
537    vg_assert(di->cfsi_used <= di->cfsi_size);
538 }
539 
540 
ML_(CfiExpr_Undef)541 Int ML_(CfiExpr_Undef)( XArray* dst )
542 {
543    CfiExpr e;
544    VG_(memset)( &e, 0, sizeof(e) );
545    e.tag = Cex_Undef;
546    return (Int)VG_(addToXA)( dst, &e );
547 }
ML_(CfiExpr_Deref)548 Int ML_(CfiExpr_Deref)( XArray* dst, Int ixAddr )
549 {
550    CfiExpr e;
551    VG_(memset)( &e, 0, sizeof(e) );
552    e.tag = Cex_Deref;
553    e.Cex.Deref.ixAddr = ixAddr;
554    return (Int)VG_(addToXA)( dst, &e );
555 }
ML_(CfiExpr_Const)556 Int ML_(CfiExpr_Const)( XArray* dst, UWord con )
557 {
558    CfiExpr e;
559    VG_(memset)( &e, 0, sizeof(e) );
560    e.tag = Cex_Const;
561    e.Cex.Const.con = con;
562    return (Int)VG_(addToXA)( dst, &e );
563 }
ML_(CfiExpr_Binop)564 Int ML_(CfiExpr_Binop)( XArray* dst, CfiOp op, Int ixL, Int ixR )
565 {
566    CfiExpr e;
567    VG_(memset)( &e, 0, sizeof(e) );
568    e.tag = Cex_Binop;
569    e.Cex.Binop.op  = op;
570    e.Cex.Binop.ixL = ixL;
571    e.Cex.Binop.ixR = ixR;
572    return (Int)VG_(addToXA)( dst, &e );
573 }
ML_(CfiExpr_CfiReg)574 Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg )
575 {
576    CfiExpr e;
577    VG_(memset)( &e, 0, sizeof(e) );
578    e.tag = Cex_CfiReg;
579    e.Cex.CfiReg.reg = reg;
580    return (Int)VG_(addToXA)( dst, &e );
581 }
ML_(CfiExpr_DwReg)582 Int ML_(CfiExpr_DwReg)( XArray* dst, Int reg )
583 {
584    CfiExpr e;
585    VG_(memset)( &e, 0, sizeof(e) );
586    e.tag = Cex_DwReg;
587    e.Cex.DwReg.reg = reg;
588    return (Int)VG_(addToXA)( dst, &e );
589 }
590 
ppCfiOp(CfiOp op)591 static void ppCfiOp ( CfiOp op )
592 {
593    switch (op) {
594       case Cop_Add: VG_(printf)("+"); break;
595       case Cop_Sub: VG_(printf)("-"); break;
596       case Cop_And: VG_(printf)("&"); break;
597       case Cop_Mul: VG_(printf)("*"); break;
598       default:      vg_assert(0);
599    }
600 }
601 
ppCfiReg(CfiReg reg)602 static void ppCfiReg ( CfiReg reg )
603 {
604    switch (reg) {
605       case Creg_IA_SP:   VG_(printf)("xSP"); break;
606       case Creg_IA_BP:   VG_(printf)("xBP"); break;
607       case Creg_IA_IP:   VG_(printf)("xIP"); break;
608       case Creg_ARM_R13: VG_(printf)("R13"); break;
609       case Creg_ARM_R12: VG_(printf)("R12"); break;
610       case Creg_ARM_R15: VG_(printf)("R15"); break;
611       case Creg_ARM_R14: VG_(printf)("R14"); break;
612       default: vg_assert(0);
613    }
614 }
615 
ML_(ppCfiExpr)616 void ML_(ppCfiExpr)( XArray* src, Int ix )
617 {
618    /* VG_(indexXA) checks for invalid src/ix values, so we can
619       use it indiscriminately. */
620    CfiExpr* e = (CfiExpr*) VG_(indexXA)( src, ix );
621    switch (e->tag) {
622       case Cex_Undef:
623          VG_(printf)("Undef");
624          break;
625       case Cex_Deref:
626          VG_(printf)("*(");
627          ML_(ppCfiExpr)(src, e->Cex.Deref.ixAddr);
628          VG_(printf)(")");
629          break;
630       case Cex_Const:
631          VG_(printf)("0x%lx", e->Cex.Const.con);
632          break;
633       case Cex_Binop:
634          VG_(printf)("(");
635          ML_(ppCfiExpr)(src, e->Cex.Binop.ixL);
636          VG_(printf)(")");
637          ppCfiOp(e->Cex.Binop.op);
638          VG_(printf)("(");
639          ML_(ppCfiExpr)(src, e->Cex.Binop.ixR);
640          VG_(printf)(")");
641          break;
642       case Cex_CfiReg:
643          ppCfiReg(e->Cex.CfiReg.reg);
644          break;
645       case Cex_DwReg:
646          VG_(printf)("dwr%d", e->Cex.DwReg.reg);
647          break;
648       default:
649          VG_(core_panic)("ML_(ppCfiExpr)");
650          /*NOTREACHED*/
651          break;
652    }
653 }
654 
655 
ML_(cmp_for_DiAddrRange_range)656 Word ML_(cmp_for_DiAddrRange_range) ( const void* keyV,
657                                       const void* elemV ) {
658    const Addr* key = (const Addr*)keyV;
659    const DiAddrRange* elem = (const DiAddrRange*)elemV;
660    if (0)
661       VG_(printf)("cmp_for_DiAddrRange_range: %#lx vs %#lx\n",
662                   *key, elem->aMin);
663    if ((*key) < elem->aMin) return -1;
664    if ((*key) > elem->aMax) return 1;
665    return 0;
666 }
667 
668 static
show_scope(OSet * scope,HChar * who)669 void show_scope ( OSet* /* of DiAddrRange */ scope, HChar* who )
670 {
671    DiAddrRange* range;
672    VG_(printf)("Scope \"%s\" = {\n", who);
673    VG_(OSetGen_ResetIter)( scope );
674    while (True) {
675       range = VG_(OSetGen_Next)( scope );
676       if (!range) break;
677       VG_(printf)("   %#lx .. %#lx: %lu vars\n", range->aMin, range->aMax,
678                   range->vars ? VG_(sizeXA)(range->vars) : 0);
679    }
680    VG_(printf)("}\n");
681 }
682 
683 /* Add the variable 'var' to 'scope' for the address range [aMin,aMax]
684    (inclusive of aMin and aMax).  Split existing ranges as required if
685    aMin or aMax or both don't match existing range boundaries, and add
686    'var' to all required ranges.  Take great care to preserve the
687    invariant that the ranges in 'scope' cover the entire address range
688    exactly once, with no overlaps and no holes. */
add_var_to_arange(OSet * scope,Addr aMin,Addr aMax,DiVariable * var)689 static void add_var_to_arange (
690                /*MOD*/OSet* /* of DiAddrRange */ scope,
691                Addr aMin,
692                Addr aMax,
693                DiVariable* var
694             )
695 {
696    DiAddrRange *first, *last, *range;
697    /* These xx variables are for assertion checking only; they don't
698       contribute anything to the actual work of this function. */
699    DiAddrRange *xxRangep, *xxFirst, *xxLast;
700    UWord       xxIters;
701 
702    vg_assert(aMin <= aMax);
703 
704    if (0) VG_(printf)("add_var_to_arange: %#lx .. %#lx\n", aMin, aMax);
705    if (0) show_scope( scope, "add_var_to_arange(1)" );
706 
707    /* See if the lower end of the range (aMin) falls exactly on an
708       existing range boundary.  If not, find the range it does fall
709       into, and split it (copying the variables in the process), so
710       that aMin does exactly fall on a range boundary. */
711    first = VG_(OSetGen_Lookup)( scope, &aMin );
712    /* It must be present, since the presented OSet must cover
713       the entire address range. */
714    vg_assert(first);
715    vg_assert(first->aMin <= first->aMax);
716    vg_assert(first->aMin <= aMin && aMin <= first->aMax);
717 
718    /* Fast track common case, which is that the range specified for
719       the variable exactly coincides with one already-existing
720       range. */
721    if (first->aMin == aMin && first->aMax == aMax) {
722       vg_assert(first->vars);
723       VG_(addToXA)( first->vars, var );
724       return;
725    }
726 
727    /* We have to get into splitting ranges, which is complex
728       and slow. */
729    if (first->aMin < aMin) {
730       DiAddrRange* nyu;
731       /* Ok.  We'll have to split 'first'. */
732       /* truncate the upper end of 'first' */
733       Addr tmp = first->aMax;
734       first->aMax = aMin-1;
735       vg_assert(first->aMin <= first->aMax);
736       /* create a new range */
737       nyu = VG_(OSetGen_AllocNode)( scope, sizeof(DiAddrRange) );
738       vg_assert(nyu);
739       nyu->aMin = aMin;
740       nyu->aMax = tmp;
741       vg_assert(nyu->aMin <= nyu->aMax);
742       /* copy vars into it */
743       vg_assert(first->vars);
744       nyu->vars = VG_(cloneXA)( "di.storage.avta.1", first->vars );
745       vg_assert(nyu->vars);
746       VG_(OSetGen_Insert)( scope, nyu );
747       first = nyu;
748    }
749 
750    vg_assert(first->aMin == aMin);
751 
752    /* Now do exactly the same for the upper end (aMax): if it doesn't
753       fall on a boundary, cause it to do so by splitting the range it
754       does currently fall into. */
755    last = VG_(OSetGen_Lookup)( scope, &aMax );
756    vg_assert(last->aMin <= last->aMax);
757    vg_assert(last->aMin <= aMax && aMax <= last->aMax);
758 
759    if (aMax < last->aMax) {
760       DiAddrRange* nyu;
761       /* We have to split 'last'. */
762       /* truncate the lower end of 'last' */
763       Addr tmp = last->aMin;
764       last->aMin = aMax+1;
765       vg_assert(last->aMin <= last->aMax);
766       /* create a new range */
767       nyu = VG_(OSetGen_AllocNode)( scope, sizeof(DiAddrRange) );
768       vg_assert(nyu);
769       nyu->aMin = tmp;
770       nyu->aMax = aMax;
771       vg_assert(nyu->aMin <= nyu->aMax);
772       /* copy vars into it */
773       vg_assert(last->vars);
774       nyu->vars = VG_(cloneXA)( "di.storage.avta.2", last->vars );
775       vg_assert(nyu->vars);
776       VG_(OSetGen_Insert)( scope, nyu );
777       last = nyu;
778    }
779 
780    vg_assert(aMax == last->aMax);
781 
782    xxFirst = (DiAddrRange*)VG_(OSetGen_Lookup)(scope, &aMin);
783    xxLast  = (DiAddrRange*)VG_(OSetGen_Lookup)(scope, &aMax);
784    vg_assert(xxFirst);
785    vg_assert(xxLast);
786    vg_assert(xxFirst->aMin == aMin);
787    vg_assert(xxLast->aMax == aMax);
788    if (xxFirst != xxLast)
789       vg_assert(xxFirst->aMax < xxLast->aMin);
790 
791    /* Great.  Now we merely need to iterate over the segments from
792       'first' to 'last' inclusive, and add 'var' to the variable set
793       of each of them. */
794    if (0) {
795       static UWord ctr = 0;
796       ctr++;
797       VG_(printf)("ctr = %lu\n", ctr);
798       if (ctr >= 33263) show_scope( scope, "add_var_to_arange(2)" );
799    }
800 
801    xxIters = 0;
802    range = xxRangep = NULL;
803    VG_(OSetGen_ResetIterAt)( scope, &aMin );
804    while (True) {
805       xxRangep = range;
806       range    = VG_(OSetGen_Next)( scope );
807       if (!range) break;
808       if (range->aMin > aMax) break;
809       xxIters++;
810       if (0) VG_(printf)("have range %#lx %#lx\n",
811                          range->aMin, range->aMax);
812 
813       /* Sanity checks */
814       if (!xxRangep) {
815          /* This is the first in the range */
816          vg_assert(range->aMin == aMin);
817       } else {
818          vg_assert(xxRangep->aMax + 1 == range->aMin);
819       }
820 
821       vg_assert(range->vars);
822       VG_(addToXA)( range->vars, var );
823    }
824    /* Done.  We should have seen at least one range. */
825    vg_assert(xxIters >= 1);
826    if (xxIters == 1) vg_assert(xxFirst == xxLast);
827    if (xxFirst == xxLast) vg_assert(xxIters == 1);
828    vg_assert(xxRangep);
829    vg_assert(xxRangep->aMax == aMax);
830    vg_assert(xxRangep == xxLast);
831 }
832 
833 
834 /* Top-level place to call to add a variable description (as extracted
835    from a DWARF3 .debug_info section. */
ML_(addVar)836 void ML_(addVar)( struct _DebugInfo* di,
837                   Int    level,
838                   Addr   aMin,
839                   Addr   aMax,
840                   UChar* name, /* in di's .strchunks */
841                   UWord  typeR, /* a cuOff */
842                   GExpr* gexpr,
843                   GExpr* fbGX,
844                   UChar* fileName, /* where decl'd - may be NULL.
845                                       in di's .strchunks */
846                   Int    lineNo, /* where decl'd - may be zero */
847                   Bool   show )
848 {
849    OSet* /* of DiAddrRange */ scope;
850    DiVariable var;
851    Bool       all;
852    TyEnt*     ent;
853    MaybeULong mul;
854    HChar*     badness;
855 
856    tl_assert(di && di->admin_tyents);
857 
858    if (0) {
859       VG_(printf)("  ML_(addVar): level %d  %#lx-%#lx  %s :: ",
860                   level, aMin, aMax, name );
861       ML_(pp_TyEnt_C_ishly)( di->admin_tyents, typeR );
862       VG_(printf)("\n  Var=");
863       ML_(pp_GX)(gexpr);
864       VG_(printf)("\n");
865       if (fbGX) {
866          VG_(printf)("  FrB=");
867          ML_(pp_GX)( fbGX );
868          VG_(printf)("\n");
869       } else {
870          VG_(printf)("  FrB=none\n");
871       }
872       VG_(printf)("\n");
873    }
874 
875    vg_assert(level >= 0);
876    vg_assert(aMin <= aMax);
877    vg_assert(name);
878    vg_assert(gexpr);
879 
880    ent = ML_(TyEnts__index_by_cuOff)( di->admin_tyents, NULL, typeR);
881    tl_assert(ent);
882    vg_assert(ML_(TyEnt__is_type)(ent));
883 
884    /* "Comment_Regarding_Text_Range_Checks" (is referred to elsewhere)
885       ----------------------------------------------------------------
886       Ignore any variables whose aMin .. aMax (that is, range of text
887       addresses for which they actually exist) falls outside the text
888       segment.  Is this indicative of a bug in the reader?  Maybe.
889       (LATER): instead of restricting strictly to the .text segment,
890       be a bit more relaxed, and accept any variable whose text range
891       falls inside the r-x mapped area.  This is useful because .text
892       is not always the only instruction-carrying segment: others are:
893       .init .plt __libc_freeres_fn and .fini.  This implicitly assumes
894       that those extra sections have the same bias as .text, but that
895       seems a reasonable assumption to me. */
896    /* This is assured us by top level steering logic in debuginfo.c,
897       and it is re-checked at the start of
898       ML_(read_elf_debug_info). */
899    vg_assert(di->have_rx_map && di->have_rw_map);
900    if (level > 0
901        && (aMax < di->rx_map_avma
902            || aMin >= di->rx_map_avma + di->rx_map_size)) {
903       if (VG_(clo_verbosity) >= 0) {
904          VG_(message)(Vg_DebugMsg,
905             "warning: addVar: in range %#lx .. %#lx outside "
906             "segment %#lx .. %#lx (%s)\n",
907             aMin, aMax,
908             di->text_avma, di->text_avma + di->text_size -1,
909             name
910          );
911       }
912       return;
913    }
914 
915    /* If the type's size is zero (which can mean unknown size), ignore
916       it.  We will never be able to actually relate a data address to
917       a data object with zero size, so there's no point in storing
918       info on it.  On 32-bit platforms, also reject types whose size
919       is 2^32 bytes or large.  (It's amazing what junk shows up ..) */
920    mul = ML_(sizeOfType)(di->admin_tyents, typeR);
921 
922    badness = NULL;
923    if (mul.b != True)
924       badness = "unknown size";
925    else if (mul.ul == 0)
926       badness = "zero size   ";
927    else if (sizeof(void*) == 4 && mul.ul >= (1ULL<<32))
928       badness = "implausibly large";
929 
930    if (badness) {
931       static Int complaints = 10;
932       if (VG_(clo_verbosity) >= 2 && complaints > 0) {
933          VG_(message)(Vg_DebugMsg, "warning: addVar: %s (%s)\n",
934                                    badness, name );
935          complaints--;
936       }
937       return;
938    }
939 
940    if (!di->varinfo) {
941       di->varinfo = VG_(newXA)( ML_(dinfo_zalloc),
942                                 "di.storage.addVar.1",
943                                 ML_(dinfo_free),
944                                 sizeof(OSet*) );
945    }
946 
947    vg_assert(level < 256); /* arbitrary; stay sane */
948    /* Expand the top level array enough to map this level */
949    while ( VG_(sizeXA)(di->varinfo) <= level ) {
950       DiAddrRange* nyu;
951       scope = VG_(OSetGen_Create)( offsetof(DiAddrRange,aMin),
952                                    ML_(cmp_for_DiAddrRange_range),
953                                    ML_(dinfo_zalloc), "di.storage.addVar.2",
954                                    ML_(dinfo_free) );
955       vg_assert(scope);
956       if (0) VG_(printf)("create: scope = %p, adding at %ld\n",
957                          scope, VG_(sizeXA)(di->varinfo));
958       VG_(addToXA)( di->varinfo, &scope );
959       /* Add a single range covering the entire address space.  At
960          level 0 we require this doesn't get split.  At levels above 0
961          we require that any additions to it cause it to get split.
962          All of these invariants get checked both add_var_to_arange
963          and after reading is complete, in canonicaliseVarInfo. */
964       nyu = VG_(OSetGen_AllocNode)( scope, sizeof(DiAddrRange) );
965       vg_assert(nyu);
966       nyu->aMin = (Addr)0;
967       nyu->aMax = ~(Addr)0;
968       nyu->vars = VG_(newXA)( ML_(dinfo_zalloc), "di.storage.addVar.3",
969                               ML_(dinfo_free),
970                               sizeof(DiVariable) );
971       vg_assert(nyu->vars);
972       VG_(OSetGen_Insert)( scope, nyu );
973    }
974 
975    vg_assert( VG_(sizeXA)(di->varinfo) > level );
976    scope = *(OSet**)VG_(indexXA)( di->varinfo, level );
977    vg_assert(scope);
978 
979    var.name     = name;
980    var.typeR    = typeR;
981    var.gexpr    = gexpr;
982    var.fbGX     = fbGX;
983    var.fileName = fileName;
984    var.lineNo   = lineNo;
985 
986    all = aMin == (Addr)0 && aMax == ~(Addr)0;
987    vg_assert(level == 0 ? all : !all);
988 
989    add_var_to_arange( /*MOD*/scope, aMin, aMax, &var );
990 }
991 
992 
993 /* This really just checks the constructed data structure, as there is
994    no canonicalisation to do. */
canonicaliseVarInfo(struct _DebugInfo * di)995 static void canonicaliseVarInfo ( struct _DebugInfo* di )
996 {
997    Word i, nInThisScope;
998 
999    if (!di->varinfo)
1000       return;
1001 
1002    for (i = 0; i < VG_(sizeXA)(di->varinfo); i++) {
1003 
1004       DiAddrRange *range, *rangep;
1005       OSet* scope = *(OSet**)VG_(indexXA)(di->varinfo, i);
1006       if (!scope) continue;
1007 
1008       /* Deal with the global-scope case. */
1009       if (i == 0) {
1010          Addr zero = 0;
1011          vg_assert(VG_(OSetGen_Size)( scope ) == 1);
1012          range = VG_(OSetGen_Lookup)( scope, &zero );
1013          vg_assert(range);
1014          vg_assert(range->aMin == (Addr)0);
1015          vg_assert(range->aMax == ~(Addr)0);
1016          continue;
1017       }
1018 
1019       /* All the rest of this is for the local-scope case. */
1020       /* iterate over all entries in 'scope' */
1021       nInThisScope = 0;
1022       rangep = NULL;
1023       VG_(OSetGen_ResetIter)(scope);
1024       while (True) {
1025          range = VG_(OSetGen_Next)(scope);
1026          if (!range) {
1027            /* We just saw the last one.  There must have been at
1028               least one entry in the range. */
1029            vg_assert(rangep);
1030            vg_assert(rangep->aMax == ~(Addr)0);
1031            break;
1032          }
1033 
1034          vg_assert(range->aMin <= range->aMax);
1035          vg_assert(range->vars);
1036 
1037          if (!rangep) {
1038            /* This is the first entry in the range. */
1039            vg_assert(range->aMin == 0);
1040          } else {
1041            vg_assert(rangep->aMax + 1 == range->aMin);
1042          }
1043 
1044          rangep = range;
1045          nInThisScope++;
1046       } /* iterating over ranges in a given scope */
1047 
1048       /* If there's only one entry in this (local) scope, it must
1049          cover the entire address space (obviously), but it must not
1050          contain any vars. */
1051 
1052       vg_assert(nInThisScope > 0);
1053       if (nInThisScope == 1) {
1054          Addr zero = 0;
1055          vg_assert(VG_(OSetGen_Size)( scope ) == 1);
1056          range = VG_(OSetGen_Lookup)( scope, &zero );
1057          vg_assert(range);
1058          vg_assert(range->aMin == (Addr)0);
1059          vg_assert(range->aMax == ~(Addr)0);
1060          vg_assert(range->vars);
1061          vg_assert(VG_(sizeXA)(range->vars) == 0);
1062       }
1063 
1064    } /* iterate over scopes */
1065 }
1066 
1067 
1068 /*------------------------------------------------------------*/
1069 /*--- Canonicalisers                                       ---*/
1070 /*------------------------------------------------------------*/
1071 
1072 /* Sort the symtab by starting address, and emit warnings if any
1073    symbols have overlapping address ranges.  We use that old chestnut,
1074    shellsort.  Mash the table around so as to establish the property
1075    that addresses are in order and the ranges to not overlap.  This
1076    facilitates using binary search to map addresses to symbols when we
1077    come to query the table.
1078 */
compare_DiSym(void * va,void * vb)1079 static Int compare_DiSym ( void* va, void* vb )
1080 {
1081    DiSym* a = (DiSym*)va;
1082    DiSym* b = (DiSym*)vb;
1083    if (a->addr < b->addr) return -1;
1084    if (a->addr > b->addr) return  1;
1085    return 0;
1086 }
1087 
1088 
1089 /* Two symbols have the same address.  Which name do we prefer?  In order:
1090 
1091    - Prefer "PMPI_<foo>" over "MPI_<foo>".
1092 
1093    - Else, prefer a non-NULL name over a NULL one.
1094 
1095    - Else, prefer a non-whitespace name over an all-whitespace name.
1096 
1097    - Else, prefer the shorter symbol name.  If the symbol contains a
1098      version symbol ('@' on Linux, other platforms may differ), which means it
1099      is versioned, then the length up to the version symbol is used for length
1100      comparison purposes (so "foo@GLIBC_2.4.2" is considered shorter than
1101      "foobar").
1102 
1103    - Else, if two symbols have the same length, prefer a versioned symbol over
1104      a non-versioned symbol.
1105 
1106    - Else, use alphabetical ordering.
1107 
1108    - Otherwise, they must be the same;  use the symbol with the lower address.
1109 
1110    Very occasionally this goes wrong (eg. 'memcmp' and 'bcmp' are
1111    aliases in glibc, we choose the 'bcmp' symbol because it's shorter,
1112    so we can misdescribe memcmp() as bcmp()).  This is hard to avoid.
1113    It's mentioned in the FAQ file.
1114  */
prefersym(struct _DebugInfo * di,DiSym * a,DiSym * b)1115 static DiSym* prefersym ( struct _DebugInfo* di, DiSym* a, DiSym* b )
1116 {
1117    Word cmp;
1118    Word vlena, vlenb;		/* length without version */
1119    const UChar *vpa, *vpb;
1120 
1121    Bool preferA = False;
1122    Bool preferB = False;
1123 
1124    vg_assert(a->addr == b->addr);
1125 
1126    vlena = VG_(strlen)(a->name);
1127    vlenb = VG_(strlen)(b->name);
1128 
1129 #if defined(VGO_linux) || defined(VGO_aix5)
1130 #  define VERSION_CHAR '@'
1131 #elif defined(VGO_darwin)
1132 #  define VERSION_CHAR '$'
1133 #else
1134 #  error Unknown OS
1135 #endif
1136 
1137    vpa = VG_(strchr)(a->name, VERSION_CHAR);
1138    vpb = VG_(strchr)(b->name, VERSION_CHAR);
1139 
1140    if (vpa)
1141       vlena = vpa - a->name;
1142    if (vpb)
1143       vlenb = vpb - b->name;
1144 
1145    /* MPI hack: prefer PMPI_Foo over MPI_Foo */
1146    if (0==VG_(strncmp)(a->name, "MPI_", 4)
1147        && 0==VG_(strncmp)(b->name, "PMPI_", 5)
1148        && 0==VG_(strcmp)(a->name, 1+b->name)) {
1149       preferB = True; goto out;
1150    }
1151    if (0==VG_(strncmp)(b->name, "MPI_", 4)
1152        && 0==VG_(strncmp)(a->name, "PMPI_", 5)
1153        && 0==VG_(strcmp)(b->name, 1+a->name)) {
1154       preferA = True; goto out;
1155    }
1156 
1157    /* Prefer non-empty name. */
1158    if (vlena  &&  !vlenb) {
1159       preferA = True; goto out;
1160    }
1161    if (vlenb  &&  !vlena) {
1162       preferB = True; goto out;
1163    }
1164 
1165    /* Prefer non-whitespace name. */
1166    {
1167       Bool blankA = True;
1168       Bool blankB = True;
1169       Char *s;
1170       s = a->name;
1171       while (*s) {
1172          if (!VG_(isspace)(*s++)) {
1173             blankA = False;
1174             break;
1175          }
1176       }
1177       s = b->name;
1178       while (*s) {
1179          if (!VG_(isspace)(*s++)) {
1180             blankB = False;
1181             break;
1182          }
1183       }
1184 
1185       if (!blankA  &&  blankB) {
1186          preferA = True; goto out;
1187       }
1188       if (!blankB  &&  blankA) {
1189          preferB = True; goto out;
1190       }
1191    }
1192 
1193    /* Select the shortest unversioned name */
1194    if (vlena < vlenb) {
1195       preferA = True; goto out;
1196    }
1197    if (vlenb < vlena) {
1198       preferB = True; goto out;
1199    }
1200 
1201    /* Equal lengths; select the versioned name */
1202    if (vpa && !vpb) {
1203       preferA = True; goto out;
1204    }
1205    if (vpb && !vpa) {
1206       preferB = True; goto out;
1207    }
1208 
1209    /* Either both versioned or neither is versioned; select them
1210       alphabetically */
1211    cmp = VG_(strcmp)(a->name, b->name);
1212    if (cmp < 0) {
1213       preferA = True; goto out;
1214    }
1215    if (cmp > 0) {
1216       preferB = True; goto out;
1217    }
1218 
1219    /* If we get here, they are the same name. */
1220 
1221    /* In this case we could choose either (arbitrarily), but might as
1222       well choose the one with the lowest DiSym* address, so as to try
1223       and make the comparison mechanism more stable (a la sorting
1224       parlance).  Also, skip the diagnostic printing in this case. */
1225    return a <= b  ? a  : b;
1226 
1227    /*NOTREACHED*/
1228    vg_assert(0);
1229   out:
1230    if (preferA && !preferB) {
1231       TRACE_SYMTAB("sym at %#lx: prefer '%s' to '%s'\n",
1232                    a->addr, a->name, b->name );
1233       return a;
1234    }
1235    if (preferB && !preferA) {
1236       TRACE_SYMTAB("sym at %#lx: prefer '%s' to '%s'\n",
1237                    b->addr, b->name, a->name );
1238       return b;
1239    }
1240    /*NOTREACHED*/
1241    vg_assert(0);
1242 }
1243 
canonicaliseSymtab(struct _DebugInfo * di)1244 static void canonicaliseSymtab ( struct _DebugInfo* di )
1245 {
1246    Word  i, j, n_merged, n_truncated;
1247    Addr  s1, s2, e1, e2, p1, p2;
1248    UChar *n1, *n2;
1249    Bool t1, t2, f1, f2;
1250 
1251 #  define SWAP(ty,aa,bb) \
1252       do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0)
1253 
1254    if (di->symtab_used == 0)
1255       return;
1256 
1257    VG_(ssort)(di->symtab, di->symtab_used,
1258                           sizeof(*di->symtab), compare_DiSym);
1259 
1260   cleanup_more:
1261 
1262    /* If two symbols have identical address ranges, we pick one
1263       using prefersym() (see it for details). */
1264    do {
1265       n_merged = 0;
1266       j = di->symtab_used;
1267       di->symtab_used = 0;
1268       for (i = 0; i < j; i++) {
1269          if (i < j-1
1270              && di->symtab[i].addr   == di->symtab[i+1].addr
1271              && di->symtab[i].size   == di->symtab[i+1].size
1272              ) {
1273             n_merged++;
1274             /* merge the two into one */
1275 	    di->symtab[di->symtab_used++]
1276                = *prefersym(di, &di->symtab[i], &di->symtab[i+1]);
1277             i++;
1278          } else {
1279             di->symtab[di->symtab_used++] = di->symtab[i];
1280          }
1281       }
1282       TRACE_SYMTAB( "canonicaliseSymtab: %ld symbols merged\n", n_merged);
1283    }
1284    while (n_merged > 0);
1285 
1286    /* Detect and "fix" overlapping address ranges. */
1287    n_truncated = 0;
1288 
1289    for (i = 0; i < ((Word)di->symtab_used) -1; i++) {
1290 
1291       vg_assert(di->symtab[i].addr <= di->symtab[i+1].addr);
1292 
1293       /* Check for common (no overlap) case. */
1294       if (di->symtab[i].addr + di->symtab[i].size
1295           <= di->symtab[i+1].addr)
1296          continue;
1297 
1298       /* There's an overlap.  Truncate one or the other. */
1299       if (di->trace_symtab) {
1300          VG_(printf)("overlapping address ranges in symbol table\n\t");
1301          ML_(ppSym)( i, &di->symtab[i] );
1302          VG_(printf)("\t");
1303          ML_(ppSym)( i+1, &di->symtab[i+1] );
1304          VG_(printf)("\n");
1305       }
1306 
1307       /* Truncate one or the other. */
1308       s1 = di->symtab[i].addr;
1309       e1 = s1 + di->symtab[i].size - 1;
1310       p1 = di->symtab[i].tocptr;
1311       n1 = di->symtab[i].name;
1312       t1 = di->symtab[i].isText;
1313       f1 = di->symtab[i].isIFunc;
1314       s2 = di->symtab[i+1].addr;
1315       e2 = s2 + di->symtab[i+1].size - 1;
1316       p2 = di->symtab[i+1].tocptr;
1317       n2 = di->symtab[i+1].name;
1318       t2 = di->symtab[i+1].isText;
1319       f2 = di->symtab[i+1].isIFunc;
1320       if (s1 < s2) {
1321          e1 = s2-1;
1322       } else {
1323          vg_assert(s1 == s2);
1324          if (e1 > e2) {
1325             s1 = e2+1; SWAP(Addr,s1,s2); SWAP(Addr,e1,e2); SWAP(Addr,p1,p2);
1326                        SWAP(UChar *,n1,n2); SWAP(Bool,t1,t2);
1327          } else
1328          if (e1 < e2) {
1329             s2 = e1+1;
1330          } else {
1331 	   /* e1 == e2.  Identical addr ranges.  We'll eventually wind
1332               up back at cleanup_more, which will take care of it. */
1333 	 }
1334       }
1335       di->symtab[i].addr    = s1;
1336       di->symtab[i].size    = e1 - s1 + 1;
1337       di->symtab[i].tocptr  = p1;
1338       di->symtab[i].name    = n1;
1339       di->symtab[i].isText  = t1;
1340       di->symtab[i].isIFunc = f1;
1341       di->symtab[i+1].addr    = s2;
1342       di->symtab[i+1].size    = e2 - s2 + 1;
1343       di->symtab[i+1].tocptr  = p2;
1344       di->symtab[i+1].name    = n2;
1345       di->symtab[i+1].isText  = t2;
1346       di->symtab[i+1].isIFunc = f2;
1347       vg_assert(s1 <= s2);
1348       vg_assert(di->symtab[i].size > 0);
1349       vg_assert(di->symtab[i+1].size > 0);
1350       /* It may be that the i+1 entry now needs to be moved further
1351          along to maintain the address order requirement. */
1352       j = i+1;
1353       while (j < ((Word)di->symtab_used)-1
1354              && di->symtab[j].addr > di->symtab[j+1].addr) {
1355          SWAP(DiSym,di->symtab[j],di->symtab[j+1]);
1356          j++;
1357       }
1358       n_truncated++;
1359    }
1360 
1361    if (n_truncated > 0) goto cleanup_more;
1362 
1363    /* Ensure relevant postconditions hold. */
1364    for (i = 0; i < ((Word)di->symtab_used)-1; i++) {
1365       /* No zero-sized symbols. */
1366       vg_assert(di->symtab[i].size > 0);
1367       /* In order. */
1368       vg_assert(di->symtab[i].addr < di->symtab[i+1].addr);
1369       /* No overlaps. */
1370       vg_assert(di->symtab[i].addr + di->symtab[i].size - 1
1371                 < di->symtab[i+1].addr);
1372    }
1373 #  undef SWAP
1374 }
1375 
1376 
1377 /* Sort the location table by starting address.  Mash the table around
1378    so as to establish the property that addresses are in order and the
1379    ranges do not overlap.  This facilitates using binary search to map
1380    addresses to locations when we come to query the table.
1381 */
compare_DiLoc(void * va,void * vb)1382 static Int compare_DiLoc ( void* va, void* vb )
1383 {
1384    DiLoc* a = (DiLoc*)va;
1385    DiLoc* b = (DiLoc*)vb;
1386    if (a->addr < b->addr) return -1;
1387    if (a->addr > b->addr) return  1;
1388    return 0;
1389 }
1390 
canonicaliseLoctab(struct _DebugInfo * di)1391 static void canonicaliseLoctab ( struct _DebugInfo* di )
1392 {
1393    Word i, j;
1394 
1395 #  define SWAP(ty,aa,bb) \
1396       do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0);
1397 
1398    if (di->loctab_used == 0)
1399       return;
1400 
1401    /* Sort by start address. */
1402    VG_(ssort)(di->loctab, di->loctab_used,
1403                           sizeof(*di->loctab), compare_DiLoc);
1404 
1405    /* If two adjacent entries overlap, truncate the first. */
1406    for (i = 0; i < ((Word)di->loctab_used)-1; i++) {
1407       vg_assert(di->loctab[i].size < 10000);
1408       if (di->loctab[i].addr + di->loctab[i].size > di->loctab[i+1].addr) {
1409          /* Do this in signed int32 because the actual .size fields
1410             are only 12 bits. */
1411          Int new_size = di->loctab[i+1].addr - di->loctab[i].addr;
1412          if (new_size < 0) {
1413             di->loctab[i].size = 0;
1414          } else
1415          if (new_size > MAX_LOC_SIZE) {
1416            di->loctab[i].size = MAX_LOC_SIZE;
1417          } else {
1418            di->loctab[i].size = (UShort)new_size;
1419          }
1420       }
1421    }
1422 
1423    /* Zap any zero-sized entries resulting from the truncation
1424       process. */
1425    j = 0;
1426    for (i = 0; i < (Word)di->loctab_used; i++) {
1427       if (di->loctab[i].size > 0) {
1428          if (j != i)
1429             di->loctab[j] = di->loctab[i];
1430          j++;
1431       }
1432    }
1433    di->loctab_used = j;
1434 
1435    /* Ensure relevant postconditions hold. */
1436    for (i = 0; i < ((Word)di->loctab_used)-1; i++) {
1437       /*
1438       VG_(printf)("%d   (%d) %d 0x%x\n",
1439                    i, di->loctab[i+1].confident,
1440                    di->loctab[i+1].size, di->loctab[i+1].addr );
1441       */
1442       /* No zero-sized symbols. */
1443       vg_assert(di->loctab[i].size > 0);
1444       /* In order. */
1445       vg_assert(di->loctab[i].addr < di->loctab[i+1].addr);
1446       /* No overlaps. */
1447       vg_assert(di->loctab[i].addr + di->loctab[i].size - 1
1448                 < di->loctab[i+1].addr);
1449    }
1450 #  undef SWAP
1451 }
1452 
1453 
1454 /* Sort the call-frame-info table by starting address.  Mash the table
1455    around so as to establish the property that addresses are in order
1456    and the ranges do not overlap.  This facilitates using binary
1457    search to map addresses to locations when we come to query the
1458    table.
1459 
1460    Also, set cfisi_minaddr and cfisi_maxaddr to be the min and max of
1461    any of the address ranges contained in cfisi[0 .. cfisi_used-1], so
1462    as to facilitate rapidly skipping this SegInfo when looking for an
1463    address which falls outside that range.
1464 */
compare_DiCfSI(void * va,void * vb)1465 static Int compare_DiCfSI ( void* va, void* vb )
1466 {
1467    DiCfSI* a = (DiCfSI*)va;
1468    DiCfSI* b = (DiCfSI*)vb;
1469    if (a->base < b->base) return -1;
1470    if (a->base > b->base) return  1;
1471    return 0;
1472 }
1473 
ML_(canonicaliseCFI)1474 void ML_(canonicaliseCFI) ( struct _DebugInfo* di )
1475 {
1476    Word  i, j;
1477    const Addr minAvma = 0;
1478    const Addr maxAvma = ~minAvma;
1479 
1480    /* Note: take care in here.  di->cfsi can be NULL, in which
1481       case _used and _size fields will be zero. */
1482    if (di->cfsi == NULL) {
1483       vg_assert(di->cfsi_used == 0);
1484       vg_assert(di->cfsi_size == 0);
1485    }
1486 
1487    /* Set cfsi_minavma and cfsi_maxavma to summarise the entire
1488       address range contained in cfsi[0 .. cfsi_used-1]. */
1489    di->cfsi_minavma = maxAvma;
1490    di->cfsi_maxavma = minAvma;
1491    for (i = 0; i < (Word)di->cfsi_used; i++) {
1492       Addr here_min = di->cfsi[i].base;
1493       Addr here_max = di->cfsi[i].base + di->cfsi[i].len - 1;
1494       if (here_min < di->cfsi_minavma)
1495          di->cfsi_minavma = here_min;
1496       if (here_max > di->cfsi_maxavma)
1497          di->cfsi_maxavma = here_max;
1498    }
1499 
1500    if (di->trace_cfi)
1501       VG_(printf)("canonicaliseCfiSI: %ld entries, %#lx .. %#lx\n",
1502                   di->cfsi_used,
1503 	          di->cfsi_minavma, di->cfsi_maxavma);
1504 
1505    /* Sort the cfsi array by base address. */
1506    VG_(ssort)(di->cfsi, di->cfsi_used, sizeof(*di->cfsi), compare_DiCfSI);
1507 
1508    /* If two adjacent entries overlap, truncate the first. */
1509    for (i = 0; i < (Word)di->cfsi_used-1; i++) {
1510       if (di->cfsi[i].base + di->cfsi[i].len > di->cfsi[i+1].base) {
1511          Word new_len = di->cfsi[i+1].base - di->cfsi[i].base;
1512          /* how could it be otherwise?  The entries are sorted by the
1513             .base field. */
1514          vg_assert(new_len >= 0);
1515 	 vg_assert(new_len <= di->cfsi[i].len);
1516          di->cfsi[i].len = new_len;
1517       }
1518    }
1519 
1520    /* Zap any zero-sized entries resulting from the truncation
1521       process. */
1522    j = 0;
1523    for (i = 0; i < (Word)di->cfsi_used; i++) {
1524       if (di->cfsi[i].len > 0) {
1525          if (j != i)
1526             di->cfsi[j] = di->cfsi[i];
1527          j++;
1528       }
1529    }
1530    /* VG_(printf)("XXXXXXXXXXXXX %d %d\n", di->cfsi_used, j); */
1531    di->cfsi_used = j;
1532 
1533    /* Ensure relevant postconditions hold. */
1534    for (i = 0; i < (Word)di->cfsi_used; i++) {
1535       /* No zero-length ranges. */
1536       vg_assert(di->cfsi[i].len > 0);
1537       /* Makes sense w.r.t. summary address range */
1538       vg_assert(di->cfsi[i].base >= di->cfsi_minavma);
1539       vg_assert(di->cfsi[i].base + di->cfsi[i].len - 1
1540                 <= di->cfsi_maxavma);
1541 
1542       if (i < di->cfsi_used - 1) {
1543          /*
1544          if (!(di->cfsi[i].base < di->cfsi[i+1].base)) {
1545             VG_(printf)("\nOOO cfsis:\n");
1546             ML_(ppCfiSI)(&di->cfsi[i]);
1547             ML_(ppCfiSI)(&di->cfsi[i+1]);
1548          }
1549          */
1550          /* In order. */
1551          vg_assert(di->cfsi[i].base < di->cfsi[i+1].base);
1552          /* No overlaps. */
1553          vg_assert(di->cfsi[i].base + di->cfsi[i].len - 1
1554                    < di->cfsi[i+1].base);
1555       }
1556    }
1557 
1558 }
1559 
1560 
1561 /* Canonicalise the tables held by 'di', in preparation for use.  Call
1562    this after finishing adding entries to these tables. */
ML_(canonicaliseTables)1563 void ML_(canonicaliseTables) ( struct _DebugInfo* di )
1564 {
1565    canonicaliseSymtab ( di );
1566    canonicaliseLoctab ( di );
1567    ML_(canonicaliseCFI) ( di );
1568    canonicaliseVarInfo ( di );
1569 }
1570 
1571 
1572 /*------------------------------------------------------------*/
1573 /*--- Searching the tables                                 ---*/
1574 /*------------------------------------------------------------*/
1575 
1576 /* Find a symbol-table index containing the specified pointer, or -1
1577    if not found.  Binary search.  */
1578 
ML_(search_one_symtab)1579 Word ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr,
1580                               Bool match_anywhere_in_sym,
1581                               Bool findText )
1582 {
1583    Addr a_mid_lo, a_mid_hi;
1584    Word mid, size,
1585         lo = 0,
1586         hi = di->symtab_used-1;
1587    while (True) {
1588       /* current unsearched space is from lo to hi, inclusive. */
1589       if (lo > hi) return -1; /* not found */
1590       mid      = (lo + hi) / 2;
1591       a_mid_lo = di->symtab[mid].addr;
1592       size = ( match_anywhere_in_sym
1593              ? di->symtab[mid].size
1594              : 1);
1595       a_mid_hi = ((Addr)di->symtab[mid].addr) + size - 1;
1596 
1597       if (ptr < a_mid_lo) { hi = mid-1; continue; }
1598       if (ptr > a_mid_hi) { lo = mid+1; continue; }
1599       vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1600       /* Found a symbol with the correct address range.  But is it
1601          of the right kind (text vs data) ? */
1602       if (  findText   &&   di->symtab[mid].isText  ) return mid;
1603       if ( (!findText) && (!di->symtab[mid].isText) ) return mid;
1604       return -1;
1605    }
1606 }
1607 
1608 
1609 /* Find a location-table index containing the specified pointer, or -1
1610    if not found.  Binary search.  */
1611 
ML_(search_one_loctab)1612 Word ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr )
1613 {
1614    Addr a_mid_lo, a_mid_hi;
1615    Word mid,
1616         lo = 0,
1617         hi = di->loctab_used-1;
1618    while (True) {
1619       /* current unsearched space is from lo to hi, inclusive. */
1620       if (lo > hi) return -1; /* not found */
1621       mid      = (lo + hi) / 2;
1622       a_mid_lo = di->loctab[mid].addr;
1623       a_mid_hi = ((Addr)di->loctab[mid].addr) + di->loctab[mid].size - 1;
1624 
1625       if (ptr < a_mid_lo) { hi = mid-1; continue; }
1626       if (ptr > a_mid_hi) { lo = mid+1; continue; }
1627       vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1628       return mid;
1629    }
1630 }
1631 
1632 
1633 /* Find a CFI-table index containing the specified pointer, or -1
1634    if not found.  Binary search.  */
1635 
ML_(search_one_cfitab)1636 Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr )
1637 {
1638    Addr a_mid_lo, a_mid_hi;
1639    Word mid, size,
1640         lo = 0,
1641         hi = di->cfsi_used-1;
1642    while (True) {
1643       /* current unsearched space is from lo to hi, inclusive. */
1644       if (lo > hi) return -1; /* not found */
1645       mid      = (lo + hi) / 2;
1646       a_mid_lo = di->cfsi[mid].base;
1647       size     = di->cfsi[mid].len;
1648       a_mid_hi = a_mid_lo + size - 1;
1649       vg_assert(a_mid_hi >= a_mid_lo);
1650       if (ptr < a_mid_lo) { hi = mid-1; continue; }
1651       if (ptr > a_mid_hi) { lo = mid+1; continue; }
1652       vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1653       return mid;
1654    }
1655 }
1656 
1657 
1658 /* Find a FPO-table index containing the specified pointer, or -1
1659    if not found.  Binary search.  */
1660 
ML_(search_one_fpotab)1661 Word ML_(search_one_fpotab) ( struct _DebugInfo* di, Addr ptr )
1662 {
1663    Addr const addr = ptr - di->rx_map_avma;
1664    Addr a_mid_lo, a_mid_hi;
1665    Word mid, size,
1666         lo = 0,
1667         hi = di->fpo_size-1;
1668    while (True) {
1669       /* current unsearched space is from lo to hi, inclusive. */
1670       if (lo > hi) return -1; /* not found */
1671       mid      = (lo + hi) / 2;
1672       a_mid_lo = di->fpo[mid].ulOffStart;
1673       size     = di->fpo[mid].cbProcSize;
1674       a_mid_hi = a_mid_lo + size - 1;
1675       vg_assert(a_mid_hi >= a_mid_lo);
1676       if (addr < a_mid_lo) { hi = mid-1; continue; }
1677       if (addr > a_mid_hi) { lo = mid+1; continue; }
1678       vg_assert(addr >= a_mid_lo && addr <= a_mid_hi);
1679       return mid;
1680    }
1681 }
1682 
1683 /*--------------------------------------------------------------------*/
1684 /*--- end                                                          ---*/
1685 /*--------------------------------------------------------------------*/
1686