1
2 /*--------------------------------------------------------------------*/
3 /*--- Reading of syms & debug info from Mach-O files. ---*/
4 /*--- readmacho.c ---*/
5 /*--------------------------------------------------------------------*/
6
7 /*
8 This file is part of Valgrind, a dynamic binary instrumentation
9 framework.
10
11 Copyright (C) 2005-2012 Apple Inc.
12 Greg Parker gparker@apple.com
13
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
29 The GNU General Public License is contained in the file COPYING.
30 */
31
32 #if defined(VGO_darwin)
33
34 #include "pub_core_basics.h"
35 #include "pub_core_vki.h"
36 #include "pub_core_libcbase.h"
37 #include "pub_core_libcprint.h"
38 #include "pub_core_libcassert.h"
39 #include "pub_core_libcfile.h"
40 #include "pub_core_libcproc.h"
41 #include "pub_core_aspacemgr.h" /* for mmaping debuginfo files */
42 #include "pub_core_machine.h" /* VG_ELF_CLASS */
43 #include "pub_core_options.h"
44 #include "pub_core_oset.h"
45 #include "pub_core_tooliface.h" /* VG_(needs) */
46 #include "pub_core_xarray.h"
47 #include "pub_core_clientstate.h"
48 #include "pub_core_debuginfo.h"
49
50 #include "priv_d3basics.h"
51 #include "priv_misc.h"
52 #include "priv_tytypes.h"
53 #include "priv_storage.h"
54 #include "priv_readmacho.h"
55 #include "priv_readdwarf.h"
56 #include "priv_readdwarf3.h"
57 #include "priv_readstabs.h"
58
59 /* --- !!! --- EXTERNAL HEADERS start --- !!! --- */
60 #include <mach-o/loader.h>
61 #include <mach-o/nlist.h>
62 #include <mach-o/fat.h>
63 /* --- !!! --- EXTERNAL HEADERS end --- !!! --- */
64
/* Pick the 32-bit or the 64-bit flavour of the Mach-O constants and
   structure tags, according to the word size being built for.  The
   rest of this file only uses the word-size-neutral names defined
   here. */
#if VG_WORDSIZE == 4
   /* 32-bit */
#  define MAGIC            MH_MAGIC
#  define MACH_HEADER      mach_header
#  define LC_SEGMENT_CMD   LC_SEGMENT
#  define SEGMENT_COMMAND  segment_command
#  define SECTION          section
#  define NLIST            nlist
#else
   /* anything else: 64-bit */
#  define MAGIC            MH_MAGIC_64
#  define MACH_HEADER      mach_header_64
#  define LC_SEGMENT_CMD   LC_SEGMENT_64
#  define SEGMENT_COMMAND  segment_command_64
#  define SECTION          section_64
#  define NLIST            nlist_64
#endif
80
81
82 /*------------------------------------------------------------*/
83 /*--- ---*/
84 /*--- Mach-O file mapping/unmapping helpers ---*/
85 /*--- ---*/
86 /*------------------------------------------------------------*/
87
88 typedef
89 struct {
90 /* These two describe the entire mapped-in ("primary") image,
91 fat headers, kitchen sink, whatnot: the entire file. The
92 image is mapped into img[0 .. img_szB-1]. */
93 UChar* img;
94 SizeT img_szB;
95 /* These two describe the Mach-O object of interest, which is
96 presumably somewhere inside the primary image.
97 map_image_aboard() below, which generates this info, will
98 carefully check that the macho_ fields denote a section of
99 memory that falls entirely inside img[0 .. img_szB-1]. */
100 UChar* macho_img;
101 SizeT macho_img_szB;
102 }
103 ImageInfo;
104
105
ML_(is_macho_object_file)106 Bool ML_(is_macho_object_file)( const void* buf, SizeT szB )
107 {
108 /* (JRS: the Mach-O headers might not be in this mapped data,
109 because we only mapped a page for this initial check,
110 or at least not very much, and what's at the start of the file
111 is in general a so-called fat header. The Mach-O object we're
112 interested in could be arbitrarily far along the image, and so
113 we can't assume its header will fall within this page.) */
114
115 /* But we can say that either it's a fat object, in which case it
116 begins with a fat header, or it's unadorned Mach-O, in which
117 case it starts with a normal header. At least do what checks we
118 can to establish whether or not we're looking at something
119 sane. */
120
121 const struct fat_header* fh_be = buf;
122 const struct MACH_HEADER* mh = buf;
123
124 vg_assert(buf);
125 if (szB < sizeof(struct fat_header))
126 return False;
127 if (VG_(ntohl)(fh_be->magic) == FAT_MAGIC)
128 return True;
129
130 if (szB < sizeof(struct MACH_HEADER))
131 return False;
132 if (mh->magic == MAGIC)
133 return True;
134
135 return False;
136 }
137
138
139 /* Unmap an image mapped in by map_image_aboard. */
unmap_image(ImageInfo * ii)140 static void unmap_image ( /*MOD*/ImageInfo* ii )
141 {
142 SysRes sres;
143 vg_assert(ii->img);
144 vg_assert(ii->img_szB > 0);
145 sres = VG_(am_munmap_valgrind)( (Addr)ii->img, ii->img_szB );
146 /* Do we care if this fails? I suppose so; it would indicate
147 some fairly serious snafu with the mapping of the file. */
148 vg_assert( !sr_isError(sres) );
149 VG_(memset)(ii, 0, sizeof(*ii));
150 }
151
152
153 /* Map a given fat or thin object aboard, find the thin part if
154 necessary, do some checks, and write details of both the fat and
155 thin parts into *ii. Returns False (and leaves the file unmapped)
156 on failure. Guarantees to return pointers to a valid(ish) Mach-O
157 image if it succeeds. */
map_image_aboard(DebugInfo * di,ImageInfo * ii,UChar * filename)158 static Bool map_image_aboard ( DebugInfo* di, /* only for err msgs */
159 /*OUT*/ImageInfo* ii, UChar* filename )
160 {
161 VG_(memset)(ii, 0, sizeof(*ii));
162
163 /* First off, try to map the thing in. */
164 { SizeT size;
165 SysRes fd, sres;
166 struct vg_stat stat_buf;
167
168 fd = VG_(stat)(filename, &stat_buf);
169 if (sr_isError(fd)) {
170 ML_(symerr)(di, True, "Can't stat image (to determine its size)?!");
171 return False;
172 }
173 size = stat_buf.size;
174
175 fd = VG_(open)(filename, VKI_O_RDONLY, 0);
176 if (sr_isError(fd)) {
177 ML_(symerr)(di, True, "Can't open image to read symbols?!");
178 return False;
179 }
180
181 sres = VG_(am_mmap_file_float_valgrind)
182 ( size, VKI_PROT_READ, sr_Res(fd), 0 );
183 if (sr_isError(sres)) {
184 ML_(symerr)(di, True, "Can't mmap image to read symbols?!");
185 return False;
186 }
187
188 VG_(close)(sr_Res(fd));
189
190 ii->img = (UChar*)sr_Res(sres);
191 ii->img_szB = size;
192 }
193
194 /* Now it's mapped in and we have .img and .img_szB set. Look for
195 the embedded Mach-O object. If not findable, unmap and fail. */
196 { struct fat_header* fh_be;
197 struct fat_header fh;
198 struct MACH_HEADER* mh;
199
200 // Assume initially that we have a thin image, and update
201 // these if it turns out to be fat.
202 ii->macho_img = ii->img;
203 ii->macho_img_szB = ii->img_szB;
204
205 // Check for fat header.
206 if (ii->img_szB < sizeof(struct fat_header)) {
207 ML_(symerr)(di, True, "Invalid Mach-O file (0 too small).");
208 goto unmap_and_fail;
209 }
210
211 // Fat header is always BIG-ENDIAN
212 fh_be = (struct fat_header *)ii->img;
213 fh.magic = VG_(ntohl)(fh_be->magic);
214 fh.nfat_arch = VG_(ntohl)(fh_be->nfat_arch);
215 if (fh.magic == FAT_MAGIC) {
216 // Look for a good architecture.
217 struct fat_arch *arch_be;
218 struct fat_arch arch;
219 Int f;
220 if (ii->img_szB < sizeof(struct fat_header)
221 + fh.nfat_arch * sizeof(struct fat_arch)) {
222 ML_(symerr)(di, True, "Invalid Mach-O file (1 too small).");
223 goto unmap_and_fail;
224 }
225 for (f = 0, arch_be = (struct fat_arch *)(fh_be+1);
226 f < fh.nfat_arch;
227 f++, arch_be++) {
228 Int cputype;
229 # if defined(VGA_ppc)
230 cputype = CPU_TYPE_POWERPC;
231 # elif defined(VGA_ppc64)
232 cputype = CPU_TYPE_POWERPC64;
233 # elif defined(VGA_x86)
234 cputype = CPU_TYPE_X86;
235 # elif defined(VGA_amd64)
236 cputype = CPU_TYPE_X86_64;
237 # else
238 # error "unknown architecture"
239 # endif
240 arch.cputype = VG_(ntohl)(arch_be->cputype);
241 arch.cpusubtype = VG_(ntohl)(arch_be->cpusubtype);
242 arch.offset = VG_(ntohl)(arch_be->offset);
243 arch.size = VG_(ntohl)(arch_be->size);
244 if (arch.cputype == cputype) {
245 if (ii->img_szB < arch.offset + arch.size) {
246 ML_(symerr)(di, True, "Invalid Mach-O file (2 too small).");
247 goto unmap_and_fail;
248 }
249 ii->macho_img = ii->img + arch.offset;
250 ii->macho_img_szB = arch.size;
251 break;
252 }
253 }
254 if (f == fh.nfat_arch) {
255 ML_(symerr)(di, True,
256 "No acceptable architecture found in fat file.");
257 goto unmap_and_fail;
258 }
259 }
260
261 /* Sanity check what we found. */
262
263 /* assured by logic above */
264 vg_assert(ii->img_szB >= sizeof(struct fat_header));
265
266 if (ii->macho_img_szB < sizeof(struct MACH_HEADER)) {
267 ML_(symerr)(di, True, "Invalid Mach-O file (3 too small).");
268 goto unmap_and_fail;
269 }
270
271 if (ii->macho_img_szB > ii->img_szB) {
272 ML_(symerr)(di, True, "Invalid Mach-O file (thin bigger than fat).");
273 goto unmap_and_fail;
274 }
275
276 if (ii->macho_img >= ii->img
277 && ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB) {
278 /* thin entirely within fat, as expected */
279 } else {
280 ML_(symerr)(di, True, "Invalid Mach-O file (thin not inside fat).");
281 goto unmap_and_fail;
282 }
283
284 mh = (struct MACH_HEADER *)ii->macho_img;
285 if (mh->magic != MAGIC) {
286 ML_(symerr)(di, True, "Invalid Mach-O file (bad magic).");
287 goto unmap_and_fail;
288 }
289
290 if (ii->macho_img_szB < sizeof(struct MACH_HEADER) + mh->sizeofcmds) {
291 ML_(symerr)(di, True, "Invalid Mach-O file (4 too small).");
292 goto unmap_and_fail;
293 }
294 }
295
296 vg_assert(ii->img);
297 vg_assert(ii->macho_img);
298 vg_assert(ii->img_szB > 0);
299 vg_assert(ii->macho_img_szB > 0);
300 vg_assert(ii->macho_img >= ii->img);
301 vg_assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
302 return True; /* success */
303 /*NOTREACHED*/
304
305 unmap_and_fail:
306 unmap_image(ii);
307 return False; /* bah! */
308 }
309
310
311 /*------------------------------------------------------------*/
312 /*--- ---*/
313 /*--- Mach-O symbol table reading ---*/
314 /*--- ---*/
315 /*------------------------------------------------------------*/
316
317 /* Read a symbol table (nlist). Add the resulting candidate symbols
318 to 'syms'; the caller will post-process them and hand them off to
319 ML_(addSym) itself. */
320 static
read_symtab(XArray * syms,struct _DebugInfo * di,struct NLIST * o_symtab,UInt o_symtab_count,UChar * o_strtab,UInt o_strtab_sz)321 void read_symtab( /*OUT*/XArray* /* DiSym */ syms,
322 struct _DebugInfo* di,
323 struct NLIST* o_symtab, UInt o_symtab_count,
324 UChar* o_strtab, UInt o_strtab_sz )
325 {
326 Int i;
327 Addr sym_addr;
328 DiSym disym;
329 UChar* name;
330
331 static UChar* s_a_t_v = NULL; /* do not make non-static */
332
333 for (i = 0; i < o_symtab_count; i++) {
334 struct NLIST *nl = o_symtab+i;
335 if ((nl->n_type & N_TYPE) == N_SECT) {
336 sym_addr = di->text_bias + nl->n_value;
337 /*} else if ((nl->n_type & N_TYPE) == N_ABS) {
338 GrP fixme don't ignore absolute symbols?
339 sym_addr = nl->n_value; */
340 } else {
341 continue;
342 }
343
344 if (di->trace_symtab)
345 VG_(printf)("nlist raw: avma %010lx %s\n",
346 sym_addr, o_strtab + nl->n_un.n_strx );
347
348 /* If no part of the symbol falls within the mapped range,
349 ignore it. */
350 if (sym_addr <= di->text_avma
351 || sym_addr >= di->text_avma+di->text_size) {
352 continue;
353 }
354
355 /* skip names which point outside the string table;
356 following these risks segfaulting Valgrind */
357 name = o_strtab + nl->n_un.n_strx;
358 if (name < o_strtab || name >= o_strtab + o_strtab_sz)
359 continue;
360
361 /* skip nameless symbols; these appear to be common, but
362 useless */
363 if (*name == 0)
364 continue;
365
366 disym.addr = sym_addr;
367 disym.tocptr = 0;
368 disym.pri_name = ML_(addStr)(di, name, -1);
369 disym.sec_names = NULL;
370 disym.size = // let canonicalize fix it
371 di->text_avma+di->text_size - sym_addr;
372 disym.isText = True;
373 disym.isIFunc = False;
374 // Lots of user function names get prepended with an underscore. Eg. the
375 // function 'f' becomes the symbol '_f'. And the "below main"
376 // function is called "start". So we skip the leading underscore, and
377 // if we see 'start' and --show-below-main=no, we rename it as
378 // "start_according_to_valgrind", which makes it easy to spot later
379 // and display as "(below main)".
380 if (disym.pri_name[0] == '_') {
381 disym.pri_name++;
382 }
383 else if (!VG_(clo_show_below_main) && VG_STREQ(disym.pri_name, "start")) {
384 if (s_a_t_v == NULL)
385 s_a_t_v = ML_(addStr)(di, "start_according_to_valgrind", -1);
386 vg_assert(s_a_t_v);
387 disym.pri_name = s_a_t_v;
388 }
389
390 vg_assert(disym.pri_name);
391 VG_(addToXA)( syms, &disym );
392 }
393 }
394
395
396 /* Compare DiSyms by their start address, and for equal addresses, use
397 the primary name as a secondary sort key. */
cmp_DiSym_by_start_then_name(void * v1,void * v2)398 static Int cmp_DiSym_by_start_then_name ( void* v1, void* v2 )
399 {
400 DiSym* s1 = (DiSym*)v1;
401 DiSym* s2 = (DiSym*)v2;
402 if (s1->addr < s2->addr) return -1;
403 if (s1->addr > s2->addr) return 1;
404 return VG_(strcmp)(s1->pri_name, s2->pri_name);
405 }
406
407 /* 'cand' is a bunch of candidate symbols obtained by reading
408 nlist-style symbol table entries. Their ends may overlap, so sort
409 them and truncate them accordingly. The code in this routine is
410 copied almost verbatim from read_symbol_table() in readxcoff.c. */
tidy_up_cand_syms(XArray * syms,Bool trace_symtab)411 static void tidy_up_cand_syms ( /*MOD*/XArray* /* of DiSym */ syms,
412 Bool trace_symtab )
413 {
414 Word nsyms, i, j, k, m;
415
416 nsyms = VG_(sizeXA)(syms);
417
418 VG_(setCmpFnXA)(syms, cmp_DiSym_by_start_then_name);
419 VG_(sortXA)(syms);
420
421 /* We only know for sure the start addresses (actual VMAs) of
422 symbols, and an overestimation of their end addresses. So sort
423 by start address, then clip each symbol so that its end address
424 does not overlap with the next one along.
425
426 There is a small refinement: if a group of symbols have the same
427 address, treat them as a group: find the next symbol along that
428 has a higher start address, and clip all of the group
429 accordingly. This clips the group as a whole so as not to
430 overlap following symbols. This leaves prefersym() in
431 storage.c, which is not nlist-specific, to later decide which of
432 the symbols in the group to keep.
433
434 Another refinement is that we need to get rid of symbols which,
435 after clipping, have identical starts, ends, and names. So the
436 sorting uses the name as a secondary key.
437 */
438
439 for (i = 0; i < nsyms; i++) {
440 for (k = i+1;
441 k < nsyms
442 && ((DiSym*)VG_(indexXA)(syms,i))->addr
443 == ((DiSym*)VG_(indexXA)(syms,k))->addr;
444 k++)
445 ;
446 /* So now [i .. k-1] is a group all with the same start address.
447 Clip their ending addresses so they don't overlap [k]. In
448 the normal case (no overlaps), k == i+1. */
449 if (k < nsyms) {
450 DiSym* next = (DiSym*)VG_(indexXA)(syms,k);
451 for (m = i; m < k; m++) {
452 DiSym* here = (DiSym*)VG_(indexXA)(syms,m);
453 vg_assert(here->addr < next->addr);
454 if (here->addr + here->size > next->addr)
455 here->size = next->addr - here->addr;
456 }
457 }
458 i = k-1;
459 vg_assert(i <= nsyms);
460 }
461
462 j = 0;
463 if (nsyms > 0) {
464 j = 1;
465 for (i = 1; i < nsyms; i++) {
466 DiSym *s_j1, *s_j, *s_i;
467 vg_assert(j <= i);
468 s_j1 = (DiSym*)VG_(indexXA)(syms, j-1);
469 s_j = (DiSym*)VG_(indexXA)(syms, j);
470 s_i = (DiSym*)VG_(indexXA)(syms, i);
471 if (s_i->addr != s_j1->addr
472 || s_i->size != s_j1->size
473 || 0 != VG_(strcmp)(s_i->pri_name, s_j1->pri_name)) {
474 *s_j = *s_i;
475 j++;
476 } else {
477 if (trace_symtab)
478 VG_(printf)("nlist cleanup: dump duplicate avma %010lx %s\n",
479 s_i->addr, s_i->pri_name );
480 }
481 }
482 }
483 vg_assert(j >= 0 && j <= nsyms);
484 VG_(dropTailXA)(syms, nsyms - j);
485 }
486
487
488 /*------------------------------------------------------------*/
489 /*--- ---*/
490 /*--- Mach-O top-level processing ---*/
491 /*--- ---*/
492 /*------------------------------------------------------------*/
493
/* Suffix which, appended to a bundle or executable path, gives the
   directory inside a .dSYM bundle that holds the DWARF file.  May be
   overridden by defining it before this point. */
#ifndef APPLE_DSYM_EXT_AND_SUBDIRECTORY
#  define APPLE_DSYM_EXT_AND_SUBDIRECTORY ".dSYM/Contents/Resources/DWARF/"
#endif
497
498
file_exists_p(const Char * path)499 static Bool file_exists_p(const Char *path)
500 {
501 struct vg_stat sbuf;
502 SysRes res = VG_(stat)(path, &sbuf);
503 return sr_isError(res) ? False : True;
504 }
505
506
507 /* Search for an existing dSYM file as a possible separate debug file.
508 Adapted from gdb. */
509 static Char *
find_separate_debug_file(const Char * executable_name)510 find_separate_debug_file (const Char *executable_name)
511 {
512 Char *basename_str;
513 Char *dot_ptr;
514 Char *slash_ptr;
515 Char *dsymfile;
516
517 /* Make sure the object file name itself doesn't contain ".dSYM" in it or we
518 will end up with an infinite loop where after we add a dSYM symbol file,
519 it will then enter this function asking if there is a debug file for the
520 dSYM file itself. */
521 if (VG_(strcasestr) (executable_name, ".dSYM") == NULL)
522 {
523 /* Check for the existence of a .dSYM file for a given executable. */
524 basename_str = VG_(basename) (executable_name);
525 dsymfile = ML_(dinfo_zalloc)("di.readmacho.dsymfile",
526 VG_(strlen) (executable_name)
527 + VG_(strlen) (APPLE_DSYM_EXT_AND_SUBDIRECTORY)
528 + VG_(strlen) (basename_str)
529 + 1
530 );
531
532 /* First try for the dSYM in the same directory as the original file. */
533 VG_(strcpy) (dsymfile, executable_name);
534 VG_(strcat) (dsymfile, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
535 VG_(strcat) (dsymfile, basename_str);
536
537 if (file_exists_p (dsymfile))
538 return dsymfile;
539
540 /* Now search for any parent directory that has a '.' in it so we can find
541 Mac OS X applications, bundles, plugins, and any other kinds of files.
542 Mac OS X application bundles wil have their program in
543 "/some/path/MyApp.app/Contents/MacOS/MyApp" (or replace ".app" with
544 ".bundle" or ".plugin" for other types of bundles). So we look for any
545 prior '.' character and try appending the apple dSYM extension and
546 subdirectory and see if we find an existing dSYM file (in the above
547 MyApp example the dSYM would be at either:
548 "/some/path/MyApp.app.dSYM/Contents/Resources/DWARF/MyApp" or
549 "/some/path/MyApp.dSYM/Contents/Resources/DWARF/MyApp". */
550 VG_(strcpy) (dsymfile, VG_(dirname) (executable_name));
551 while ((dot_ptr = VG_(strrchr) (dsymfile, '.')))
552 {
553 /* Find the directory delimiter that follows the '.' character since
554 we now look for a .dSYM that follows any bundle extension. */
555 slash_ptr = VG_(strchr) (dot_ptr, '/');
556 if (slash_ptr)
557 {
558 /* NULL terminate the string at the '/' character and append
559 the path down to the dSYM file. */
560 *slash_ptr = '\0';
561 VG_(strcat) (slash_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
562 VG_(strcat) (slash_ptr, basename_str);
563 if (file_exists_p (dsymfile))
564 return dsymfile;
565 }
566
567 /* NULL terminate the string at the '.' character and append
568 the path down to the dSYM file. */
569 *dot_ptr = '\0';
570 VG_(strcat) (dot_ptr, APPLE_DSYM_EXT_AND_SUBDIRECTORY);
571 VG_(strcat) (dot_ptr, basename_str);
572 if (file_exists_p (dsymfile))
573 return dsymfile;
574
575 /* NULL terminate the string at the '.' locatated by the strrchr()
576 function again. */
577 *dot_ptr = '\0';
578
579 /* We found a previous extension '.' character and did not find a
580 dSYM file so now find previous directory delimiter so we don't
581 try multiple times on a file name that may have a version number
582 in it such as "/some/path/MyApp.6.0.4.app". */
583 slash_ptr = VG_(strrchr) (dsymfile, '/');
584 if (!slash_ptr)
585 break;
586 /* NULL terminate the string at the previous directory character
587 and search again. */
588 *slash_ptr = '\0';
589 }
590 }
591
592 return NULL;
593 }
594
595
/* Locate a section's contents inside a thin Mach-O image.

   base, size  the image: base[0 .. size-1], starting with its
               Mach-O header
   segname     segment name to look for (compared over the full
               width of the fixed-size segname field)
   sectname    section name to look for within matching segments
               (likewise compared over the full field width)
   sect_size   OUT, may be NULL: receives the section's size, or 0
               if nothing is returned

   Returns a pointer to the first matching section's data within the
   image, or NULL if there is no such section.  NULL is also returned
   if the load commands or the section found do not lie entirely
   inside base[0 .. size-1], so that a malformed file cannot make the
   caller read outside the image. */
static UChar *getsectdata(UChar* base, SizeT size,
                          Char *segname, Char *sectname,
                          /*OUT*/Word *sect_size)
{
   struct MACH_HEADER *mh = (struct MACH_HEADER *)base;
   struct load_command *cmd;
   UInt c;

   if (size < sizeof(struct MACH_HEADER))
      goto not_found;

   for (c = 0, cmd = (struct load_command *)(mh+1);
        c < mh->ncmds;
        c++, cmd = (struct load_command *)(cmd->cmdsize + (Addr)cmd))
   {
      struct SEGMENT_COMMAND *seg;
      struct SECTION *sects;
      UInt  s;
      /* Invariant: cmd_off <= size.  It starts at the header size
         (checked above) and each step adds a cmdsize that has been
         checked against the space remaining. */
      SizeT cmd_off = (SizeT)((UChar*)cmd - base);

      /* The command header, and then the whole command, must fit in
         what is left of the image.  A cmdsize smaller than the
         header itself is bogus too. */
      if (size - cmd_off < sizeof(struct load_command)
          || cmd->cmdsize < sizeof(struct load_command)
          || cmd->cmdsize > size - cmd_off)
         break;

      if (cmd->cmd != LC_SEGMENT_CMD)
         continue;

      /* Too small to really be a segment command? */
      if (cmd->cmdsize < sizeof(struct SEGMENT_COMMAND))
         break;

      seg = (struct SEGMENT_COMMAND *)cmd;
      if (0 != VG_(strncmp)(seg->segname, segname, sizeof(seg->segname)))
         continue;

      /* The section headers follow the segment command and must all
         lie within this command. */
      if (seg->nsects
          > (cmd->cmdsize - sizeof(struct SEGMENT_COMMAND))
            / sizeof(struct SECTION))
         break;

      sects = (struct SECTION *)(seg+1);
      for (s = 0; s < seg->nsects; s++) {
         if (0 != VG_(strncmp)(sects[s].sectname, sectname,
                               sizeof(sects[s].sectname)))
            continue;

         /* Found it.  Refuse to hand out a range that is not fully
            inside the image; test offset and size separately so the
            check itself cannot wrap. */
         if (sects[s].offset > size
             || sects[s].size > size - sects[s].offset)
            goto not_found;

         if (sect_size) *sect_size = sects[s].size;
         return (UChar *)(base + sects[s].offset);
      }
   }

  not_found:
   if (sect_size) *sect_size = 0;
   return NULL;
}
628
629
630 /* Brute force just simply search for uuid[0..15] in img[0..n_img-1] */
check_uuid_matches(Addr imgA,Word n_img,UChar * uuid)631 static Bool check_uuid_matches ( Addr imgA, Word n_img, UChar* uuid )
632 {
633 Word i;
634 UChar* img = (UChar*)imgA;
635 UChar first = uuid[0];
636 if (n_img < 16)
637 return False;
638 for (i = 0; i < n_img-16; i++) {
639 if (img[i] != first)
640 continue;
641 if (0 == VG_(memcmp)( &img[i], &uuid[0], 16 ))
642 return True;
643 }
644 return False;
645 }
646
647
648 /* Heuristic kludge: return True if this looks like an installed
649 standard library; hence we shouldn't consider automagically running
650 dsymutil on it. */
is_systemish_library_name(UChar * name)651 static Bool is_systemish_library_name ( UChar* name )
652 {
653 vg_assert(name);
654 if (0 == VG_(strncasecmp)(name, "/usr/", 5)
655 || 0 == VG_(strncasecmp)(name, "/bin/", 5)
656 || 0 == VG_(strncasecmp)(name, "/sbin/", 6)
657 || 0 == VG_(strncasecmp)(name, "/opt/", 5)
658 || 0 == VG_(strncasecmp)(name, "/sw/", 4)
659 || 0 == VG_(strncasecmp)(name, "/System/", 8)
660 || 0 == VG_(strncasecmp)(name, "/Library/", 9)
661 || 0 == VG_(strncasecmp)(name, "/Applications/", 14)) {
662 return True;
663 } else {
664 return False;
665 }
666 }
667
668
ML_(read_macho_debug_info)669 Bool ML_(read_macho_debug_info)( struct _DebugInfo* di )
670 {
671 struct symtab_command *symcmd = NULL;
672 struct dysymtab_command *dysymcmd = NULL;
673 HChar* dsymfilename = NULL;
674 Bool have_uuid = False;
675 UChar uuid[16];
676 ImageInfo ii; /* main file */
677 ImageInfo iid; /* auxiliary .dSYM file */
678 Bool ok;
679 Word i;
680 struct _DebugInfoMapping* rx_map = NULL;
681 struct _DebugInfoMapping* rw_map = NULL;
682
683 /* mmap the object file to look for di->soname and di->text_bias
684 and uuid and nlist and STABS */
685
686 /* This should be ensured by our caller (that we're in the accept
687 state). */
688 vg_assert(di->fsm.have_rx_map);
689 vg_assert(di->fsm.have_rw_map);
690
691 for (i = 0; i < VG_(sizeXA)(di->fsm.maps); i++) {
692 struct _DebugInfoMapping* map = VG_(indexXA)(di->fsm.maps, i);
693 if (map->rx && !rx_map)
694 rx_map = map;
695 if (map->rw && !rw_map)
696 rw_map = map;
697 if (rx_map && rw_map)
698 break;
699 }
700 vg_assert(rx_map);
701 vg_assert(rw_map);
702
703 if (VG_(clo_verbosity) > 1)
704 VG_(message)(Vg_DebugMsg,
705 "%s (rx at %#lx, rw at %#lx)\n", di->fsm.filename,
706 rx_map->avma, rw_map->avma );
707
708 VG_(memset)(&ii, 0, sizeof(ii));
709 VG_(memset)(&iid, 0, sizeof(iid));
710 VG_(memset)(&uuid, 0, sizeof(uuid));
711
712 ok = map_image_aboard( di, &ii, di->fsm.filename );
713 if (!ok) goto fail;
714
715 vg_assert(ii.macho_img != NULL && ii.macho_img_szB > 0);
716
717 /* Poke around in the Mach-O header, to find some important
718 stuff. */
719 // Find LC_SYMTAB and LC_DYSYMTAB, if present.
720 // Read di->soname from LC_ID_DYLIB if present,
721 // or from LC_ID_DYLINKER if present,
722 // or use "NONE".
723 // Get di->text_bias (aka slide) based on the corresponding LC_SEGMENT
724 // Get uuid for later dsym search
725
726 di->text_bias = 0;
727
728 { struct MACH_HEADER *mh = (struct MACH_HEADER *)ii.macho_img;
729 struct load_command *cmd;
730 Int c;
731
732 for (c = 0, cmd = (struct load_command *)(mh+1);
733 c < mh->ncmds;
734 c++, cmd = (struct load_command *)(cmd->cmdsize
735 + (unsigned long)cmd)) {
736 if (cmd->cmd == LC_SYMTAB) {
737 symcmd = (struct symtab_command *)cmd;
738 }
739 else if (cmd->cmd == LC_DYSYMTAB) {
740 dysymcmd = (struct dysymtab_command *)cmd;
741 }
742 else if (cmd->cmd == LC_ID_DYLIB && mh->filetype == MH_DYLIB) {
743 // GrP fixme bundle?
744 struct dylib_command *dcmd = (struct dylib_command *)cmd;
745 UChar *dylibname = dcmd->dylib.name.offset + (UChar *)dcmd;
746 UChar *soname = VG_(strrchr)(dylibname, '/');
747 if (!soname) soname = dylibname;
748 else soname++;
749 di->soname = ML_(dinfo_strdup)("di.readmacho.dylibname",
750 soname);
751 }
752 else if (cmd->cmd==LC_ID_DYLINKER && mh->filetype==MH_DYLINKER) {
753 struct dylinker_command *dcmd = (struct dylinker_command *)cmd;
754 UChar *dylinkername = dcmd->name.offset + (UChar *)dcmd;
755 UChar *soname = VG_(strrchr)(dylinkername, '/');
756 if (!soname) soname = dylinkername;
757 else soname++;
758 di->soname = ML_(dinfo_strdup)("di.readmacho.dylinkername",
759 soname);
760 }
761
762 // A comment from Julian about why varinfo[35] fail:
763 //
764 // My impression is, from comparing the output of otool -l for these
765 // executables with the logic in ML_(read_macho_debug_info),
766 // specifically the part that begins "else if (cmd->cmd ==
767 // LC_SEGMENT_CMD) {", that it's a complete hack which just happens
768 // to work ok for text symbols. In particular, it appears to assume
769 // that in a "struct load_command" of type LC_SEGMENT_CMD, the first
770 // "struct SEGMENT_COMMAND" inside it is going to contain the info we
771 // need. However, otool -l shows, and also the Apple docs state,
772 // that a struct load_command may contain an arbitrary number of
773 // struct SEGMENT_COMMANDs, so I'm not sure why it's OK to merely
774 // snarf the first. But I'm not sure about this.
775 //
776 // The "Try for __DATA" block below simply adds acquisition of data
777 // svma/bias values using the same assumption. It also needs
778 // (probably) to deal with bss sections, but I don't understand how
779 // this all ties together really, so it requires further study.
780 //
781 // If you can get your head around the relationship between MachO
782 // segments, sections and load commands, this might be relatively
783 // easy to fix properly.
784 //
785 // Basically we need to come up with plausible numbers for di->
786 // {text,data,bss}_{avma,svma}, from which the _bias numbers are
787 // then trivially derived. Then I think the debuginfo reader should
788 // work pretty well.
789 else if (cmd->cmd == LC_SEGMENT_CMD) {
790 struct SEGMENT_COMMAND *seg = (struct SEGMENT_COMMAND *)cmd;
791 /* Try for __TEXT */
792 if (!di->text_present
793 && 0 == VG_(strcmp)(seg->segname, "__TEXT")
794 /* DDD: is the next line a kludge? -- JRS */
795 && seg->fileoff == 0 && seg->filesize != 0) {
796 di->text_present = True;
797 di->text_svma = (Addr)seg->vmaddr;
798 di->text_avma = rx_map->avma;
799 di->text_size = seg->vmsize;
800 di->text_bias = di->text_avma - di->text_svma;
801 /* Make the _debug_ values be the same as the
802 svma/bias for the primary object, since there is
803 no secondary (debuginfo) object, but nevertheless
804 downstream biasing of Dwarf3 relies on the
805 _debug_ values. */
806 di->text_debug_svma = di->text_svma;
807 di->text_debug_bias = di->text_bias;
808 }
809 /* Try for __DATA */
810 if (!di->data_present
811 && 0 == VG_(strcmp)(seg->segname, "__DATA")
812 /* && DDD:seg->fileoff == 0 */ && seg->filesize != 0) {
813 di->data_present = True;
814 di->data_svma = (Addr)seg->vmaddr;
815 di->data_avma = rw_map->avma;
816 di->data_size = seg->vmsize;
817 di->data_bias = di->data_avma - di->data_svma;
818 di->data_debug_svma = di->data_svma;
819 di->data_debug_bias = di->data_bias;
820 }
821 }
822 else if (cmd->cmd == LC_UUID) {
823 struct uuid_command *uuid_cmd = (struct uuid_command *)cmd;
824 VG_(memcpy)(uuid, uuid_cmd->uuid, sizeof(uuid));
825 have_uuid = True;
826 }
827 }
828 }
829
830 if (!di->soname) {
831 di->soname = ML_(dinfo_strdup)("di.readmacho.noname", "NONE");
832 }
833
834 if (di->trace_symtab) {
835 VG_(printf)("\n");
836 VG_(printf)("SONAME = %s\n", di->soname);
837 VG_(printf)("\n");
838 }
839
840 /* Now we have the base object to hand. Read symbols from it. */
841
842 if (ii.macho_img && ii.macho_img_szB > 0 && symcmd && dysymcmd) {
843
844 /* Read nlist symbol table */
845 struct NLIST *syms;
846 UChar *strs;
847 XArray* /* DiSym */ candSyms = NULL;
848 Word nCandSyms;
849
850 if (ii.macho_img_szB < symcmd->stroff + symcmd->strsize
851 || ii.macho_img_szB < symcmd->symoff + symcmd->nsyms
852 * sizeof(struct NLIST)) {
853 ML_(symerr)(di, False, "Invalid Mach-O file (5 too small).");
854 goto fail;
855 }
856 if (dysymcmd->ilocalsym + dysymcmd->nlocalsym > symcmd->nsyms
857 || dysymcmd->iextdefsym + dysymcmd->nextdefsym > symcmd->nsyms) {
858 ML_(symerr)(di, False, "Invalid Mach-O file (bad symbol table).");
859 goto fail;
860 }
861
862 syms = (struct NLIST *)(ii.macho_img + symcmd->symoff);
863 strs = (UChar *)(ii.macho_img + symcmd->stroff);
864
865 if (VG_(clo_verbosity) > 1)
866 VG_(message)(Vg_DebugMsg,
867 " reading syms from primary file (%d %d)\n",
868 dysymcmd->nextdefsym, dysymcmd->nlocalsym );
869
870 /* Read candidate symbols into 'candSyms', so we can truncate
871 overlapping ends and generally tidy up, before presenting
872 them to ML_(addSym). */
873 candSyms = VG_(newXA)(
874 ML_(dinfo_zalloc), "di.readmacho.candsyms.1",
875 ML_(dinfo_free), sizeof(DiSym)
876 );
877 vg_assert(candSyms);
878
879 // extern symbols
880 read_symtab(candSyms,
881 di,
882 syms + dysymcmd->iextdefsym, dysymcmd->nextdefsym,
883 strs, symcmd->strsize);
884 // static and private_extern symbols
885 read_symtab(candSyms,
886 di,
887 syms + dysymcmd->ilocalsym, dysymcmd->nlocalsym,
888 strs, symcmd->strsize);
889
890 /* tidy up the cand syms -- trim overlapping ends. May resize
891 candSyms. */
892 tidy_up_cand_syms( candSyms, di->trace_symtab );
893
894 /* and finally present them to ML_(addSym) */
895 nCandSyms = VG_(sizeXA)( candSyms );
896 for (i = 0; i < nCandSyms; i++) {
897 DiSym* cand = (DiSym*) VG_(indexXA)( candSyms, i );
898 vg_assert(cand->pri_name != NULL);
899 vg_assert(cand->sec_names == NULL);
900 if (di->trace_symtab)
901 VG_(printf)("nlist final: acquire avma %010lx-%010lx %s\n",
902 cand->addr, cand->addr + cand->size - 1,
903 cand->pri_name );
904 ML_(addSym)( di, cand );
905 }
906 VG_(deleteXA)( candSyms );
907 }
908
909 /* If there's no UUID in the primary, don't even bother to try and
910 read any DWARF, since we won't be able to verify it matches.
911 Our policy is not to load debug info unless we can verify that
912 it matches the primary. Just declare success at this point.
913 And don't complain to the user, since that would cause us to
914 complain on objects compiled without -g. (Some versions of
915 XCode are observed to omit a UUID entry for object linked(?)
916 without -g. Others don't appear to omit it.) */
917 if (!have_uuid)
918 goto success;
919
920 /* mmap the dSYM file to look for DWARF debug info. If successful,
921 use the .macho_img and .macho_img_szB in iid. */
922
923 dsymfilename = find_separate_debug_file( di->fsm.filename );
924
925 /* Try to load it. */
926 if (dsymfilename) {
927 Bool valid;
928
929 if (VG_(clo_verbosity) > 1)
930 VG_(message)(Vg_DebugMsg, " dSYM= %s\n", dsymfilename);
931
932 ok = map_image_aboard( di, &iid, dsymfilename );
933 if (!ok) goto fail;
934
935 /* check it has the right uuid. */
936 vg_assert(have_uuid);
937 valid = iid.macho_img && iid.macho_img_szB > 0
938 && check_uuid_matches( (Addr)iid.macho_img,
939 iid.macho_img_szB, uuid );
940 if (valid)
941 goto read_the_dwarf;
942
943 if (VG_(clo_verbosity) > 1)
944 VG_(message)(Vg_DebugMsg, " dSYM does not have "
945 "correct UUID (out of date?)\n");
946 }
947
948 /* There was no dsym file, or it doesn't match. We'll have to try
949 regenerating it, unless --dsymutil=no, in which case just complain
950 instead. */
951
952 /* If this looks like a lib that we shouldn't run dsymutil on, just
953 give up. (possible reasons: is system lib, or in /usr etc, or
954 the dsym dir would not be writable by the user, or we're running
955 as root) */
956 vg_assert(di->fsm.filename);
957 if (is_systemish_library_name(di->fsm.filename))
958 goto success;
959
960 if (!VG_(clo_dsymutil)) {
961 if (VG_(clo_verbosity) == 1) {
962 VG_(message)(Vg_DebugMsg, "%s:\n", di->fsm.filename);
963 }
964 if (VG_(clo_verbosity) > 0)
965 VG_(message)(Vg_DebugMsg, "%sdSYM directory %s; consider using "
966 "--dsymutil=yes\n",
967 VG_(clo_verbosity) > 1 ? " " : "",
968 dsymfilename ? "has wrong UUID" : "is missing");
969 goto success;
970 }
971
972 /* Run dsymutil */
973
974 { Int r;
975 HChar* dsymutil = "/usr/bin/dsymutil ";
976 HChar* cmd = ML_(dinfo_zalloc)( "di.readmacho.tmp1",
977 VG_(strlen)(dsymutil)
978 + VG_(strlen)(di->fsm.filename)
979 + 32 /* misc */ );
980 VG_(strcpy)(cmd, dsymutil);
981 if (0) VG_(strcat)(cmd, "--verbose ");
982 VG_(strcat)(cmd, "\"");
983 VG_(strcat)(cmd, di->fsm.filename);
984 VG_(strcat)(cmd, "\"");
985 VG_(message)(Vg_DebugMsg, "run: %s\n", cmd);
986 r = VG_(system)( cmd );
987 if (r)
988 VG_(message)(Vg_DebugMsg, "run: %s FAILED\n", dsymutil);
989 ML_(dinfo_free)(cmd);
990 dsymfilename = find_separate_debug_file(di->fsm.filename);
991 }
992
993 /* Try again to load it. */
994 if (dsymfilename) {
995 Bool valid;
996
997 if (VG_(clo_verbosity) > 1)
998 VG_(message)(Vg_DebugMsg, " dsyms= %s\n", dsymfilename);
999
1000 ok = map_image_aboard( di, &iid, dsymfilename );
1001 if (!ok) goto fail;
1002
1003 /* check it has the right uuid. */
1004 vg_assert(have_uuid);
1005 valid = iid.macho_img && iid.macho_img_szB > 0
1006 && check_uuid_matches( (Addr)iid.macho_img,
1007 iid.macho_img_szB, uuid );
1008 if (!valid) {
1009 if (VG_(clo_verbosity) > 0) {
1010 VG_(message)(Vg_DebugMsg,
1011 "WARNING: did not find expected UUID %02X%02X%02X%02X"
1012 "-%02X%02X-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X"
1013 " in dSYM dir\n",
1014 (UInt)uuid[0], (UInt)uuid[1], (UInt)uuid[2], (UInt)uuid[3],
1015 (UInt)uuid[4], (UInt)uuid[5], (UInt)uuid[6], (UInt)uuid[7],
1016 (UInt)uuid[8], (UInt)uuid[9], (UInt)uuid[10],
1017 (UInt)uuid[11], (UInt)uuid[12], (UInt)uuid[13],
1018 (UInt)uuid[14], (UInt)uuid[15] );
1019 VG_(message)(Vg_DebugMsg,
1020 "WARNING: for %s\n", di->fsm.filename);
1021 }
1022 unmap_image( &iid );
1023 /* unmap_image zeroes the fields of iid, so the iid.img test at
1024 the fail: label will not try to unmap it a second time. */
1025 goto fail;
1026 }
1027 }
1028
1029 /* Right. Finally we have our best try at the dwarf image, so go
1030 on to reading stuff out of it. */
1031
1032 read_the_dwarf:
1033 if (iid.macho_img && iid.macho_img_szB > 0) {
1034 UChar* debug_info_img = NULL;
1035 Word debug_info_sz;
1036 UChar* debug_abbv_img;
1037 Word debug_abbv_sz;
1038 UChar* debug_line_img;
1039 Word debug_line_sz;
1040 UChar* debug_str_img;
1041 Word debug_str_sz;
1042 UChar* debug_ranges_img;
1043 Word debug_ranges_sz;
1044 UChar* debug_loc_img;
1045 Word debug_loc_sz;
1046 UChar* debug_name_img;
1047 Word debug_name_sz;
1048
1049 debug_info_img =
1050 getsectdata(iid.macho_img, iid.macho_img_szB,
1051 "__DWARF", "__debug_info", &debug_info_sz);
1052 debug_abbv_img =
1053 getsectdata(iid.macho_img, iid.macho_img_szB,
1054 "__DWARF", "__debug_abbrev", &debug_abbv_sz);
1055 debug_line_img =
1056 getsectdata(iid.macho_img, iid.macho_img_szB,
1057 "__DWARF", "__debug_line", &debug_line_sz);
1058 debug_str_img =
1059 getsectdata(iid.macho_img, iid.macho_img_szB,
1060 "__DWARF", "__debug_str", &debug_str_sz);
1061 debug_ranges_img =
1062 getsectdata(iid.macho_img, iid.macho_img_szB,
1063 "__DWARF", "__debug_ranges", &debug_ranges_sz);
1064 debug_loc_img =
1065 getsectdata(iid.macho_img, iid.macho_img_szB,
1066 "__DWARF", "__debug_loc", &debug_loc_sz);
1067 debug_name_img =
1068 getsectdata(iid.macho_img, iid.macho_img_szB,
1069 "__DWARF", "__debug_pubnames", &debug_name_sz);
1070
1071 if (debug_info_img) {
1072 if (VG_(clo_verbosity) > 1) {
1073 if (0)
1074 VG_(message)(Vg_DebugMsg,
1075 "Reading dwarf3 for %s (%#lx) from %s"
1076 " (%ld %ld %ld %ld %ld %ld)\n",
1077 di->fsm.filename, di->text_avma, dsymfilename,
1078 debug_info_sz, debug_abbv_sz, debug_line_sz,
1079 debug_str_sz, debug_ranges_sz, debug_loc_sz
1080 );
1081 VG_(message)(Vg_DebugMsg,
1082 " reading dwarf3 from dsyms file\n");
1083 }
1084 /* The old reader: line numbers and unwind info only */
1085 ML_(read_debuginfo_dwarf3) ( di,
1086 debug_info_img, debug_info_sz,
1087 NULL, 0,
1088 debug_abbv_img, debug_abbv_sz,
1089 debug_line_img, debug_line_sz,
1090 debug_str_img, debug_str_sz,
1091 NULL, 0 /* ALT .debug_str */ );
1092
1093 /* The new reader: read the DIEs in .debug_info to acquire
1094 information on variable types and locations. But only if
1095 the tool asks for it, or the user requests it on the
1096 command line. */
1097 if (VG_(needs).var_info /* the tool requires it */
1098 || VG_(clo_read_var_info) /* the user asked for it */) {
1099 ML_(new_dwarf3_reader)(
1100 di, debug_info_img, debug_info_sz,
1101 NULL, 0,
1102 debug_abbv_img, debug_abbv_sz,
1103 debug_line_img, debug_line_sz,
1104 debug_str_img, debug_str_sz,
1105 debug_ranges_img, debug_ranges_sz,
1106 debug_loc_img, debug_loc_sz,
1107 NULL, 0, /* ALT .debug_info */
1108 NULL, 0, /* ALT .debug_abbv */
1109 NULL, 0, /* ALT .debug_line */
1110 NULL, 0 /* ALT .debug_str */
1111 );
1112 }
1113 }
1114 }
1115
1116 if (dsymfilename) ML_(dinfo_free)(dsymfilename);
1117
1118 success:
1119 if (ii.img)
1120 unmap_image(&ii);
1121 if (iid.img)
1122 unmap_image(&iid);
1123 return True;
1124
1125 /* NOTREACHED */
1126
1127 fail:
1128 ML_(symerr)(di, True, "Error reading Mach-O object.");
1129 if (ii.img)
1130 unmap_image(&ii);
1131 if (iid.img)
1132 unmap_image(&iid);
1133 return False;
1134 }
1135
1136 #endif // defined(VGO_darwin)
1137
1138 /*--------------------------------------------------------------------*/
1139 /*--- end ---*/
1140 /*--------------------------------------------------------------------*/
1141