• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /* Derived from Valgrind sources, coregrind/m_debuginfo/readmacho.c.
3    GPL 2+ therefore.
4 
5    Can be compiled as either a 32- or 64-bit program (doesn't matter).
6 */
7 
8 /* What does this program do?  In short it postprocesses tool
9    executables on MacOSX, after linking using /usr/bin/ld.
10 
11    This is to deal with two separate and entirely unrelated problems.
12    Problem (1) is a bug in the linker in Xcode 4.0.0.  Problem (2) is
13    much newer and concerns linking 64-bit tool executables for
14    Yosemite (10.10).
15 
16    --- Problem (1) ------------------------------------------------
17 
18    This is a bug in the linker on Xcode 4.0.0 and Xcode 4.0.1.  Xcode
19    versions prior to 4.0.0 are unaffected.
20 
21    The tracking bug is https://bugs.kde.org/show_bug.cgi?id=267997
22 
23    The bug causes 64-bit tool executables to segfault at startup,
24    because:
25 
26    Comparing the MachO load commands vs a (working) tool executable
27    that was created by Xcode 3.2.x, it appears that the new linker has
28    partially ignored the build system's request to place the tool
29    executable's stack at a non standard location.  The build system
30    tells the linker "-stack_addr 0x134000000 -stack_size 0x800000".
31 
32    With the Xcode 3.2 linker those flags produce two results:
33 
34    (1) A load command to allocate the stack at the said location:
35           Load command 3
36                 cmd LC_SEGMENT_64
37             cmdsize 72
38             segname __UNIXSTACK
39              vmaddr 0x0000000133800000
40              vmsize 0x0000000000800000
41             fileoff 2285568
42            filesize 0
43             maxprot 0x00000007
44            initprot 0x00000003
45              nsects 0
46               flags 0x0
47 
48    (2) A request (in LC_UNIXTHREAD) to set %rsp to the correct value
49        at process startup, 0x134000000.
50 
51    With Xcode 4.0.1, (1) is missing but (2) is still present.  The
52    tool executable therefore starts up with %rsp pointing to unmapped
53    memory and faults almost instantly.
54 
55    The workaround implemented by this program is documented in comment
56    8 of bug 267997, viz:
57 
58    One really sick workaround is to observe that the executables
59    contain a redundant MachO load command:
60 
61       Load command 2
62             cmd LC_SEGMENT_64
63         cmdsize 72
64         segname __LINKEDIT
65          vmaddr 0x0000000138dea000
66          vmsize 0x00000000000ad000
67         fileoff 2658304
68        filesize 705632
69         maxprot 0x00000007
70        initprot 0x00000001
71          nsects 0
72           flags 0x0
73 
74    The described section presumably contains information intended for
75    the dynamic linker, but is irrelevant because this is a statically
76    linked executable.  Hence it might be possible to postprocess the
77    executables after linking, to overwrite this entry with the
78    information that would have been in the missing __UNIXSTACK entry.
79    I tried this by hand (with a binary editor) earlier and got
80    something that worked.
81 
82    --- Problem (2) ------------------------------------------------
83 
   On MacOSX 10.10 (Yosemite), the kernel requires all valid
   executables to have a __PAGEZERO segment with SVMA (the vmaddr
   field of its load command) of zero and size of at least one page.
   However, our tool executables have a __PAGEZERO segment with SVMA
   set to the requested Valgrind load address (typically
   0x1'3800'0000), and the kernel won't start those.  So we take the
   opportunity to "fix" this by setting the SVMA to zero.  This seems
   to work and to have no obvious bad side effects.
91 */
92 
93 #define DEBUGPRINTING 0
94 
95 #include <assert.h>
96 #include <stdlib.h>
97 #include <stdio.h>
98 #include <string.h>
99 #include <sys/mman.h>
100 #include <sys/stat.h>
101 #include <unistd.h>
102 #include <fcntl.h>
103 
104 #undef PLAT_x86_darwin
105 #undef PLAT_amd64_darwin
106 
107 #if defined(__APPLE__) && defined(__i386__)
108 #  define PLAT_x86_darwin 1
109 #elif defined(__APPLE__) && defined(__x86_64__)
110 #  define PLAT_amd64_darwin 1
111 #else
112 #  error "Can't be compiled on this platform"
113 #endif
114 
115 #include <mach-o/loader.h>
116 #include <mach-o/nlist.h>
117 #include <mach-o/fat.h>
118 #include <mach/i386/thread_status.h>
119 
120 /* Get hold of DARWIN_VERS, and check it has a sane value. */
121 #include "config.h"
122 #if DARWIN_VERS != DARWIN_10_5 && DARWIN_VERS != DARWIN_10_6 \
123     && DARWIN_VERS != DARWIN_10_7 && DARWIN_VERS != DARWIN_10_8 \
124     && DARWIN_VERS != DARWIN_10_9 && DARWIN_VERS != DARWIN_10_10 \
125     && DARWIN_VERS != DARWIN_10_11 && DARWIN_VERS != DARWIN_10_12
126 #  error "Unknown DARWIN_VERS value.  This file only compiles on Darwin."
127 #endif
128 
129 
/* Valgrind-style names for the basic types used in this file. */

/* Character types. */
typedef unsigned char UChar;
typedef signed char Char;
typedef char HChar; /* plain char: signedness is whatever the host uses */

/* Ordinary-width integers. */
typedef unsigned UInt;
typedef int Int;

/* Boolean, held in a single unsigned char. */
typedef unsigned char Bool;
#define True  ((Bool)1)
#define False ((Bool)0)

/* Unsigned long, and the two aliases of it used for sizes and
   addresses. */
typedef unsigned long UWord;
typedef UWord SizeT;
typedef UWord Addr;

/* long long integers. */
typedef unsigned long long ULong;
typedef long long Long;
148 
149 
150 
151 __attribute__((noreturn))
fail(HChar * msg)152 void fail ( HChar* msg )
153 {
154    fprintf(stderr, "fixup_macho_loadcmds: fail: %s\n", msg);
155    exit(1);
156 }
157 
158 
159 /*------------------------------------------------------------*/
160 /*---                                                      ---*/
161 /*--- Mach-O file mapping/unmapping helpers                ---*/
162 /*---                                                      ---*/
163 /*------------------------------------------------------------*/
164 
165 typedef
166    struct {
167       /* These two describe the entire mapped-in ("primary") image,
168          fat headers, kitchen sink, whatnot: the entire file.  The
169          image is mapped into img[0 .. img_szB-1]. */
170       UChar* img;
171       SizeT  img_szB;
172       /* These two describe the Mach-O object of interest, which is
173          presumably somewhere inside the primary image.
174          map_image_aboard() below, which generates this info, will
175          carefully check that the macho_ fields denote a section of
176          memory that falls entirely inside img[0 .. img_szB-1]. */
177       UChar* macho_img;
178       SizeT  macho_img_szB;
179    }
180    ImageInfo;
181 
182 
is_macho_object_file(const void * buf,SizeT szB)183 Bool is_macho_object_file( const void* buf, SizeT szB )
184 {
185    /* (JRS: the Mach-O headers might not be in this mapped data,
186       because we only mapped a page for this initial check,
187       or at least not very much, and what's at the start of the file
188       is in general a so-called fat header.  The Mach-O object we're
189       interested in could be arbitrarily far along the image, and so
190       we can't assume its header will fall within this page.) */
191 
192    /* But we can say that either it's a fat object, in which case it
193       begins with a fat header, or it's unadorned Mach-O, in which
194       case it starts with a normal header.  At least do what checks we
195       can to establish whether or not we're looking at something
196       sane. */
197 
198    const struct fat_header*  fh_be = buf;
199    const struct mach_header_64* mh    = buf;
200 
201    assert(buf);
202    if (szB < sizeof(struct fat_header))
203       return False;
204    if (ntohl(fh_be->magic) == FAT_MAGIC)
205       return True;
206 
207    if (szB < sizeof(struct mach_header_64))
208       return False;
209    if (mh->magic == MH_MAGIC_64)
210       return True;
211 
212    return False;
213 }
214 
215 
216 /* Unmap an image mapped in by map_image_aboard. */
unmap_image(ImageInfo * ii)217 static void unmap_image ( /*MOD*/ImageInfo* ii )
218 {
219    Int r;
220    assert(ii->img);
221    assert(ii->img_szB > 0);
222    r = munmap( ii->img, ii->img_szB );
223    /* Do we care if this fails?  I suppose so; it would indicate
224       some fairly serious snafu with the mapping of the file. */
225    assert( !r );
226    memset(ii, 0, sizeof(*ii));
227 }
228 
229 
230 /* Map a given fat or thin object aboard, find the thin part if
231    necessary, do some checks, and write details of both the fat and
232    thin parts into *ii.  Returns 32 (and leaves the file unmapped) if
233    the thin part is a 32 bit file.  Returns 64 if it's a 64 bit file.
234    Does not return on failure.  Guarantees to return pointers to a
235    valid(ish) Mach-O image if it succeeds. */
map_image_aboard(ImageInfo * ii,HChar * filename)236 static Int map_image_aboard ( /*OUT*/ImageInfo* ii, HChar* filename )
237 {
238    memset(ii, 0, sizeof(*ii));
239 
240    /* First off, try to map the thing in. */
241    { SizeT  size;
242      Int r, fd;
243      struct stat stat_buf;
244 
245      r = stat(filename, &stat_buf);
246      if (r)
247         fail("Can't stat image (to determine its size)?!");
248      size = stat_buf.st_size;
249 
250      fd = open(filename, O_RDWR, 0);
251      if (fd == -1)
252         fail("Can't open image for possible modification!");
253      if (DEBUGPRINTING)
254         printf("size %lu fd %d\n", size, fd);
255      void* v = mmap ( NULL, size, PROT_READ|PROT_WRITE,
256                                   MAP_FILE|MAP_SHARED, fd, 0 );
257      if (v == MAP_FAILED) {
258         perror("mmap failed");
259         fail("Can't mmap image for possible modification!");
260      }
261 
262      close(fd);
263 
264      ii->img     = (UChar*)v;
265      ii->img_szB = size;
266    }
267 
268    /* Now it's mapped in and we have .img and .img_szB set.  Look for
269       the embedded Mach-O object.  If not findable, unmap and fail. */
270    { struct fat_header*  fh_be;
271      struct fat_header   fh;
272      struct mach_header_64* mh;
273 
274      // Assume initially that we have a thin image, and update
275      // these if it turns out to be fat.
276      ii->macho_img     = ii->img;
277      ii->macho_img_szB = ii->img_szB;
278 
279      // Check for fat header.
280      if (ii->img_szB < sizeof(struct fat_header))
281         fail("Invalid Mach-O file (0 too small).");
282 
283      // Fat header is always BIG-ENDIAN
284      fh_be = (struct fat_header *)ii->img;
285      fh.magic = ntohl(fh_be->magic);
286      fh.nfat_arch = ntohl(fh_be->nfat_arch);
287      if (fh.magic == FAT_MAGIC) {
288         // Look for a good architecture.
289         struct fat_arch *arch_be;
290         struct fat_arch arch;
291         Int f;
292         if (ii->img_szB < sizeof(struct fat_header)
293                           + fh.nfat_arch * sizeof(struct fat_arch))
294            fail("Invalid Mach-O file (1 too small).");
295 
296         for (f = 0, arch_be = (struct fat_arch *)(fh_be+1);
297              f < fh.nfat_arch;
298              f++, arch_be++) {
299            Int cputype;
300 #          if defined(PLAT_x86_darwin)
301            cputype = CPU_TYPE_X86;
302 #          elif defined(PLAT_amd64_darwin)
303            cputype = CPU_TYPE_X86_64;
304 #          else
305 #            error "unknown architecture"
306 #          endif
307            arch.cputype    = ntohl(arch_be->cputype);
308            arch.cpusubtype = ntohl(arch_be->cpusubtype);
309            arch.offset     = ntohl(arch_be->offset);
310            arch.size       = ntohl(arch_be->size);
311            if (arch.cputype == cputype) {
312               if (ii->img_szB < arch.offset + arch.size)
313                  fail("Invalid Mach-O file (2 too small).");
314               ii->macho_img     = ii->img + arch.offset;
315               ii->macho_img_szB = arch.size;
316               break;
317            }
318         }
319         if (f == fh.nfat_arch)
320            fail("No acceptable architecture found in fat file.");
321      }
322 
323      /* Sanity check what we found. */
324 
325      /* assured by logic above */
326      assert(ii->img_szB >= sizeof(struct fat_header));
327 
328      if (ii->macho_img_szB < sizeof(struct mach_header_64))
329         fail("Invalid Mach-O file (3 too small).");
330 
331      if (ii->macho_img_szB > ii->img_szB)
332         fail("Invalid Mach-O file (thin bigger than fat).");
333 
334      if (ii->macho_img >= ii->img
335          && ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB) {
336         /* thin entirely within fat, as expected */
337      } else {
338         fail("Invalid Mach-O file (thin not inside fat).");
339      }
340 
341      mh = (struct mach_header_64 *)ii->macho_img;
342      if (mh->magic == MH_MAGIC) {
343         assert(ii->img);
344         assert(ii->macho_img);
345         assert(ii->img_szB > 0);
346         assert(ii->macho_img_szB > 0);
347         assert(ii->macho_img >= ii->img);
348         assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
349         return 32;
350      }
351      if (mh->magic != MH_MAGIC_64)
352         fail("Invalid Mach-O file (bad magic).");
353 
354      if (ii->macho_img_szB < sizeof(struct mach_header_64) + mh->sizeofcmds)
355         fail("Invalid Mach-O file (4 too small).");
356    }
357 
358    assert(ii->img);
359    assert(ii->macho_img);
360    assert(ii->img_szB > 0);
361    assert(ii->macho_img_szB > 0);
362    assert(ii->macho_img >= ii->img);
363    assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
364    return 64;
365 }
366 
367 
368 /*------------------------------------------------------------*/
369 /*---                                                      ---*/
370 /*--- Mach-O top-level processing                          ---*/
371 /*---                                                      ---*/
372 /*------------------------------------------------------------*/
373 
/* Post-process the tool executable FILENAME in place.

   expected_stack_start is the lowest address of the stack region the
   executable is supposed to have and expected_stack_size is its
   size; the initial RSP stated in LC_UNIXTHREAD must equal their sum.

   A 32-bit Mach-O file is left untouched.  For a 64-bit file:

   * the load commands are scanned, and any command other than
     LC_SEGMENT_64, LC_SYMTAB, LC_DYSYMTAB, LC_UUID or LC_UNIXTHREAD
     is rejected;

   * if a __UNIXSTACK segment exists it is checked against the
     expected values; otherwise the __LINKEDIT segment command is
     overwritten so that it describes the stack instead;

   * when built for Darwin 10.10 or later, __PAGEZERO.vmaddr is set
     to zero.

   Every inconsistency is fatal (reported through fail()).  The
   modifications are made through the shared mapping set up by
   map_image_aboard(), which is released before returning. */
void modify_macho_loadcmds ( HChar* filename,
                             ULong  expected_stack_start,
                             ULong  expected_stack_size )
{
   ImageInfo ii;
   memset(&ii, 0, sizeof(ii));

   Int size = map_image_aboard( &ii, filename );
   if (size == 32) {
      fprintf(stderr, "fixup_macho_loadcmds:   Is 32-bit MachO file;"
              " no modifications needed.\n");
      goto out;
   }

   assert(size == 64);

   assert(ii.macho_img != NULL && ii.macho_img_szB > 0);

   /* Poke around in the Mach-O header, to find some important
      stuff.
      * the location of the __UNIXSTACK load command, if any
      * the location of the __LINKEDIT load command, if any
      * the location of the __PAGEZERO load command, if any
      * the initial RSP value as stated in the LC_UNIXTHREAD
   */

   /* The collected data */
   ULong init_rsp = 0;
   Bool  have_rsp = False;
   struct segment_command_64* seg__unixstack = NULL;
   struct segment_command_64* seg__linkedit  = NULL;
   struct segment_command_64* seg__pagezero  = NULL;

   /* Loop over the load commands and fill in the above variables. */

   { struct mach_header_64 *mh = (struct mach_header_64 *)ii.macho_img;
     /* map_image_aboard() has checked that the sizeofcmds bytes
        after the header lie inside the image, so [cursor, cmds_end)
        is safe to read.  Each command is checked against that range
        before it is looked at. */
     UChar* cursor   = (UChar*)(mh + 1);
     UChar* cmds_end = cursor + mh->sizeofcmds;
     UInt   c;

     for (c = 0; c < mh->ncmds; c++) {
        SizeT remaining = (SizeT)(cmds_end - cursor);
        struct load_command *cmd = (struct load_command *)cursor;

        if (remaining < sizeof(struct load_command))
           fail("load command extends past end of load command area");
        if (cmd->cmdsize < sizeof(struct load_command)
            || cmd->cmdsize > remaining)
           fail("load command has implausible cmdsize");

        if (DEBUGPRINTING)
           printf("load cmd: offset %4lu   size %3u   kind %2u = ",
                  (unsigned long)((UChar*)cmd - (UChar*)ii.macho_img),
                  cmd->cmdsize, cmd->cmd);

        switch (cmd->cmd) {
           case LC_SEGMENT_64:
              if (DEBUGPRINTING)
                 printf("LC_SEGMENT_64");
              break;
           case LC_SYMTAB:
              if (DEBUGPRINTING)
                 printf("LC_SYMTAB");
              break;
           case LC_DYSYMTAB:
              if (DEBUGPRINTING)
                 printf("LC_DYSYMTAB");
              break;
           case LC_UUID:
              if (DEBUGPRINTING)
                 printf("LC_UUID");
              break;
           case LC_UNIXTHREAD:
              if (DEBUGPRINTING)
                 printf("LC_UNIXTHREAD");
              break;
           default:
              if (DEBUGPRINTING)
                 printf("???");
              fail("unexpected load command in Mach header");
              break;
        }
        if (DEBUGPRINTING)
           printf("\n");

        /* Note what the stated initial RSP value is, so we can
           check it is as expected. */
        if (cmd->cmd == LC_UNIXTHREAD) {
           struct thread_command* tcmd = (struct thread_command*)cmd;
           /* The command header is followed by 32-bit words:
              w32s[0] is the flavor and the register state starts at
              w32s[2]. */
           UInt* w32s = (UInt*)( (UChar*)tcmd + sizeof(*tcmd) );
           if (cmd->cmdsize < sizeof(*tcmd) + 2 * sizeof(UInt))
              fail("LC_UNIXTHREAD command too small to hold a flavor");
           if (DEBUGPRINTING)
              printf("UnixThread: flavor %u = ", w32s[0]);
           if (w32s[0] == x86_THREAD_STATE64 && !have_rsp) {
              if (cmd->cmdsize < sizeof(*tcmd) + 2 * sizeof(UInt)
                                 + sizeof(x86_thread_state64_t))
                 fail("LC_UNIXTHREAD command too small to hold "
                      "x86_THREAD_STATE64");
              if (DEBUGPRINTING)
                 printf("x86_THREAD_STATE64\n");
              x86_thread_state64_t* state64
                 = (x86_thread_state64_t*)(&w32s[2]);
              have_rsp = True;
              init_rsp = state64->__rsp;
              if (DEBUGPRINTING)
                 printf("rsp = 0x%llx\n", init_rsp);
           } else {
              if (DEBUGPRINTING)
                 printf("???");
           }
           if (DEBUGPRINTING)
              printf("\n");
        }

        if (cmd->cmd == LC_SEGMENT_64) {
           struct segment_command_64 *seg = (struct segment_command_64 *)cmd;
           if (cmd->cmdsize < sizeof(*seg))
              fail("LC_SEGMENT_64 command too small");
           /* segname is a fixed-size field, so the comparison is
              bounded by its size and never reads past it. */
           if (0 == strncmp(seg->segname, "__LINKEDIT",
                            sizeof(seg->segname)))
              seg__linkedit = seg;
           if (0 == strncmp(seg->segname, "__UNIXSTACK",
                            sizeof(seg->segname)))
              seg__unixstack = seg;
           if (0 == strncmp(seg->segname, "__PAGEZERO",
                            sizeof(seg->segname)))
              seg__pagezero = seg;
        }

        cursor += cmd->cmdsize;
     }
   }

   /*
      Actions are then as follows:

      * (always) check the RSP value is as expected, and abort if not

      * if there's a UNIXSTACK load command, check it is as expected.
        If not abort, if yes, do nothing more.

      * (so there's no UNIXSTACK load command).  if there's a LINKEDIT
        load command, check if it is minimally usable (has 0 for
        nsects and flags).  If yes, convert it to a UNIXSTACK load
        command.  If there is none, or is unusable, then we're out of
        options and have to abort.
   */
   if (!have_rsp)
      fail("Can't find / check initial RSP setting");
   if (init_rsp != expected_stack_start + expected_stack_size)
      fail("Initial RSP value not as expected");

   fprintf(stderr, "fixup_macho_loadcmds:   "
                   "initial RSP is as expected (0x%llx)\n",
                   expected_stack_start + expected_stack_size );

   if (seg__unixstack) {
      struct segment_command_64 *seg = seg__unixstack;
      if (seg->vmaddr != expected_stack_start)
         fail("has __UNIXSTACK, but wrong ::vmaddr");
      if (seg->vmsize != expected_stack_size)
         fail("has __UNIXSTACK, but wrong ::vmsize");
      if (seg->maxprot != 7)
         fail("has __UNIXSTACK, but wrong ::maxprot (should be 7)");
      if (seg->initprot != 3)
         fail("has __UNIXSTACK, but wrong ::initprot (should be 3)");
      if (seg->nsects != 0)
         fail("has __UNIXSTACK, but wrong ::nsects (should be 0)");
      if (seg->flags != 0)
         fail("has __UNIXSTACK, but wrong ::flags (should be 0)");
      /* looks ok */
      fprintf(stderr, "fixup_macho_loadcmds:   "
              "acceptable __UNIXSTACK present; no modifications.\n" );
      goto maybe_mash_pagezero;
   }

   if (seg__linkedit) {
      struct segment_command_64 *seg = seg__linkedit;
      if (seg->nsects != 0)
         fail("has __LINKEDIT, but wrong ::nsects (should be 0)");
      if (seg->flags != 0)
         fail("has __LINKEDIT, but wrong ::flags (should be 0)");
      fprintf(stderr, "fixup_macho_loadcmds:   "
              "no __UNIXSTACK present.\n" );
      fprintf(stderr, "fixup_macho_loadcmds:   "
              "converting __LINKEDIT to __UNIXSTACK.\n" );
      /* Rewrite the whole fixed-size name field: zero it, then copy
         in the new name, leaving the remainder zero-padded. */
      memset(seg->segname, 0, sizeof(seg->segname));
      memcpy(seg->segname, "__UNIXSTACK", strlen("__UNIXSTACK"));
      seg->vmaddr   = expected_stack_start;
      seg->vmsize   = expected_stack_size;
      seg->fileoff  = 0;
      seg->filesize = 0;
      seg->maxprot  = 7;
      seg->initprot = 3;
      /* success */
      goto maybe_mash_pagezero;
   }

   /* out of options */
   fail("no __UNIXSTACK found and no usable __LINKEDIT found; "
        "out of options.");
   /* NOTREACHED */

  maybe_mash_pagezero:
   /* Deal with Problem (2) as documented above. */
#  if DARWIN_VERS >= DARWIN_10_10
   assert(size == 64);
   if (!seg__pagezero) {
      fail("Can't find __PAGEZERO to modify; can't continue.");
   }
   /* vmaddr is a 64-bit field; print it as such instead of casting
      it to a pointer, which would truncate it in a 32-bit build. */
   fprintf(stderr, "fixup_macho_loadcmds:   "
           "changing __PAGEZERO.vmaddr from 0x%llx to 0x0.\n",
           (ULong)seg__pagezero->vmaddr);
   seg__pagezero->vmaddr = 0;
#  endif

  out:
   if (ii.img)
      unmap_image(&ii);
}
574 
575 
is_plausible_tool_exe_name(HChar * nm)576 static Bool is_plausible_tool_exe_name ( HChar* nm )
577 {
578    HChar* p;
579    if (!nm)
580       return False;
581 
582    // Does it end with this string?
583    p = strstr(nm, "-x86-darwin");
584    if (p && 0 == strcmp(p, "-x86-darwin"))
585       return True;
586 
587    p = strstr(nm, "-amd64-darwin");
588    if (p && 0 == strcmp(p, "-amd64-darwin"))
589       return True;
590 
591    return False;
592 }
593 
594 
main(int argc,char ** argv)595 int main ( int argc, char** argv )
596 {
597    Int   r;
598    ULong req_stack_addr = 0;
599    ULong req_stack_size = 0;
600 
601    if (argc != 4)
602       fail("args: -stack_addr-arg -stack_size-arg "
603            "name-of-tool-executable-to-modify");
604 
605    r= sscanf(argv[1], "0x%llx", &req_stack_addr);
606    if (r != 1) fail("invalid stack_addr arg");
607 
608    r= sscanf(argv[2], "0x%llx", &req_stack_size);
609    if (r != 1) fail("invalid stack_size arg");
610 
611    fprintf(stderr, "fixup_macho_loadcmds: "
612            "requested stack_addr (top) 0x%llx, "
613            "stack_size 0x%llx\n", req_stack_addr, req_stack_size );
614 
615    if (!is_plausible_tool_exe_name(argv[3]))
616       fail("implausible tool exe name -- not of the form *-{x86,amd64}-darwin");
617 
618    fprintf(stderr, "fixup_macho_loadcmds: examining tool exe: %s\n",
619            argv[3] );
620    modify_macho_loadcmds( argv[3], req_stack_addr - req_stack_size,
621                           req_stack_size );
622 
623    return 0;
624 }
625 
626 /*
627       cmd LC_SEGMENT_64
628   cmdsize 72
629   segname __LINKEDIT
630    vmaddr 0x0000000138dea000
631    vmsize 0x00000000000ad000
632   fileoff 2658304
633  filesize 705632
634   maxprot 0x00000007
635  initprot 0x00000001
636    nsects 0
637     flags 0x0
638 */
639 
640 /*
641       cmd LC_SEGMENT_64
642   cmdsize 72
643   segname __UNIXSTACK
644    vmaddr 0x0000000133800000
645    vmsize 0x0000000000800000
646   fileoff 2498560
647  filesize 0
648   maxprot 0x00000007
649  initprot 0x00000003
650    nsects 0
651     flags 0x0
652 */
653