• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /* Derived from Valgrind sources, coregrind/m_debuginfo/readmacho.c.
3    GPL 2+ therefore.
4 
5    Can be compiled as either a 32- or 64-bit program (doesn't matter).
6 */
7 
8 /* What does this program do?  In short it postprocesses tool
9    executables on MacOSX, after linking using /usr/bin/ld.  This is so
10    as to work around a bug in the linker on Xcode 4.0.0 and Xcode
11    4.0.1.  Xcode versions prior to 4.0.0 are unaffected.
12 
13    The tracking bug is https://bugs.kde.org/show_bug.cgi?id=267997
14 
15    The bug causes 64-bit tool executables to segfault at startup,
16    because:
17 
18    Comparing the MachO load commands vs a (working) tool executable
19    that was created by Xcode 3.2.x, it appears that the new linker has
20    partially ignored the build system's request to place the tool
21    executable's stack at a non standard location.  The build system
22    tells the linker "-stack_addr 0x134000000 -stack_size 0x800000".
23 
24    With the Xcode 3.2 linker those flags produce two results:
25 
26    (1) A load command to allocate the stack at the said location:
27           Load command 3
28                 cmd LC_SEGMENT_64
29             cmdsize 72
30             segname __UNIXSTACK
31              vmaddr 0x0000000133800000
32              vmsize 0x0000000000800000
33             fileoff 2285568
34            filesize 0
35             maxprot 0x00000007
36            initprot 0x00000003
37              nsects 0
38               flags 0x0
39 
40    (2) A request (in LC_UNIXTHREAD) to set %rsp to the correct value
41        at process startup, 0x134000000.
42 
43    With Xcode 4.0.1, (1) is missing but (2) is still present.  The
44    tool executable therefore starts up with %rsp pointing to unmapped
45    memory and faults almost instantly.
46 
47    The workaround implemented by this program is documented in comment
48    8 of bug 267997, viz:
49 
50    One really sick workaround is to observe that the executables
51    contain a redundant MachO load command:
52 
53       Load command 2
54             cmd LC_SEGMENT_64
55         cmdsize 72
56         segname __LINKEDIT
57          vmaddr 0x0000000138dea000
58          vmsize 0x00000000000ad000
59         fileoff 2658304
60        filesize 705632
61         maxprot 0x00000007
62        initprot 0x00000001
63          nsects 0
64           flags 0x0
65 
66    The described section presumably contains information intended for
67    the dynamic linker, but is irrelevant because this is a statically
68    linked executable.  Hence it might be possible to postprocess the
69    executables after linking, to overwrite this entry with the
70    information that would have been in the missing __UNIXSTACK entry.
71    I tried this by hand (with a binary editor) earlier and got
72    something that worked.
73 */
74 
75 #define DEBUGPRINTING 0
76 
77 #include <assert.h>
78 #include <stdlib.h>
79 #include <stdio.h>
80 #include <string.h>
81 #include <sys/mman.h>
82 #include <sys/stat.h>
83 #include <unistd.h>
84 #include <fcntl.h>
85 
86 
87 #undef PLAT_x86_darwin
88 #undef PLAT_amd64_darwin
89 
90 #if defined(__APPLE__) && defined(__i386__)
91 #  define PLAT_x86_darwin 1
92 #elif defined(__APPLE__) && defined(__x86_64__)
93 #  define PLAT_amd64_darwin 1
94 #else
95 #  error "Can't be compiled on this platform"
96 #endif
97 
98 #include <mach-o/loader.h>
99 #include <mach-o/nlist.h>
100 #include <mach-o/fat.h>
101 #include <mach/i386/thread_status.h>
102 
103 
/* Short aliases for the scalar types used throughout this file. */

/* Byte-sized types. */
typedef unsigned char UChar;
typedef signed char   Char;
typedef char          HChar;   /* plain char: signedness is whatever the host uses */

/* Ordinary integers. */
typedef unsigned int UInt;
typedef int          Int;

/* Boolean, held in an unsigned char. */
typedef unsigned char Bool;
#define True   ((Bool)1)
#define False  ((Bool)0)

/* unsigned long, plus the size and address aliases built on it. */
typedef unsigned long UWord;
typedef UWord SizeT;
typedef UWord Addr;

/* Widest integers used here. */
typedef unsigned long long ULong;
typedef long long          Long;
122 
123 
124 
125 __attribute__((noreturn))
fail(HChar * msg)126 void fail ( HChar* msg )
127 {
128    fprintf(stderr, "fixup_macho_loadcmds: fail: %s\n", msg);
129    exit(1);
130 }
131 
132 
133 /*------------------------------------------------------------*/
134 /*---                                                      ---*/
135 /*--- Mach-O file mapping/unmapping helpers                ---*/
136 /*---                                                      ---*/
137 /*------------------------------------------------------------*/
138 
139 typedef
140    struct {
141       /* These two describe the entire mapped-in ("primary") image,
142          fat headers, kitchen sink, whatnot: the entire file.  The
143          image is mapped into img[0 .. img_szB-1]. */
144       UChar* img;
145       SizeT  img_szB;
146       /* These two describe the Mach-O object of interest, which is
147          presumably somewhere inside the primary image.
148          map_image_aboard() below, which generates this info, will
149          carefully check that the macho_ fields denote a section of
150          memory that falls entirely inside img[0 .. img_szB-1]. */
151       UChar* macho_img;
152       SizeT  macho_img_szB;
153    }
154    ImageInfo;
155 
156 
is_macho_object_file(const void * buf,SizeT szB)157 Bool is_macho_object_file( const void* buf, SizeT szB )
158 {
159    /* (JRS: the Mach-O headers might not be in this mapped data,
160       because we only mapped a page for this initial check,
161       or at least not very much, and what's at the start of the file
162       is in general a so-called fat header.  The Mach-O object we're
163       interested in could be arbitrarily far along the image, and so
164       we can't assume its header will fall within this page.) */
165 
166    /* But we can say that either it's a fat object, in which case it
167       begins with a fat header, or it's unadorned Mach-O, in which
168       case it starts with a normal header.  At least do what checks we
169       can to establish whether or not we're looking at something
170       sane. */
171 
172    const struct fat_header*  fh_be = buf;
173    const struct mach_header_64* mh    = buf;
174 
175    assert(buf);
176    if (szB < sizeof(struct fat_header))
177       return False;
178    if (ntohl(fh_be->magic) == FAT_MAGIC)
179       return True;
180 
181    if (szB < sizeof(struct mach_header_64))
182       return False;
183    if (mh->magic == MH_MAGIC_64)
184       return True;
185 
186    return False;
187 }
188 
189 
190 /* Unmap an image mapped in by map_image_aboard. */
unmap_image(ImageInfo * ii)191 static void unmap_image ( /*MOD*/ImageInfo* ii )
192 {
193    Int r;
194    assert(ii->img);
195    assert(ii->img_szB > 0);
196    r = munmap( ii->img, ii->img_szB );
197    /* Do we care if this fails?  I suppose so; it would indicate
198       some fairly serious snafu with the mapping of the file. */
199    assert( !r );
200    memset(ii, 0, sizeof(*ii));
201 }
202 
203 
204 /* Map a given fat or thin object aboard, find the thin part if
205    necessary, do some checks, and write details of both the fat and
206    thin parts into *ii.  Returns 32 (and leaves the file unmapped) if
207    the thin part is a 32 bit file.  Returns 64 if it's a 64 bit file.
208    Does not return on failure.  Guarantees to return pointers to a
209    valid(ish) Mach-O image if it succeeds. */
map_image_aboard(ImageInfo * ii,HChar * filename)210 static Int map_image_aboard ( /*OUT*/ImageInfo* ii, HChar* filename )
211 {
212    memset(ii, 0, sizeof(*ii));
213 
214    /* First off, try to map the thing in. */
215    { SizeT  size;
216      Int r, fd;
217      struct stat stat_buf;
218 
219      r = stat(filename, &stat_buf);
220      if (r)
221         fail("Can't stat image (to determine its size)?!");
222      size = stat_buf.st_size;
223 
224      fd = open(filename, O_RDWR, 0);
225      if (fd == -1)
226         fail("Can't open image for possible modification!");
227      if (DEBUGPRINTING)
228         printf("size %lu fd %d\n", size, fd);
229      void* v = mmap ( NULL, size, PROT_READ|PROT_WRITE,
230                                   MAP_FILE|MAP_SHARED, fd, 0 );
231      if (v == MAP_FAILED) {
232         perror("mmap failed");
233         fail("Can't mmap image for possible modification!");
234      }
235 
236      close(fd);
237 
238      ii->img     = (UChar*)v;
239      ii->img_szB = size;
240    }
241 
242    /* Now it's mapped in and we have .img and .img_szB set.  Look for
243       the embedded Mach-O object.  If not findable, unmap and fail. */
244    { struct fat_header*  fh_be;
245      struct fat_header   fh;
246      struct mach_header_64* mh;
247 
248      // Assume initially that we have a thin image, and update
249      // these if it turns out to be fat.
250      ii->macho_img     = ii->img;
251      ii->macho_img_szB = ii->img_szB;
252 
253      // Check for fat header.
254      if (ii->img_szB < sizeof(struct fat_header))
255         fail("Invalid Mach-O file (0 too small).");
256 
257      // Fat header is always BIG-ENDIAN
258      fh_be = (struct fat_header *)ii->img;
259      fh.magic = ntohl(fh_be->magic);
260      fh.nfat_arch = ntohl(fh_be->nfat_arch);
261      if (fh.magic == FAT_MAGIC) {
262         // Look for a good architecture.
263         struct fat_arch *arch_be;
264         struct fat_arch arch;
265         Int f;
266         if (ii->img_szB < sizeof(struct fat_header)
267                           + fh.nfat_arch * sizeof(struct fat_arch))
268            fail("Invalid Mach-O file (1 too small).");
269 
270         for (f = 0, arch_be = (struct fat_arch *)(fh_be+1);
271              f < fh.nfat_arch;
272              f++, arch_be++) {
273            Int cputype;
274 #          if defined(PLAT_x86_darwin)
275            cputype = CPU_TYPE_X86;
276 #          elif defined(PLAT_amd64_darwin)
277            cputype = CPU_TYPE_X86_64;
278 #          else
279 #            error "unknown architecture"
280 #          endif
281            arch.cputype    = ntohl(arch_be->cputype);
282            arch.cpusubtype = ntohl(arch_be->cpusubtype);
283            arch.offset     = ntohl(arch_be->offset);
284            arch.size       = ntohl(arch_be->size);
285            if (arch.cputype == cputype) {
286               if (ii->img_szB < arch.offset + arch.size)
287                  fail("Invalid Mach-O file (2 too small).");
288               ii->macho_img     = ii->img + arch.offset;
289               ii->macho_img_szB = arch.size;
290               break;
291            }
292         }
293         if (f == fh.nfat_arch)
294            fail("No acceptable architecture found in fat file.");
295      }
296 
297      /* Sanity check what we found. */
298 
299      /* assured by logic above */
300      assert(ii->img_szB >= sizeof(struct fat_header));
301 
302      if (ii->macho_img_szB < sizeof(struct mach_header_64))
303         fail("Invalid Mach-O file (3 too small).");
304 
305      if (ii->macho_img_szB > ii->img_szB)
306         fail("Invalid Mach-O file (thin bigger than fat).");
307 
308      if (ii->macho_img >= ii->img
309          && ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB) {
310         /* thin entirely within fat, as expected */
311      } else {
312         fail("Invalid Mach-O file (thin not inside fat).");
313      }
314 
315      mh = (struct mach_header_64 *)ii->macho_img;
316      if (mh->magic == MH_MAGIC) {
317         assert(ii->img);
318         assert(ii->macho_img);
319         assert(ii->img_szB > 0);
320         assert(ii->macho_img_szB > 0);
321         assert(ii->macho_img >= ii->img);
322         assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
323         return 32;
324      }
325      if (mh->magic != MH_MAGIC_64)
326         fail("Invalid Mach-O file (bad magic).");
327 
328      if (ii->macho_img_szB < sizeof(struct mach_header_64) + mh->sizeofcmds)
329         fail("Invalid Mach-O file (4 too small).");
330    }
331 
332    assert(ii->img);
333    assert(ii->macho_img);
334    assert(ii->img_szB > 0);
335    assert(ii->macho_img_szB > 0);
336    assert(ii->macho_img >= ii->img);
337    assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
338    return 64;
339 }
340 
341 
342 /*------------------------------------------------------------*/
343 /*---                                                      ---*/
344 /*--- Mach-O top-level processing                          ---*/
345 /*---                                                      ---*/
346 /*------------------------------------------------------------*/
347 
/* Inspect the 64-bit Mach-O tool executable `filename` and, if it has
   no __UNIXSTACK segment, rewrite its __LINKEDIT load command in place
   into one.

   expected_stack_start  lowest address of the stack (the segment's vmaddr)
   expected_stack_size   size of the stack in bytes (the segment's vmsize)

   The initial RSP recorded in LC_UNIXTHREAD must equal
   expected_stack_start + expected_stack_size.  A 32-bit file is left
   untouched.  An existing __UNIXSTACK is accepted only if it matches
   the expected values exactly.  Any inconsistency, or a load command
   that is malformed or of an unexpected kind, ends the program via
   fail().  Progress messages go to stderr. */
void modify_macho_loadcmds ( HChar* filename,
                             ULong  expected_stack_start,
                             ULong  expected_stack_size )
{
   ImageInfo ii;
   memset(&ii, 0, sizeof(ii));

   Int size = map_image_aboard( &ii, filename );
   if (size == 32) {
      fprintf(stderr, "fixup_macho_loadcmds:   Is 32-bit MachO file;"
              " no modifications needed.\n");
      goto out;
   }

   assert(size == 64);

   assert(ii.macho_img != NULL && ii.macho_img_szB > 0);

   /* Poke around in the Mach-O header, to find some important
      stuff.
      * the location of the __UNIXSTACK load command, if any
      * the location of the __LINKEDIT load command, if any
      * the initial RSP value as stated in the LC_UNIXTHREAD
   */

   /* The collected data */
   ULong init_rsp = 0;
   Bool  have_rsp = False;
   struct segment_command_64* seg__unixstack = NULL;
   struct segment_command_64* seg__linkedit  = NULL;

   /* Loop over the load commands and fill in the above 4 variables. */

   { struct mach_header_64 *mh = (struct mach_header_64 *)ii.macho_img;
     UChar* cmds_start = (UChar*)(mh + 1);
     /* map_image_aboard has checked that sizeofcmds bytes are present
        after the header, so staying inside this area keeps every
        access inside the mapping. */
     SizeT  cmds_szB = mh->sizeofcmds;
     SizeT  off = 0;     /* offset of the current command; always <= cmds_szB */
     UInt   c;

     for (c = 0; c < mh->ncmds; c++) {
        struct load_command *cmd;

        /* Never trust cmdsize blindly: a zero or oversized value
           would otherwise walk outside the load command area. */
        if (cmds_szB - off < sizeof(struct load_command))
           fail("load commands run past the end of the load command area");
        cmd = (struct load_command *)(cmds_start + off);
        if (cmd->cmdsize < sizeof(struct load_command)
            || cmd->cmdsize > cmds_szB - off)
           fail("load command has an implausible size");

        if (DEBUGPRINTING)
           printf("load cmd: offset %4lu   size %3u   kind %2u = ",
                  (unsigned long)((UChar*)cmd - (UChar*)ii.macho_img),
                  cmd->cmdsize, cmd->cmd);

        switch (cmd->cmd) {
           case LC_SEGMENT_64:
              if (DEBUGPRINTING)
                 printf("LC_SEGMENT_64");
              break;
           case LC_SYMTAB:
              if (DEBUGPRINTING)
                 printf("LC_SYMTAB");
              break;
           case LC_DYSYMTAB:
              if (DEBUGPRINTING)
                 printf("LC_DYSYMTAB");
              break;
           case LC_UUID:
              if (DEBUGPRINTING)
                 printf("LC_UUID");
              break;
           case LC_UNIXTHREAD:
              if (DEBUGPRINTING)
                 printf("LC_UNIXTHREAD");
              break;
           default:
              /* Anything else is unexpected, so refuse to touch
                 the file. */
              if (DEBUGPRINTING)
                 printf("???");
              fail("unexpected load command in Mach header");
              break;
        }
        if (DEBUGPRINTING)
           printf("\n");

        /* Note what the stated initial RSP value is, so we can
           check it is as expected. */
        if (cmd->cmd == LC_UNIXTHREAD) {
           struct thread_command* tcmd = (struct thread_command*)cmd;
           /* The 32-bit words following the thread_command header:
              w32s[0] is compared against the flavor constant and the
              register state is read from w32s[2] onwards (w32s[1] is
              presumably the state's word count; it is not used). */
           UInt* w32s = (UInt*)( (UChar*)tcmd + sizeof(*tcmd) );
           if (cmd->cmdsize < sizeof(*tcmd) + 2 * sizeof(UInt))
              fail("LC_UNIXTHREAD load command is too small");
           if (DEBUGPRINTING)
              printf("UnixThread: flavor %u = ", w32s[0]);
           if (w32s[0] == x86_THREAD_STATE64 && !have_rsp) {
              if (cmd->cmdsize < sizeof(*tcmd) + 2 * sizeof(UInt)
                                 + sizeof(x86_thread_state64_t))
                 fail("LC_UNIXTHREAD load command is too small");
              if (DEBUGPRINTING)
                 printf("x86_THREAD_STATE64\n");
              x86_thread_state64_t* state64
                 = (x86_thread_state64_t*)(&w32s[2]);
              have_rsp = True;
              init_rsp = state64->__rsp;
              if (DEBUGPRINTING)
                 printf("rsp = 0x%llx\n", init_rsp);
           } else {
              if (DEBUGPRINTING)
                 printf("???");
           }
           if (DEBUGPRINTING)
              printf("\n");
        }

        if (cmd->cmd == LC_SEGMENT_64) {
           struct segment_command_64 *seg = (struct segment_command_64 *)cmd;
           if (cmd->cmdsize < sizeof(struct segment_command_64))
              fail("LC_SEGMENT_64 load command is too small");
           /* segname is a fixed-size field that need not be
              NUL-terminated, so bound the comparison by its size. */
           if (0 == strncmp(seg->segname, "__LINKEDIT", sizeof(seg->segname)))
              seg__linkedit = seg;
           if (0 == strncmp(seg->segname, "__UNIXSTACK", sizeof(seg->segname)))
              seg__unixstack = seg;
        }

        off += cmd->cmdsize;
     }
   }

   /*
      Actions are then as follows:

      * (always) check the RSP value is as expected, and abort if not

      * if there's a UNIXSTACK load command, check it is as expected.
        If not abort, if yes, do nothing more.

      * (so there's no UNIXSTACK load command).  if there's a LINKEDIT
        load command, check if it is minimally usable (has 0 for
        nsects and flags).  If yes, convert it to a UNIXSTACK load
        command.  If there is none, or is unusable, then we're out of
        options and have to abort.
   */
   if (!have_rsp)
      fail("Can't find / check initial RSP setting");
   if (init_rsp != expected_stack_start + expected_stack_size)
      fail("Initial RSP value not as expected");

   fprintf(stderr, "fixup_macho_loadcmds:   "
                   "initial RSP is as expected (0x%llx)\n",
                   expected_stack_start + expected_stack_size );

   if (seg__unixstack) {
      struct segment_command_64 *seg = seg__unixstack;
      if (seg->vmaddr != expected_stack_start)
         fail("has __UNIXSTACK, but wrong ::vmaddr");
      if (seg->vmsize != expected_stack_size)
         fail("has __UNIXSTACK, but wrong ::vmsize");
      if (seg->maxprot != 7)
         fail("has __UNIXSTACK, but wrong ::maxprot (should be 7)");
      if (seg->initprot != 3)
         fail("has __UNIXSTACK, but wrong ::initprot (should be 3)");
      if (seg->nsects != 0)
         fail("has __UNIXSTACK, but wrong ::nsects (should be 0)");
      if (seg->flags != 0)
         fail("has __UNIXSTACK, but wrong ::flags (should be 0)");
      /* looks ok */
      fprintf(stderr, "fixup_macho_loadcmds:   "
              "acceptable __UNIXSTACK present; no modifications.\n" );
      goto out;
   }

   if (seg__linkedit) {
      struct segment_command_64 *seg = seg__linkedit;
      if (seg->nsects != 0)
         fail("has __LINKEDIT, but wrong ::nsects (should be 0)");
      if (seg->flags != 0)
         fail("has __LINKEDIT, but wrong ::flags (should be 0)");
      fprintf(stderr, "fixup_macho_loadcmds:   "
              "no __UNIXSTACK present.\n" );
      fprintf(stderr, "fixup_macho_loadcmds:   "
              "converting __LINKEDIT to __UNIXSTACK.\n" );
      /* Overwrite the entry through the shared mapping.  The name
         field is cleared first so no stale bytes follow the new
         name. */
      memset(seg->segname, 0, sizeof(seg->segname));
      memcpy(seg->segname, "__UNIXSTACK", strlen("__UNIXSTACK"));
      seg->vmaddr   = expected_stack_start;
      seg->vmsize   = expected_stack_size;
      seg->fileoff  = 0;
      seg->filesize = 0;
      seg->maxprot  = 7;
      seg->initprot = 3;
      /* success */
      goto out;
   }

   /* out of options */
   fail("no __UNIXSTACK found and no usable __LINKEDIT found; "
        "out of options.");
   /* NOTREACHED */

  out:
   if (ii.img)
      unmap_image(&ii);
}
531 
532 
/* Return True iff the string s ends with the string suffix.  Neither
   argument may be NULL. */
static Bool ends_with ( const HChar* s, const HChar* suffix )
{
   size_t s_len      = strlen(s);
   size_t suffix_len = strlen(suffix);
   if (s_len < suffix_len)
      return False;
   return 0 == strcmp(s + (s_len - suffix_len), suffix) ? True : False;
}

/* Return True iff nm looks like the name of a tool executable, that
   is, it ends with "-x86-darwin" or "-amd64-darwin".  A NULL nm gives
   False.

   The comparison is made against the tail of the string, so a name
   that also contains the suffix earlier on (for example in a
   directory component, as in "/build-x86-darwin/memcheck-x86-darwin")
   is still accepted. */
static Bool is_plausible_tool_exe_name ( HChar* nm )
{
   if (!nm)
      return False;

   if (ends_with(nm, "-x86-darwin"))
      return True;

   if (ends_with(nm, "-amd64-darwin"))
      return True;

   return False;
}
550 
551 
main(int argc,char ** argv)552 int main ( int argc, char** argv )
553 {
554    Int   r;
555    ULong req_stack_addr = 0;
556    ULong req_stack_size = 0;
557 
558    if (argc != 4)
559       fail("args: -stack_addr-arg -stack_size-arg "
560            "name-of-tool-executable-to-modify");
561 
562    r= sscanf(argv[1], "0x%llx", &req_stack_addr);
563    if (r != 1) fail("invalid stack_addr arg");
564 
565    r= sscanf(argv[2], "0x%llx", &req_stack_size);
566    if (r != 1) fail("invalid stack_size arg");
567 
568    fprintf(stderr, "fixup_macho_loadcmds: "
569            "requested stack_addr (top) 0x%llx, "
570            "stack_size 0x%llx\n", req_stack_addr, req_stack_size );
571 
572    if (!is_plausible_tool_exe_name(argv[3]))
573       fail("implausible tool exe name -- not of the form *-{x86,amd64}-darwin");
574 
575    fprintf(stderr, "fixup_macho_loadcmds: examining tool exe: %s\n",
576            argv[3] );
577    modify_macho_loadcmds( argv[3], req_stack_addr - req_stack_size,
578                           req_stack_size );
579 
580    return 0;
581 }
582 
583 /*
584       cmd LC_SEGMENT_64
585   cmdsize 72
586   segname __LINKEDIT
587    vmaddr 0x0000000138dea000
588    vmsize 0x00000000000ad000
589   fileoff 2658304
590  filesize 705632
591   maxprot 0x00000007
592  initprot 0x00000001
593    nsects 0
594     flags 0x0
595 */
596 
597 /*
598       cmd LC_SEGMENT_64
599   cmdsize 72
600   segname __UNIXSTACK
601    vmaddr 0x0000000133800000
602    vmsize 0x0000000000800000
603   fileoff 2498560
604  filesize 0
605   maxprot 0x00000007
606  initprot 0x00000003
607    nsects 0
608     flags 0x0
609 */
610