1
2 /* Derived from Valgrind sources, coregrind/m_debuginfo/readmacho.c.
3 GPL 2+ therefore.
4
5 Can be compiled as either a 32- or 64-bit program (doesn't matter).
6 */
7
8 /* What does this program do? In short it postprocesses tool
9 executables on MacOSX, after linking using /usr/bin/ld.
10
11 This is to deal with two separate and entirely unrelated problems.
12 Problem (1) is a bug in the linker in Xcode 4.0.0. Problem (2) is
13 much newer and concerns linking 64-bit tool executables for
14 Yosemite (10.10).
15
16 --- Problem (1) ------------------------------------------------
17
18 This is a bug in the linker on Xcode 4.0.0 and Xcode 4.0.1. Xcode
19 versions prior to 4.0.0 are unaffected.
20
21 The tracking bug is https://bugs.kde.org/show_bug.cgi?id=267997
22
23 The bug causes 64-bit tool executables to segfault at startup,
24 because:
25
26 Comparing the MachO load commands vs a (working) tool executable
27 that was created by Xcode 3.2.x, it appears that the new linker has
28 partially ignored the build system's request to place the tool
29 executable's stack at a non standard location. The build system
30 tells the linker "-stack_addr 0x134000000 -stack_size 0x800000".
31
32 With the Xcode 3.2 linker those flags produce two results:
33
34 (1) A load command to allocate the stack at the said location:
35 Load command 3
36 cmd LC_SEGMENT_64
37 cmdsize 72
38 segname __UNIXSTACK
39 vmaddr 0x0000000133800000
40 vmsize 0x0000000000800000
41 fileoff 2285568
42 filesize 0
43 maxprot 0x00000007
44 initprot 0x00000003
45 nsects 0
46 flags 0x0
47
48 (2) A request (in LC_UNIXTHREAD) to set %rsp to the correct value
49 at process startup, 0x134000000.
50
51 With Xcode 4.0.1, (1) is missing but (2) is still present. The
52 tool executable therefore starts up with %rsp pointing to unmapped
53 memory and faults almost instantly.
54
55 The workaround implemented by this program is documented in comment
56 8 of bug 267997, viz:
57
58 One really sick workaround is to observe that the executables
59 contain a redundant MachO load command:
60
61 Load command 2
62 cmd LC_SEGMENT_64
63 cmdsize 72
64 segname __LINKEDIT
65 vmaddr 0x0000000138dea000
66 vmsize 0x00000000000ad000
67 fileoff 2658304
68 filesize 705632
69 maxprot 0x00000007
70 initprot 0x00000001
71 nsects 0
72 flags 0x0
73
74 The described section presumably contains information intended for
75 the dynamic linker, but is irrelevant because this is a statically
76 linked executable. Hence it might be possible to postprocess the
77 executables after linking, to overwrite this entry with the
78 information that would have been in the missing __UNIXSTACK entry.
79 I tried this by hand (with a binary editor) earlier and got
80 something that worked.
81
82 --- Problem (2) ------------------------------------------------
83
84 On MacOSX 10.10 (Yosemite), the kernel requires all valid
85 executables to have a __PAGEZERO section with SVMA of zero and size
86 of at least one page. However, our tool executables have a
87 __PAGEZERO section with SVMA set to the requested Valgrind load
88 address (typically 0x1'3800'0000). And the kernel won't start
89 those. So we take the opportunity to "fix" this by setting the
90 SVMA to zero. Seems to work and have no obvious bad side effects.
91 */
92
93 #define DEBUGPRINTING 0
94
95 #include <assert.h>
96 #include <stdlib.h>
97 #include <stdio.h>
98 #include <string.h>
99 #include <sys/mman.h>
100 #include <sys/stat.h>
101 #include <unistd.h>
102 #include <fcntl.h>
103
104 #undef PLAT_x86_darwin
105 #undef PLAT_amd64_darwin
106
107 #if defined(__APPLE__) && defined(__i386__)
108 # define PLAT_x86_darwin 1
109 #elif defined(__APPLE__) && defined(__x86_64__)
110 # define PLAT_amd64_darwin 1
111 #else
112 # error "Can't be compiled on this platform"
113 #endif
114
115 #include <mach-o/loader.h>
116 #include <mach-o/nlist.h>
117 #include <mach-o/fat.h>
118 #include <mach/i386/thread_status.h>
119
120 /* Get hold of DARWIN_VERS, and check it has a sane value. */
121 #include "config.h"
122 #if DARWIN_VERS != DARWIN_10_5 && DARWIN_VERS != DARWIN_10_6 \
123 && DARWIN_VERS != DARWIN_10_7 && DARWIN_VERS != DARWIN_10_8 \
124 && DARWIN_VERS != DARWIN_10_9 && DARWIN_VERS != DARWIN_10_10
125 # error "Unknown DARWIN_VERS value. This file only compiles on Darwin."
126 #endif
127
128
/* Valgrind-style names for the basic character, integer and boolean
   types used throughout this file. */

/* Characters. */
typedef unsigned char UChar;
typedef signed char   Char;
typedef char          HChar;   /* signedness is whatever the host's
                                  plain 'char' has */

/* Plain integers. */
typedef unsigned int UInt;
typedef signed int   Int;

/* Booleans are stored in a single unsigned byte. */
typedef unsigned char Bool;
#define True  ((Bool)1)
#define False ((Bool)0)

/* Unsigned long integer, and the size and address types built on it. */
typedef unsigned long UWord;
typedef UWord         SizeT;
typedef UWord         Addr;

/* Long long integers. */
typedef unsigned long long int ULong;
typedef signed long long int   Long;
147
148
149
150 __attribute__((noreturn))
fail(HChar * msg)151 void fail ( HChar* msg )
152 {
153 fprintf(stderr, "fixup_macho_loadcmds: fail: %s\n", msg);
154 exit(1);
155 }
156
157
158 /*------------------------------------------------------------*/
159 /*--- ---*/
160 /*--- Mach-O file mapping/unmapping helpers ---*/
161 /*--- ---*/
162 /*------------------------------------------------------------*/
163
164 typedef
165 struct {
166 /* These two describe the entire mapped-in ("primary") image,
167 fat headers, kitchen sink, whatnot: the entire file. The
168 image is mapped into img[0 .. img_szB-1]. */
169 UChar* img;
170 SizeT img_szB;
171 /* These two describe the Mach-O object of interest, which is
172 presumably somewhere inside the primary image.
173 map_image_aboard() below, which generates this info, will
174 carefully check that the macho_ fields denote a section of
175 memory that falls entirely inside img[0 .. img_szB-1]. */
176 UChar* macho_img;
177 SizeT macho_img_szB;
178 }
179 ImageInfo;
180
181
is_macho_object_file(const void * buf,SizeT szB)182 Bool is_macho_object_file( const void* buf, SizeT szB )
183 {
184 /* (JRS: the Mach-O headers might not be in this mapped data,
185 because we only mapped a page for this initial check,
186 or at least not very much, and what's at the start of the file
187 is in general a so-called fat header. The Mach-O object we're
188 interested in could be arbitrarily far along the image, and so
189 we can't assume its header will fall within this page.) */
190
191 /* But we can say that either it's a fat object, in which case it
192 begins with a fat header, or it's unadorned Mach-O, in which
193 case it starts with a normal header. At least do what checks we
194 can to establish whether or not we're looking at something
195 sane. */
196
197 const struct fat_header* fh_be = buf;
198 const struct mach_header_64* mh = buf;
199
200 assert(buf);
201 if (szB < sizeof(struct fat_header))
202 return False;
203 if (ntohl(fh_be->magic) == FAT_MAGIC)
204 return True;
205
206 if (szB < sizeof(struct mach_header_64))
207 return False;
208 if (mh->magic == MH_MAGIC_64)
209 return True;
210
211 return False;
212 }
213
214
215 /* Unmap an image mapped in by map_image_aboard. */
unmap_image(ImageInfo * ii)216 static void unmap_image ( /*MOD*/ImageInfo* ii )
217 {
218 Int r;
219 assert(ii->img);
220 assert(ii->img_szB > 0);
221 r = munmap( ii->img, ii->img_szB );
222 /* Do we care if this fails? I suppose so; it would indicate
223 some fairly serious snafu with the mapping of the file. */
224 assert( !r );
225 memset(ii, 0, sizeof(*ii));
226 }
227
228
229 /* Map a given fat or thin object aboard, find the thin part if
230 necessary, do some checks, and write details of both the fat and
231 thin parts into *ii. Returns 32 (and leaves the file unmapped) if
232 the thin part is a 32 bit file. Returns 64 if it's a 64 bit file.
233 Does not return on failure. Guarantees to return pointers to a
234 valid(ish) Mach-O image if it succeeds. */
map_image_aboard(ImageInfo * ii,HChar * filename)235 static Int map_image_aboard ( /*OUT*/ImageInfo* ii, HChar* filename )
236 {
237 memset(ii, 0, sizeof(*ii));
238
239 /* First off, try to map the thing in. */
240 { SizeT size;
241 Int r, fd;
242 struct stat stat_buf;
243
244 r = stat(filename, &stat_buf);
245 if (r)
246 fail("Can't stat image (to determine its size)?!");
247 size = stat_buf.st_size;
248
249 fd = open(filename, O_RDWR, 0);
250 if (fd == -1)
251 fail("Can't open image for possible modification!");
252 if (DEBUGPRINTING)
253 printf("size %lu fd %d\n", size, fd);
254 void* v = mmap ( NULL, size, PROT_READ|PROT_WRITE,
255 MAP_FILE|MAP_SHARED, fd, 0 );
256 if (v == MAP_FAILED) {
257 perror("mmap failed");
258 fail("Can't mmap image for possible modification!");
259 }
260
261 close(fd);
262
263 ii->img = (UChar*)v;
264 ii->img_szB = size;
265 }
266
267 /* Now it's mapped in and we have .img and .img_szB set. Look for
268 the embedded Mach-O object. If not findable, unmap and fail. */
269 { struct fat_header* fh_be;
270 struct fat_header fh;
271 struct mach_header_64* mh;
272
273 // Assume initially that we have a thin image, and update
274 // these if it turns out to be fat.
275 ii->macho_img = ii->img;
276 ii->macho_img_szB = ii->img_szB;
277
278 // Check for fat header.
279 if (ii->img_szB < sizeof(struct fat_header))
280 fail("Invalid Mach-O file (0 too small).");
281
282 // Fat header is always BIG-ENDIAN
283 fh_be = (struct fat_header *)ii->img;
284 fh.magic = ntohl(fh_be->magic);
285 fh.nfat_arch = ntohl(fh_be->nfat_arch);
286 if (fh.magic == FAT_MAGIC) {
287 // Look for a good architecture.
288 struct fat_arch *arch_be;
289 struct fat_arch arch;
290 Int f;
291 if (ii->img_szB < sizeof(struct fat_header)
292 + fh.nfat_arch * sizeof(struct fat_arch))
293 fail("Invalid Mach-O file (1 too small).");
294
295 for (f = 0, arch_be = (struct fat_arch *)(fh_be+1);
296 f < fh.nfat_arch;
297 f++, arch_be++) {
298 Int cputype;
299 # if defined(PLAT_x86_darwin)
300 cputype = CPU_TYPE_X86;
301 # elif defined(PLAT_amd64_darwin)
302 cputype = CPU_TYPE_X86_64;
303 # else
304 # error "unknown architecture"
305 # endif
306 arch.cputype = ntohl(arch_be->cputype);
307 arch.cpusubtype = ntohl(arch_be->cpusubtype);
308 arch.offset = ntohl(arch_be->offset);
309 arch.size = ntohl(arch_be->size);
310 if (arch.cputype == cputype) {
311 if (ii->img_szB < arch.offset + arch.size)
312 fail("Invalid Mach-O file (2 too small).");
313 ii->macho_img = ii->img + arch.offset;
314 ii->macho_img_szB = arch.size;
315 break;
316 }
317 }
318 if (f == fh.nfat_arch)
319 fail("No acceptable architecture found in fat file.");
320 }
321
322 /* Sanity check what we found. */
323
324 /* assured by logic above */
325 assert(ii->img_szB >= sizeof(struct fat_header));
326
327 if (ii->macho_img_szB < sizeof(struct mach_header_64))
328 fail("Invalid Mach-O file (3 too small).");
329
330 if (ii->macho_img_szB > ii->img_szB)
331 fail("Invalid Mach-O file (thin bigger than fat).");
332
333 if (ii->macho_img >= ii->img
334 && ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB) {
335 /* thin entirely within fat, as expected */
336 } else {
337 fail("Invalid Mach-O file (thin not inside fat).");
338 }
339
340 mh = (struct mach_header_64 *)ii->macho_img;
341 if (mh->magic == MH_MAGIC) {
342 assert(ii->img);
343 assert(ii->macho_img);
344 assert(ii->img_szB > 0);
345 assert(ii->macho_img_szB > 0);
346 assert(ii->macho_img >= ii->img);
347 assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
348 return 32;
349 }
350 if (mh->magic != MH_MAGIC_64)
351 fail("Invalid Mach-O file (bad magic).");
352
353 if (ii->macho_img_szB < sizeof(struct mach_header_64) + mh->sizeofcmds)
354 fail("Invalid Mach-O file (4 too small).");
355 }
356
357 assert(ii->img);
358 assert(ii->macho_img);
359 assert(ii->img_szB > 0);
360 assert(ii->macho_img_szB > 0);
361 assert(ii->macho_img >= ii->img);
362 assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
363 return 64;
364 }
365
366
367 /*------------------------------------------------------------*/
368 /*--- ---*/
369 /*--- Mach-O top-level processing ---*/
370 /*--- ---*/
371 /*------------------------------------------------------------*/
372
/* Post-process the tool executable FILENAME in place, dealing with
   Problems (1) and (2) described at the top of this file.

   expected_stack_start is the lowest address of the requested stack
   and expected_stack_size its size in bytes; the initial RSP recorded
   in LC_UNIXTHREAD must equal their sum.

   A 32-bit Mach-O file is left untouched.  For a 64-bit file:

   * the load commands are scanned, with every command checked to lie
     inside the load command area, and any load command kind other than
     LC_SEGMENT_64, LC_SYMTAB, LC_DYSYMTAB, LC_UUID or LC_UNIXTHREAD is
     a fatal error;

   * the initial RSP is checked against the expected value;

   * an existing __UNIXSTACK segment is checked and left alone;
     otherwise the __LINKEDIT segment command is overwritten so that it
     describes the stack instead;

   * when built for DARWIN_10_10, __PAGEZERO.vmaddr is set to zero.

   Modifications are stores into the file mapping set up by
   map_image_aboard().  Any inconsistency is reported through fail(),
   which does not return. */
void modify_macho_loadcmds ( HChar* filename,
                             ULong  expected_stack_start,
                             ULong  expected_stack_size )
{
   ImageInfo ii;

   /* The data collected by the scan over the load commands. */
   ULong init_rsp = 0;
   Bool  have_rsp = False;
   struct segment_command_64* seg__unixstack = NULL;
   struct segment_command_64* seg__linkedit  = NULL;
   struct segment_command_64* seg__pagezero  = NULL;

   memset(&ii, 0, sizeof(ii));

   Int size = map_image_aboard( &ii, filename );
   if (size == 32) {
      fprintf(stderr, "fixup_macho_loadcmds: Is 32-bit MachO file;"
              " no modifications needed.\n");
      goto out;
   }

   assert(size == 64);

   assert(ii.macho_img != NULL && ii.macho_img_szB > 0);

   /* Poke around in the Mach-O header, to find some important
      stuff.
      * the location of the __UNIXSTACK load command, if any
      * the location of the __LINKEDIT load command, if any
      * the location of the __PAGEZERO load command, if any
      * the initial RSP value as stated in the LC_UNIXTHREAD
   */
   {
      struct mach_header_64* mh = (struct mach_header_64 *)ii.macho_img;
      /* map_image_aboard has already checked that the sizeofcmds bytes
         following the header lie inside the image, so staying below
         cmds_end keeps every access in bounds. */
      UChar* cursor   = (UChar*)(mh + 1);
      UChar* cmds_end = cursor + mh->sizeofcmds;
      UInt   c;

      for (c = 0; c < mh->ncmds; c++, cursor += ((struct load_command *)
                                                 cursor)->cmdsize) {
         struct load_command* cmd;
         SizeT room = (SizeT)(cmds_end - cursor);

         /* Don't trust ncmds/cmdsize: a command must fit in what is
            left of the load command area, and must be at least big
            enough to hold its own cmd/cmdsize fields. */
         if (room < sizeof(struct load_command))
            fail("load command lies outside the load command area");
         cmd = (struct load_command *)cursor;
         if (cmd->cmdsize < sizeof(struct load_command)
             || cmd->cmdsize > room)
            fail("load command has an invalid cmdsize");

         if (DEBUGPRINTING)
            printf("load cmd: offset %4lu size %3d kind %2d = ",
                   (unsigned long)(cursor - (UChar*)ii.macho_img),
                   cmd->cmdsize, cmd->cmd);

         /* Only these kinds are expected in a tool executable;
            anything else is a fatal error. */
         switch (cmd->cmd) {
            case LC_SEGMENT_64:
               if (DEBUGPRINTING)
                  printf("LC_SEGMENT_64");
               break;
            case LC_SYMTAB:
               if (DEBUGPRINTING)
                  printf("LC_SYMTAB");
               break;
            case LC_DYSYMTAB:
               if (DEBUGPRINTING)
                  printf("LC_DYSYMTAB");
               break;
            case LC_UUID:
               if (DEBUGPRINTING)
                  printf("LC_UUID");
               break;
            case LC_UNIXTHREAD:
               if (DEBUGPRINTING)
                  printf("LC_UNIXTHREAD");
               break;
            default:
               if (DEBUGPRINTING)
                  printf("???");
               fail("unexpected load command in Mach header");
               break;
         }
         if (DEBUGPRINTING)
            printf("\n");

         /* Note what the stated initial RSP value is, so we can
            check it is as expected. */
         if (cmd->cmd == LC_UNIXTHREAD) {
            struct thread_command* tcmd = (struct thread_command*)cmd;
            /* The words after the thread_command header: w32s[0] is
               the flavor, w32s[1] is a second 32-bit word (the state's
               word count, per <mach-o/loader.h>), and the register
               state itself starts at w32s[2]. */
            UInt* w32s = (UInt*)( (UChar*)tcmd + sizeof(*tcmd) );
            const SizeT state_off = sizeof(*tcmd) + 2 * sizeof(UInt);

            if (cmd->cmdsize < state_off)
               fail("LC_UNIXTHREAD load command is too small");

            if (DEBUGPRINTING)
               printf("UnixThread: flavor %u = ", w32s[0]);
            if (w32s[0] == x86_THREAD_STATE64 && !have_rsp) {
               if (DEBUGPRINTING)
                  printf("x86_THREAD_STATE64\n");
               if (cmd->cmdsize - state_off
                   < sizeof(x86_thread_state64_t))
                  fail("LC_UNIXTHREAD too small for x86_THREAD_STATE64");
               x86_thread_state64_t* state64
                  = (x86_thread_state64_t*)(&w32s[2]);
               have_rsp = True;
               init_rsp = state64->__rsp;
               if (DEBUGPRINTING)
                  printf("rsp = 0x%llx\n", init_rsp);
            } else {
               if (DEBUGPRINTING)
                  printf("???");
            }
            if (DEBUGPRINTING)
               printf("\n");
         }

         if (cmd->cmd == LC_SEGMENT_64) {
            if (cmd->cmdsize < sizeof(struct segment_command_64))
               fail("LC_SEGMENT_64 load command is too small");
            struct segment_command_64 *seg
               = (struct segment_command_64 *)cmd;
            /* segname is a fixed-width field that need not be
               NUL-terminated, so bound the comparison by its size. */
            if (0 == strncmp(seg->segname, "__LINKEDIT",
                             sizeof(seg->segname)))
               seg__linkedit = seg;
            if (0 == strncmp(seg->segname, "__UNIXSTACK",
                             sizeof(seg->segname)))
               seg__unixstack = seg;
            if (0 == strncmp(seg->segname, "__PAGEZERO",
                             sizeof(seg->segname)))
               seg__pagezero = seg;
         }
      }
   }

   /*
      Actions are then as follows:

      * (always) check the RSP value is as expected, and abort if not

      * if there's a UNIXSTACK load command, check it is as expected.
        If not abort, if yes, do nothing more.

      * (so there's no UNIXSTACK load command).  if there's a LINKEDIT
        load command, check if it is minimally usable (has 0 for
        nsects and flags).  If yes, convert it to a UNIXSTACK load
        command.  If there is none, or is unusable, then we're out of
        options and have to abort.
   */
   if (!have_rsp)
      fail("Can't find / check initial RSP setting");
   if (init_rsp != expected_stack_start + expected_stack_size)
      fail("Initial RSP value not as expected");

   fprintf(stderr, "fixup_macho_loadcmds: "
           "initial RSP is as expected (0x%llx)\n",
           expected_stack_start + expected_stack_size );

   if (seg__unixstack) {
      struct segment_command_64 *seg = seg__unixstack;
      if (seg->vmaddr != expected_stack_start)
         fail("has __UNIXSTACK, but wrong ::vmaddr");
      if (seg->vmsize != expected_stack_size)
         fail("has __UNIXSTACK, but wrong ::vmsize");
      if (seg->maxprot != 7)
         fail("has __UNIXSTACK, but wrong ::maxprot (should be 7)");
      if (seg->initprot != 3)
         fail("has __UNIXSTACK, but wrong ::initprot (should be 3)");
      if (seg->nsects != 0)
         fail("has __UNIXSTACK, but wrong ::nsects (should be 0)");
      if (seg->flags != 0)
         fail("has __UNIXSTACK, but wrong ::flags (should be 0)");
      /* looks ok */
      fprintf(stderr, "fixup_macho_loadcmds: "
              "acceptable __UNIXSTACK present; no modifications.\n" );
   }
   else if (seg__linkedit) {
      struct segment_command_64 *seg = seg__linkedit;
      if (seg->nsects != 0)
         fail("has __LINKEDIT, but wrong ::nsects (should be 0)");
      if (seg->flags != 0)
         fail("has __LINKEDIT, but wrong ::flags (should be 0)");
      fprintf(stderr, "fixup_macho_loadcmds: "
              "no __UNIXSTACK present.\n" );
      fprintf(stderr, "fixup_macho_loadcmds: "
              "converting __LINKEDIT to __UNIXSTACK.\n" );
      /* Rewrite the whole fixed-width name field, zero padded. */
      memset(seg->segname, 0, sizeof(seg->segname));
      memcpy(seg->segname, "__UNIXSTACK", strlen("__UNIXSTACK"));
      seg->vmaddr   = expected_stack_start;
      seg->vmsize   = expected_stack_size;
      seg->fileoff  = 0;
      seg->filesize = 0;
      seg->maxprot  = 7;
      seg->initprot = 3;
      /* success */
   }
   else {
      /* out of options */
      fail("no __UNIXSTACK found and no usable __LINKEDIT found; "
           "out of options.");
      /* NOTREACHED */
   }

   /* Deal with Problem (2) as documented above. */
#  if DARWIN_VERS == DARWIN_10_10
   assert(size == 64);
   if (!seg__pagezero) {
      fail("Can't find __PAGEZERO to modify; can't continue.");
   }
   /* vmaddr is a 64-bit field; print it as such rather than via a
      pointer cast, which would truncate it in a 32-bit build. */
   fprintf(stderr, "fixup_macho_loadcmds: "
           "changing __PAGEZERO.vmaddr from 0x%llx to 0x0.\n",
           (ULong)seg__pagezero->vmaddr);
   seg__pagezero->vmaddr = 0;
#  endif

  out:
   if (ii.img)
      unmap_image(&ii);
}
573
574
is_plausible_tool_exe_name(HChar * nm)575 static Bool is_plausible_tool_exe_name ( HChar* nm )
576 {
577 HChar* p;
578 if (!nm)
579 return False;
580
581 // Does it end with this string?
582 p = strstr(nm, "-x86-darwin");
583 if (p && 0 == strcmp(p, "-x86-darwin"))
584 return True;
585
586 p = strstr(nm, "-amd64-darwin");
587 if (p && 0 == strcmp(p, "-amd64-darwin"))
588 return True;
589
590 return False;
591 }
592
593
main(int argc,char ** argv)594 int main ( int argc, char** argv )
595 {
596 Int r;
597 ULong req_stack_addr = 0;
598 ULong req_stack_size = 0;
599
600 if (argc != 4)
601 fail("args: -stack_addr-arg -stack_size-arg "
602 "name-of-tool-executable-to-modify");
603
604 r= sscanf(argv[1], "0x%llx", &req_stack_addr);
605 if (r != 1) fail("invalid stack_addr arg");
606
607 r= sscanf(argv[2], "0x%llx", &req_stack_size);
608 if (r != 1) fail("invalid stack_size arg");
609
610 fprintf(stderr, "fixup_macho_loadcmds: "
611 "requested stack_addr (top) 0x%llx, "
612 "stack_size 0x%llx\n", req_stack_addr, req_stack_size );
613
614 if (!is_plausible_tool_exe_name(argv[3]))
615 fail("implausible tool exe name -- not of the form *-{x86,amd64}-darwin");
616
617 fprintf(stderr, "fixup_macho_loadcmds: examining tool exe: %s\n",
618 argv[3] );
619 modify_macho_loadcmds( argv[3], req_stack_addr - req_stack_size,
620 req_stack_size );
621
622 return 0;
623 }
624
625 /*
626 cmd LC_SEGMENT_64
627 cmdsize 72
628 segname __LINKEDIT
629 vmaddr 0x0000000138dea000
630 vmsize 0x00000000000ad000
631 fileoff 2658304
632 filesize 705632
633 maxprot 0x00000007
634 initprot 0x00000001
635 nsects 0
636 flags 0x0
637 */
638
639 /*
640 cmd LC_SEGMENT_64
641 cmdsize 72
642 segname __UNIXSTACK
643 vmaddr 0x0000000133800000
644 vmsize 0x0000000000800000
645 fileoff 2498560
646 filesize 0
647 maxprot 0x00000007
648 initprot 0x00000003
649 nsects 0
650 flags 0x0
651 */
652