1
2 /* Derived from Valgrind sources, coregrind/m_debuginfo/readmacho.c.
3 GPL 2+ therefore.
4
5 Can be compiled as either a 32- or 64-bit program (doesn't matter).
6 */
7
8 /* What does this program do? In short it postprocesses tool
9 executables on MacOSX, after linking using /usr/bin/ld.
10
11 This is to deal with two separate and entirely unrelated problems.
12 Problem (1) is a bug in the linker in Xcode 4.0.0. Problem (2) is
13 much newer and concerns linking 64-bit tool executables for
14 Yosemite (10.10).
15
16 --- Problem (1) ------------------------------------------------
17
18 This is a bug in the linker on Xcode 4.0.0 and Xcode 4.0.1. Xcode
19 versions prior to 4.0.0 are unaffected.
20
21 The tracking bug is https://bugs.kde.org/show_bug.cgi?id=267997
22
23 The bug causes 64-bit tool executables to segfault at startup,
24 because:
25
26 Comparing the MachO load commands vs a (working) tool executable
27 that was created by Xcode 3.2.x, it appears that the new linker has
28 partially ignored the build system's request to place the tool
29 executable's stack at a non standard location. The build system
30 tells the linker "-stack_addr 0x134000000 -stack_size 0x800000".
31
32 With the Xcode 3.2 linker those flags produce two results:
33
34 (1) A load command to allocate the stack at the said location:
35 Load command 3
36 cmd LC_SEGMENT_64
37 cmdsize 72
38 segname __UNIXSTACK
39 vmaddr 0x0000000133800000
40 vmsize 0x0000000000800000
41 fileoff 2285568
42 filesize 0
43 maxprot 0x00000007
44 initprot 0x00000003
45 nsects 0
46 flags 0x0
47
48 (2) A request (in LC_UNIXTHREAD) to set %rsp to the correct value
49 at process startup, 0x134000000.
50
51 With Xcode 4.0.1, (1) is missing but (2) is still present. The
52 tool executable therefore starts up with %rsp pointing to unmapped
53 memory and faults almost instantly.
54
55 The workaround implemented by this program is documented in comment
56 8 of bug 267997, viz:
57
58 One really sick workaround is to observe that the executables
59 contain a redundant MachO load command:
60
61 Load command 2
62 cmd LC_SEGMENT_64
63 cmdsize 72
64 segname __LINKEDIT
65 vmaddr 0x0000000138dea000
66 vmsize 0x00000000000ad000
67 fileoff 2658304
68 filesize 705632
69 maxprot 0x00000007
70 initprot 0x00000001
71 nsects 0
72 flags 0x0
73
74 The described section presumably contains information intended for
75 the dynamic linker, but is irrelevant because this is a statically
76 linked executable. Hence it might be possible to postprocess the
77 executables after linking, to overwrite this entry with the
78 information that would have been in the missing __UNIXSTACK entry.
79 I tried this by hand (with a binary editor) earlier and got
80 something that worked.
81
82 --- Problem (2) ------------------------------------------------
83
   On MacOSX 10.10 (Yosemite), the kernel requires all valid
   executables to have a __PAGEZERO segment whose vmaddr (SVMA) is
   zero and whose size is at least one page.  However, our tool
   executables have a __PAGEZERO segment whose vmaddr is set to the
   requested Valgrind load address (typically 0x1'3800'0000), and
   the kernel won't start those.  So we take the opportunity to
   "fix" this by setting the vmaddr to zero.  This seems to work and
   to have no obvious bad side effects.
91 */
92
93 #define DEBUGPRINTING 0
94
95 #include <assert.h>
96 #include <stdlib.h>
97 #include <stdio.h>
98 #include <string.h>
99 #include <sys/mman.h>
100 #include <sys/stat.h>
101 #include <unistd.h>
102 #include <fcntl.h>
103
104 #undef PLAT_x86_darwin
105 #undef PLAT_amd64_darwin
106
107 #if defined(__APPLE__) && defined(__i386__)
108 # define PLAT_x86_darwin 1
109 #elif defined(__APPLE__) && defined(__x86_64__)
110 # define PLAT_amd64_darwin 1
111 #else
112 # error "Can't be compiled on this platform"
113 #endif
114
115 #include <mach-o/loader.h>
116 #include <mach-o/nlist.h>
117 #include <mach-o/fat.h>
118 #include <mach/i386/thread_status.h>
119
120 /* Get hold of DARWIN_VERS, and check it has a sane value. */
121 #include "config.h"
122 #if DARWIN_VERS != DARWIN_10_5 && DARWIN_VERS != DARWIN_10_6 \
123 && DARWIN_VERS != DARWIN_10_7 && DARWIN_VERS != DARWIN_10_8 \
124 && DARWIN_VERS != DARWIN_10_9 && DARWIN_VERS != DARWIN_10_10 \
125 && DARWIN_VERS != DARWIN_10_11 && DARWIN_VERS != DARWIN_10_12
126 # error "Unknown DARWIN_VERS value. This file only compiles on Darwin."
127 #endif
128
129
/* Short aliases for the basic C types used throughout this file
   (the file is derived from Valgrind sources and keeps these names). */

/* Character-sized types. */
typedef unsigned char       UChar;
typedef signed char         Char;
typedef char                HChar;   /* plain char: signedness is the host's choice */

/* Machine int. */
typedef unsigned int        UInt;
typedef int                 Int;

/* Boolean, stored in a single byte. */
typedef unsigned char       Bool;
#define True   ((Bool)1)
#define False  ((Bool)0)

/* Unsigned long sized types, used here for sizes and addresses. */
typedef unsigned long       UWord;
typedef UWord               SizeT;
typedef UWord               Addr;

/* long long sized integers. */
typedef unsigned long long  ULong;
typedef long long           Long;
148
149
150
151 __attribute__((noreturn))
fail(HChar * msg)152 void fail ( HChar* msg )
153 {
154 fprintf(stderr, "fixup_macho_loadcmds: fail: %s\n", msg);
155 exit(1);
156 }
157
158
159 /*------------------------------------------------------------*/
160 /*--- ---*/
161 /*--- Mach-O file mapping/unmapping helpers ---*/
162 /*--- ---*/
163 /*------------------------------------------------------------*/
164
165 typedef
166 struct {
167 /* These two describe the entire mapped-in ("primary") image,
168 fat headers, kitchen sink, whatnot: the entire file. The
169 image is mapped into img[0 .. img_szB-1]. */
170 UChar* img;
171 SizeT img_szB;
172 /* These two describe the Mach-O object of interest, which is
173 presumably somewhere inside the primary image.
174 map_image_aboard() below, which generates this info, will
175 carefully check that the macho_ fields denote a section of
176 memory that falls entirely inside img[0 .. img_szB-1]. */
177 UChar* macho_img;
178 SizeT macho_img_szB;
179 }
180 ImageInfo;
181
182
/* Quick plausibility test on the first szB bytes of a file, at buf.
   Returns True if the data starts with either a fat header or a 64-bit
   Mach-O header, and False otherwise.

   Only the very start of the file is examined.  For a fat file the
   Mach-O object of interest may lie arbitrarily far into the image, so
   its own header cannot be assumed to be inside the supplied bytes;
   recognising the leading fat header is the most that can be done
   here. */
Bool is_macho_object_file( const void* buf, SizeT szB )
{
   assert(buf);

   /* Too short to hold even a fat header: cannot be either format. */
   if (szB < sizeof(struct fat_header))
      return False;

   /* Fat headers are stored big-endian, hence the ntohl. */
   if (ntohl(((const struct fat_header*)buf)->magic) == FAT_MAGIC)
      return True;

   /* Not fat, so the only other acceptable start is a thin 64-bit
      Mach-O header, which needs more bytes than a fat header does. */
   return (szB >= sizeof(struct mach_header_64)
           && ((const struct mach_header_64*)buf)->magic == MH_MAGIC_64)
          ? True : False;
}
214
215
216 /* Unmap an image mapped in by map_image_aboard. */
unmap_image(ImageInfo * ii)217 static void unmap_image ( /*MOD*/ImageInfo* ii )
218 {
219 Int r;
220 assert(ii->img);
221 assert(ii->img_szB > 0);
222 r = munmap( ii->img, ii->img_szB );
223 /* Do we care if this fails? I suppose so; it would indicate
224 some fairly serious snafu with the mapping of the file. */
225 assert( !r );
226 memset(ii, 0, sizeof(*ii));
227 }
228
229
/* Map a given fat or thin object aboard (read/write, shared, so that
   later stores through the mapping modify the file), find the thin
   part if necessary, do some checks, and write details of both the
   fat and thin parts into *ii.

   Returns 32 if the thin part is a 32-bit Mach-O file and 64 if it is
   a 64-bit one.  In BOTH cases the file is left mapped and *ii
   describes the mapping; the caller is responsible for calling
   unmap_image().  For the 64-bit case it is additionally guaranteed
   that the Mach-O header plus sizeofcmds bytes of load commands lie
   inside the thin image.

   Does not return on failure: every problem is reported via fail(),
   which exits the process. */
static Int map_image_aboard ( /*OUT*/ImageInfo* ii, HChar* filename )
{
   memset(ii, 0, sizeof(*ii));

   /* First off, try to map the thing in. */
   {  SizeT       size;
      Int         r, fd;
      struct stat stat_buf;

      r = stat(filename, &stat_buf);
      if (r)
         fail("Can't stat image (to determine its size)?!");
      size = stat_buf.st_size;

      fd = open(filename, O_RDWR, 0);
      if (fd == -1)
         fail("Can't open image for possible modification!");
      if (DEBUGPRINTING)
         printf("size %lu fd %d\n", size, fd);
      void* v = mmap ( NULL, size, PROT_READ|PROT_WRITE,
                             MAP_FILE|MAP_SHARED, fd, 0 );
      if (v == MAP_FAILED) {
         perror("mmap failed");
         fail("Can't mmap image for possible modification!");
      }

      /* The mapping stays valid after the descriptor is closed. */
      close(fd);

      ii->img     = (UChar*)v;
      ii->img_szB = size;
   }

   /* Now it's mapped in and we have .img and .img_szB set.  Look for
      the embedded Mach-O object.  If not findable, fail. */
   {  struct fat_header*     fh_be;
      struct fat_header      fh;
      struct mach_header_64* mh;

      // Assume initially that we have a thin image, and update
      // these if it turns out to be fat.
      ii->macho_img     = ii->img;
      ii->macho_img_szB = ii->img_szB;

      // Check for fat header.
      if (ii->img_szB < sizeof(struct fat_header))
         fail("Invalid Mach-O file (0 too small).");

      // Fat header is always BIG-ENDIAN
      fh_be = (struct fat_header *)ii->img;
      fh.magic     = ntohl(fh_be->magic);
      fh.nfat_arch = ntohl(fh_be->nfat_arch);
      if (fh.magic == FAT_MAGIC) {
         // Look for a good architecture.
         struct fat_arch *arch_be;
         struct fat_arch  arch;
         UInt f;   /* unsigned, to match the type of nfat_arch */
         Int  cputype;
#        if defined(PLAT_x86_darwin)
         cputype = CPU_TYPE_X86;
#        elif defined(PLAT_amd64_darwin)
         cputype = CPU_TYPE_X86_64;
#        else
#          error "unknown architecture"
#        endif

         /* Is there room for nfat_arch descriptors after the header?
            Phrased as a division so that a huge nfat_arch cannot
            overflow the computation. */
         if (fh.nfat_arch > (ii->img_szB - sizeof(struct fat_header))
                            / sizeof(struct fat_arch))
            fail("Invalid Mach-O file (1 too small).");

         for (f = 0, arch_be = (struct fat_arch *)(fh_be+1);
              f < fh.nfat_arch;
              f++, arch_be++) {
            arch.cputype    = ntohl(arch_be->cputype);
            arch.cpusubtype = ntohl(arch_be->cpusubtype);
            arch.offset     = ntohl(arch_be->offset);
            arch.size       = ntohl(arch_be->size);
            if (arch.cputype == cputype) {
               /* offset and size are both 32-bit, so their sum can
                  wrap; compare without adding them together. */
               if (arch.offset > ii->img_szB
                   || arch.size > ii->img_szB - arch.offset)
                  fail("Invalid Mach-O file (2 too small).");
               ii->macho_img     = ii->img + arch.offset;
               ii->macho_img_szB = arch.size;
               break;
            }
         }
         if (f == fh.nfat_arch)
            fail("No acceptable architecture found in fat file.");
      }

      /* Sanity check what we found. */

      /* assured by logic above */
      assert(ii->img_szB >= sizeof(struct fat_header));

      if (ii->macho_img_szB < sizeof(struct mach_header_64))
         fail("Invalid Mach-O file (3 too small).");

      if (ii->macho_img_szB > ii->img_szB)
         fail("Invalid Mach-O file (thin bigger than fat).");

      if (ii->macho_img >= ii->img
          && ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB) {
         /* thin entirely within fat, as expected */
      } else {
         fail("Invalid Mach-O file (thin not inside fat).");
      }

      mh = (struct mach_header_64 *)ii->macho_img;
      if (mh->magic == MH_MAGIC) {
         /* 32-bit object: nothing further is checked.  The image
            remains mapped; the caller unmaps it. */
         assert(ii->img);
         assert(ii->macho_img);
         assert(ii->img_szB > 0);
         assert(ii->macho_img_szB > 0);
         assert(ii->macho_img >= ii->img);
         assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
         return 32;
      }
      if (mh->magic != MH_MAGIC_64)
         fail("Invalid Mach-O file (bad magic).");

      /* The load commands must fit inside the thin image.
         macho_img_szB >= sizeof(header) was established above, so the
         subtraction cannot underflow. */
      if (mh->sizeofcmds > ii->macho_img_szB - sizeof(struct mach_header_64))
         fail("Invalid Mach-O file (4 too small).");
   }

   assert(ii->img);
   assert(ii->macho_img);
   assert(ii->img_szB > 0);
   assert(ii->macho_img_szB > 0);
   assert(ii->macho_img >= ii->img);
   assert(ii->macho_img + ii->macho_img_szB <= ii->img + ii->img_szB);
   return 64;
}
366
367
368 /*------------------------------------------------------------*/
369 /*--- ---*/
370 /*--- Mach-O top-level processing ---*/
371 /*--- ---*/
372 /*------------------------------------------------------------*/
373
/* Top-level worker.  Maps FILENAME read/write and, if it is a 64-bit
   Mach-O executable, patches its load commands in place:

   * Problem (1): if there is no __UNIXSTACK segment, the __LINKEDIT
     segment command is overwritten so that it describes a stack
     segment at [expected_stack_start, +expected_stack_size).

   * Problem (2): when built for Darwin 10.10 or later, the vmaddr of
     the __PAGEZERO segment is set to zero.

   expected_stack_start is the LOWEST address of the stack and
   expected_stack_size its size in bytes; the initial RSP recorded in
   LC_UNIXTHREAD must equal their sum.

   32-bit files are left untouched.  Any inconsistency is reported via
   fail(), which does not return. */
void modify_macho_loadcmds ( HChar* filename,
                             ULong  expected_stack_start,
                             ULong  expected_stack_size )
{
   ImageInfo ii;
   memset(&ii, 0, sizeof(ii));

   Int size = map_image_aboard( &ii, filename );
   if (size == 32) {
      fprintf(stderr, "fixup_macho_loadcmds: Is 32-bit MachO file;"
              " no modifications needed.\n");
      goto out;
   }

   assert(size == 64);

   assert(ii.macho_img != NULL && ii.macho_img_szB > 0);

   /* Poke around in the Mach-O header, to find some important
      stuff.
      * the location of the __UNIXSTACK load command, if any
      * the location of the __LINKEDIT load command, if any
      * the location of the __PAGEZERO load command, if any
      * the initial RSP value as stated in the LC_UNIXTHREAD
   */

   /* The collected data */
   ULong init_rsp = 0;
   Bool  have_rsp = False;
   struct segment_command_64* seg__unixstack = NULL;
   struct segment_command_64* seg__linkedit  = NULL;
   struct segment_command_64* seg__pagezero  = NULL;

   /* Loop over the load commands and fill in the above 5 variables. */

   {  struct mach_header_64* mh = (struct mach_header_64 *)ii.macho_img;
      /* The commands start right after the header and occupy
         sizeofcmds bytes; map_image_aboard has checked that this
         whole range lies inside the mapped image. */
      UChar* p        = (UChar*)(mh + 1);
      UChar* cmds_end = p + mh->sizeofcmds;
      UInt   c;

      for (c = 0; c < mh->ncmds; c++) {
         struct load_command* cmd;
         SizeT remaining = (SizeT)(cmds_end - p);

         /* Never trust cmdsize blindly: a zero or oversized value
            would otherwise make us read (and later write) outside
            the load command area. */
         if (remaining < sizeof(struct load_command))
            fail("load commands overrun the area given by sizeofcmds");
         cmd = (struct load_command *)p;
         if (cmd->cmdsize < sizeof(struct load_command)
             || cmd->cmdsize > remaining)
            fail("load command has an implausible cmdsize");

         if (DEBUGPRINTING)
            printf("load cmd: offset %4lu size %3d kind %2d = ",
                   (unsigned long)((UChar*)cmd - (UChar*)ii.macho_img),
                   cmd->cmdsize, cmd->cmd);

         /* Only the kinds of command expected in a tool executable
            are tolerated; anything else is a fatal error. */
         switch (cmd->cmd) {
            case LC_SEGMENT_64:
               if (DEBUGPRINTING)
                  printf("LC_SEGMENT_64");
               break;
            case LC_SYMTAB:
               if (DEBUGPRINTING)
                  printf("LC_SYMTAB");
               break;
            case LC_DYSYMTAB:
               if (DEBUGPRINTING)
                  printf("LC_DYSYMTAB");
               break;
            case LC_UUID:
               if (DEBUGPRINTING)
                  printf("LC_UUID");
               break;
            case LC_UNIXTHREAD:
               if (DEBUGPRINTING)
                  printf("LC_UNIXTHREAD");
               break;
            default:
               if (DEBUGPRINTING)
                  printf("???");
               fail("unexpected load command in Mach header");
               break;
         }
         if (DEBUGPRINTING)
            printf("\n");

         /* Note what the stated initial RSP value is, so we can
            check it is as expected. */
         if (cmd->cmd == LC_UNIXTHREAD) {
            struct thread_command* tcmd = (struct thread_command*)cmd;
            /* The thread_command is followed by 32-bit words:
               w32s[0] is the flavor, w32s[1] the count, and the
               register state starts at w32s[2]. */
            UInt* w32s = (UInt*)( (UChar*)tcmd + sizeof(*tcmd) );
            if (cmd->cmdsize < sizeof(*tcmd) + 2 * sizeof(UInt))
               fail("LC_UNIXTHREAD command too small for flavor/count");
            if (DEBUGPRINTING)
               printf("UnixThread: flavor %u = ", w32s[0]);
            if (w32s[0] == x86_THREAD_STATE64 && !have_rsp) {
               if (cmd->cmdsize < sizeof(*tcmd) + 2 * sizeof(UInt)
                                  + sizeof(x86_thread_state64_t))
                  fail("LC_UNIXTHREAD command too small for "
                       "x86_THREAD_STATE64");
               if (DEBUGPRINTING)
                  printf("x86_THREAD_STATE64\n");
               x86_thread_state64_t* state64
                  = (x86_thread_state64_t*)(&w32s[2]);
               have_rsp = True;
               init_rsp = state64->__rsp;
               if (DEBUGPRINTING)
                  printf("rsp = 0x%llx\n", init_rsp);
            } else {
               if (DEBUGPRINTING)
                  printf("???");
            }
            if (DEBUGPRINTING)
               printf("\n");
         }

         if (cmd->cmd == LC_SEGMENT_64) {
            struct segment_command_64 *seg = (struct segment_command_64 *)cmd;
            if (cmd->cmdsize < sizeof(struct segment_command_64))
               fail("LC_SEGMENT_64 command is too small");
            /* segname is a fixed-size field, so bound the compare
               by its size. */
            if (0 == strncmp(seg->segname, "__LINKEDIT",
                             sizeof(seg->segname)))
               seg__linkedit = seg;
            if (0 == strncmp(seg->segname, "__UNIXSTACK",
                             sizeof(seg->segname)))
               seg__unixstack = seg;
            if (0 == strncmp(seg->segname, "__PAGEZERO",
                             sizeof(seg->segname)))
               seg__pagezero = seg;
         }

         p += cmd->cmdsize;
      }
   }

   /*
      Actions are then as follows:

      * (always) check the RSP value is as expected, and abort if not

      * if there's a UNIXSTACK load command, check it is as expected.
        If not abort, if yes, do nothing more.

      * (so there's no UNIXSTACK load command).  if there's a LINKEDIT
        load command, check if it is minimally usable (has 0 for
        nsects and flags).  If yes, convert it to a UNIXSTACK load
        command.  If there is none, or is unusable, then we're out of
        options and have to abort.

      * finally (Darwin 10.10 and later only) zero the vmaddr of the
        PAGEZERO load command, aborting if there is none.
   */
   if (!have_rsp)
      fail("Can't find / check initial RSP setting");
   if (init_rsp != expected_stack_start + expected_stack_size)
      fail("Initial RSP value not as expected");

   fprintf(stderr, "fixup_macho_loadcmds: "
                   "initial RSP is as expected (0x%llx)\n",
                   expected_stack_start + expected_stack_size );

   if (seg__unixstack) {
      struct segment_command_64 *seg = seg__unixstack;
      if (seg->vmaddr != expected_stack_start)
         fail("has __UNIXSTACK, but wrong ::vmaddr");
      if (seg->vmsize != expected_stack_size)
         fail("has __UNIXSTACK, but wrong ::vmsize");
      if (seg->maxprot != 7)
         fail("has __UNIXSTACK, but wrong ::maxprot (should be 7)");
      if (seg->initprot != 3)
         fail("has __UNIXSTACK, but wrong ::initprot (should be 3)");
      if (seg->nsects != 0)
         fail("has __UNIXSTACK, but wrong ::nsects (should be 0)");
      if (seg->flags != 0)
         fail("has __UNIXSTACK, but wrong ::flags (should be 0)");
      /* looks ok */
      fprintf(stderr, "fixup_macho_loadcmds: "
              "acceptable __UNIXSTACK present; no modifications.\n" );
      goto maybe_mash_pagezero;
   }

   if (seg__linkedit) {
      struct segment_command_64 *seg = seg__linkedit;
      if (seg->nsects != 0)
         fail("has __LINKEDIT, but wrong ::nsects (should be 0)");
      if (seg->flags != 0)
         fail("has __LINKEDIT, but wrong ::flags (should be 0)");
      fprintf(stderr, "fixup_macho_loadcmds: "
              "no __UNIXSTACK present.\n" );
      fprintf(stderr, "fixup_macho_loadcmds: "
              "converting __LINKEDIT to __UNIXSTACK.\n" );
      /* "__UNIXSTACK" plus its terminator is 12 bytes, which fits
         in the 16-byte segname field. */
      strcpy(seg->segname, "__UNIXSTACK");
      seg->vmaddr   = expected_stack_start;
      seg->vmsize   = expected_stack_size;
      seg->fileoff  = 0;
      seg->filesize = 0;
      seg->maxprot  = 7;   /* same values that are demanded of an */
      seg->initprot = 3;   /* existing __UNIXSTACK above */
      /* success */
      goto maybe_mash_pagezero;
   }

   /* out of options */
   fail("no __UNIXSTACK found and no usable __LINKEDIT found; "
        "out of options.");
   /* NOTREACHED */

  maybe_mash_pagezero:
   /* Deal with Problem (2) as documented above. */
#  if DARWIN_VERS >= DARWIN_10_10
   assert(size == 64);
   if (!seg__pagezero) {
      fail("Can't find __PAGEZERO to modify; can't continue.");
   }
   /* vmaddr is a 64-bit quantity; print it as an integer rather than
      casting it to a pointer, which would truncate it when this
      program is itself built as a 32-bit executable. */
   fprintf(stderr, "fixup_macho_loadcmds: "
           "changing __PAGEZERO.vmaddr from 0x%llx to 0x0.\n",
           (ULong)seg__pagezero->vmaddr);
   seg__pagezero->vmaddr = 0;
#  endif

  out:
   /* Stores made through the shared mapping reach the file; all that
      is left to do is to drop the mapping. */
   if (ii.img)
      unmap_image(&ii);
}
574
575
is_plausible_tool_exe_name(HChar * nm)576 static Bool is_plausible_tool_exe_name ( HChar* nm )
577 {
578 HChar* p;
579 if (!nm)
580 return False;
581
582 // Does it end with this string?
583 p = strstr(nm, "-x86-darwin");
584 if (p && 0 == strcmp(p, "-x86-darwin"))
585 return True;
586
587 p = strstr(nm, "-amd64-darwin");
588 if (p && 0 == strcmp(p, "-amd64-darwin"))
589 return True;
590
591 return False;
592 }
593
594
main(int argc,char ** argv)595 int main ( int argc, char** argv )
596 {
597 Int r;
598 ULong req_stack_addr = 0;
599 ULong req_stack_size = 0;
600
601 if (argc != 4)
602 fail("args: -stack_addr-arg -stack_size-arg "
603 "name-of-tool-executable-to-modify");
604
605 r= sscanf(argv[1], "0x%llx", &req_stack_addr);
606 if (r != 1) fail("invalid stack_addr arg");
607
608 r= sscanf(argv[2], "0x%llx", &req_stack_size);
609 if (r != 1) fail("invalid stack_size arg");
610
611 fprintf(stderr, "fixup_macho_loadcmds: "
612 "requested stack_addr (top) 0x%llx, "
613 "stack_size 0x%llx\n", req_stack_addr, req_stack_size );
614
615 if (!is_plausible_tool_exe_name(argv[3]))
616 fail("implausible tool exe name -- not of the form *-{x86,amd64}-darwin");
617
618 fprintf(stderr, "fixup_macho_loadcmds: examining tool exe: %s\n",
619 argv[3] );
620 modify_macho_loadcmds( argv[3], req_stack_addr - req_stack_size,
621 req_stack_size );
622
623 return 0;
624 }
625
/* For reference: an example __LINKEDIT load command of the kind that
   gets converted (see Problem (1) in the comment at the top of this
   file).

   cmd LC_SEGMENT_64
   cmdsize 72
   segname __LINKEDIT
   vmaddr 0x0000000138dea000
   vmsize 0x00000000000ad000
   fileoff 2658304
   filesize 705632
   maxprot 0x00000007
   initprot 0x00000001
   nsects 0
   flags 0x0
*/
639
/* For reference: an example of an acceptable __UNIXSTACK load command,
   for -stack_addr 0x134000000 -stack_size 0x800000.

   cmd LC_SEGMENT_64
   cmdsize 72
   segname __UNIXSTACK
   vmaddr 0x0000000133800000
   vmsize 0x0000000000800000
   fileoff 2498560
   filesize 0
   maxprot 0x00000007
   initprot 0x00000003
   nsects 0
   flags 0x0
*/
653