• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include <fcntl.h>
2 #include <inttypes.h>
3 #include <mach-o/compact_unwind_encoding.h>
4 #include <mach-o/loader.h>
5 #include <mach-o/nlist.h>
6 #include <mach/machine.h>
7 #include <stdbool.h>
8 #include <stdint.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <sys/errno.h>
13 #include <sys/mman.h>
14 #include <sys/stat.h>
15 #include <sys/types.h>
16 
// Constants this file uses when interpreting a 32-bit ARM64 compact unwind
// encoding.  All values are bit masks or masked field values.
// NOTE(review): presumably mirrors the UNWIND_ARM64_* definitions of the
// system compact_unwind_encoding.h -- confirm against the SDK in use.
17 enum {
// Mode selector field (bits 24-27) and the values it can hold.
18   UNWIND_ARM64_MODE_MASK = 0x0F000000,
19   UNWIND_ARM64_MODE_FRAMELESS = 0x02000000,
20   UNWIND_ARM64_MODE_DWARF = 0x03000000,
21   UNWIND_ARM64_MODE_FRAME = 0x04000000,
22 
// One flag bit per register pair; going by the names, a set bit marks the
// pair as saved in a frame-based function.
23   UNWIND_ARM64_FRAME_X19_X20_PAIR = 0x00000001,
24   UNWIND_ARM64_FRAME_X21_X22_PAIR = 0x00000002,
25   UNWIND_ARM64_FRAME_X23_X24_PAIR = 0x00000004,
26   UNWIND_ARM64_FRAME_X25_X26_PAIR = 0x00000008,
27   UNWIND_ARM64_FRAME_X27_X28_PAIR = 0x00000010,
28   UNWIND_ARM64_FRAME_D8_D9_PAIR = 0x00000100,
29   UNWIND_ARM64_FRAME_D10_D11_PAIR = 0x00000200,
30   UNWIND_ARM64_FRAME_D12_D13_PAIR = 0x00000400,
31   UNWIND_ARM64_FRAME_D14_D15_PAIR = 0x00000800,
32 
// Field masks for the frameless stack size (bits 12-23) and for the DWARF
// section offset (low 24 bits).
33   UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK = 0x00FFF000,
34   UNWIND_ARM64_DWARF_SECTION_OFFSET = 0x00FFFFFF,
35 };
36 
// Constants this file uses when interpreting a 32-bit ARM (armv7) compact
// unwind encoding.  All values are bit masks or masked field values.
// NOTE(review): presumably mirrors the UNWIND_ARM_* definitions of the
// system compact_unwind_encoding.h -- confirm against the SDK in use.
37 enum {
// Mode selector field (bits 24-27) and the values it can hold.
38   UNWIND_ARM_MODE_MASK = 0x0F000000,
39   UNWIND_ARM_MODE_FRAME = 0x01000000,
40   UNWIND_ARM_MODE_FRAME_D = 0x02000000,
41   UNWIND_ARM_MODE_DWARF = 0x04000000,
42 
// Two-bit stack adjust field (bits 22-23).
43   UNWIND_ARM_FRAME_STACK_ADJUST_MASK = 0x00C00000,
44 
// One flag bit per register; the names group them into a "first push"
// (r4-r6) and a "second push" (r8-r12).
45   UNWIND_ARM_FRAME_FIRST_PUSH_R4 = 0x00000001,
46   UNWIND_ARM_FRAME_FIRST_PUSH_R5 = 0x00000002,
47   UNWIND_ARM_FRAME_FIRST_PUSH_R6 = 0x00000004,
48 
49   UNWIND_ARM_FRAME_SECOND_PUSH_R8 = 0x00000008,
50   UNWIND_ARM_FRAME_SECOND_PUSH_R9 = 0x00000010,
51   UNWIND_ARM_FRAME_SECOND_PUSH_R10 = 0x00000020,
52   UNWIND_ARM_FRAME_SECOND_PUSH_R11 = 0x00000040,
53   UNWIND_ARM_FRAME_SECOND_PUSH_R12 = 0x00000080,
54 
// Three-bit count field (bits 8-10) used by the FRAME_D mode.
55   UNWIND_ARM_FRAME_D_REG_COUNT_MASK = 0x00000700,
56 
// Low 24 bits: offset field used by the DWARF mode.
57   UNWIND_ARM_DWARF_SECTION_OFFSET = 0x00FFFFFF,
58 };
59 
// Extract the bit-field selected by `mask` from `value`, shifted down so the
// field's least significant bit ends up at bit 0.
//
// `mask` must be a non-zero mask of at most 32 bits (it is handed to
// __builtin_ctz / __builtin_popcount).  `value` is fully parenthesized so
// that expression arguments such as `a | b` are extracted as a whole, and the
// field mask is built in 64-bit arithmetic so that a full 32-bit mask does
// not shift a 32-bit one out of range.
#define EXTRACT_BITS(value, mask)                                              \
  (((value) >> __builtin_ctz(mask)) &                                          \
   ((uint32_t)((UINT64_C(1) << __builtin_popcount(mask)) - 1)))
62 
63 // A quick sketch of a program which can parse the compact unwind info
64 // used on Darwin systems for exception handling.  The output of
65 // unwinddump will be more authoritative/reliable but this program
66 // can dump at least the UNWIND_X86_64_MODE_RBP_FRAME format entries
67 // correctly.
68 
// One entry of the in-memory symbol table: an address and the name that
// belongs to it.
struct symbol {
  uint64_t file_address;
  const char *name;
};

// qsort()/bsearch() comparator ordering `struct symbol` entries by ascending
// file_address.
//
// Returns a negative, zero or positive value when `a` sorts before, equal to
// or after `b`.  The addresses are compared as full 64-bit values; returning
// their (truncated) difference would report addresses that differ by a
// multiple of 2^32 as equal and could flip the sign for large distances.
int symbol_compare(const void *a, const void *b) {
  uint64_t lhs = ((const struct symbol *)a)->file_address;
  uint64_t rhs = ((const struct symbol *)b)->file_address;
  return (lhs > rhs) - (lhs < rhs);
}
78 
// State shared between the Mach-O load-command scan and the routines that
// print unwind encodings.  scan_macho_load_commands() fills in the cputype,
// address size, segment/section addresses, symbol table and function-start
// table; the print_encoding_* functions receive a copy of this struct.
79 struct baton {
80   cpu_type_t cputype;
81 
82   uint8_t *mach_header_start;    // pointer into this program's address space
83   uint8_t *compact_unwind_start; // pointer into this program's address space
84 
85   int addr_size; // 4 or 8 bytes, the size of addresses in this file
86 
87   uint64_t text_segment_vmaddr; // __TEXT segment vmaddr
88   uint64_t text_segment_file_offset;
89 
90   uint64_t text_section_vmaddr; // __TEXT,__text section vmaddr
91   uint64_t text_section_file_offset;
92 
93   uint64_t eh_section_file_address; // the file address of the __TEXT,__eh_frame
94                                     // section
95 
96   uint8_t
97       *lsda_array_start; // for the currently-being-processed first-level index
98   uint8_t
99       *lsda_array_end; // the lsda_array_start for the NEXT first-level index
100 
// Heap array built by scan_macho_load_commands() and kept sorted by
// file_address (see symbol_compare); symbols_count is the number of valid
// entries.
101   struct symbol *symbols;
102   int symbols_count;
103 
// Heap array of absolute function start addresses decoded from the
// LC_FUNCTION_STARTS load command.
104   uint64_t *function_start_addresses;
105   int function_start_addresses_count;
106 
107   int current_index_table_number;
108 
// Copies of the unwind-info headers; they are not touched by the load-command
// scan.  NOTE(review): presumably filled in while walking __unwind_info --
// confirm against the code that reads the section.
109   struct unwind_info_section_header unwind_header;
110   struct unwind_info_section_header_index_entry first_level_index_entry;
111   struct unwind_info_compressed_second_level_page_header
112       compressed_second_level_page_header;
113   struct unwind_info_regular_second_level_page_header
114       regular_second_level_page_header;
115 };
116 
// Decode one unsigned LEB128 value.
//
// `offset` points at the cursor into the encoded data; on return the cursor
// has been advanced past the last byte of the value.  The decoded value is
// returned.
//
// Each 7-bit group is widened to 64 bits before it is shifted into place;
// shifting it as a plain int is undefined once the shift reaches the width
// of int and silently corrupts values of 2^31 and above.  Groups that lie
// entirely beyond bit 63 (an over-long encoding) are consumed but ignored.
// The input is not bounds-checked: the caller must guarantee that a
// terminating byte (high bit clear) exists.
uint64_t read_leb128(uint8_t **offset) {
  uint64_t result = 0;
  unsigned shift = 0;
  for (;;) {
    uint8_t byte = *(*offset)++;
    if (shift < 64)
      result |= (uint64_t)(byte & 0x7f) << shift;
    if ((byte & 0x80) == 0)
      break;
    shift += 7;
  }
  return result;
}
131 
132 // step through the load commands in a thin mach-o binary,
133 // find the cputype and the start of the __TEXT,__unwind_info
134 // section, return a pointer to that section or NULL if not found.
135 
scan_macho_load_commands(struct baton * baton)136 static void scan_macho_load_commands(struct baton *baton) {
137   struct symtab_command symtab_cmd;
138   uint64_t linkedit_segment_vmaddr;
139   uint64_t linkedit_segment_file_offset;
140 
141   baton->compact_unwind_start = 0;
142 
143   uint32_t *magic = (uint32_t *)baton->mach_header_start;
144 
145   if (*magic != MH_MAGIC && *magic != MH_MAGIC_64) {
146     printf("Unexpected magic number 0x%x in header, exiting.", *magic);
147     exit(1);
148   }
149 
150   bool is_64bit = false;
151   if (*magic == MH_MAGIC_64)
152     is_64bit = true;
153 
154   uint8_t *offset = baton->mach_header_start;
155 
156   struct mach_header mh;
157   memcpy(&mh, offset, sizeof(struct mach_header));
158   if (is_64bit)
159     offset += sizeof(struct mach_header_64);
160   else
161     offset += sizeof(struct mach_header);
162 
163   if (is_64bit)
164     baton->addr_size = 8;
165   else
166     baton->addr_size = 4;
167 
168   baton->cputype = mh.cputype;
169 
170   uint8_t *start_of_load_commands = offset;
171 
172   uint32_t cur_cmd = 0;
173   while (cur_cmd < mh.ncmds &&
174          (offset - start_of_load_commands) < mh.sizeofcmds) {
175     struct load_command lc;
176     uint32_t *lc_cmd = (uint32_t *)offset;
177     uint32_t *lc_cmdsize = (uint32_t *)offset + 1;
178     uint8_t *start_of_this_load_cmd = offset;
179 
180     if (*lc_cmd == LC_SEGMENT || *lc_cmd == LC_SEGMENT_64) {
181       char segment_name[17];
182       segment_name[0] = '\0';
183       uint32_t nsects = 0;
184       uint64_t segment_offset = 0;
185       uint64_t segment_vmaddr = 0;
186 
187       if (*lc_cmd == LC_SEGMENT_64) {
188         struct segment_command_64 seg;
189         memcpy(&seg, offset, sizeof(struct segment_command_64));
190         memcpy(&segment_name, &seg.segname, 16);
191         segment_name[16] = '\0';
192         nsects = seg.nsects;
193         segment_offset = seg.fileoff;
194         segment_vmaddr = seg.vmaddr;
195         offset += sizeof(struct segment_command_64);
196         if ((seg.flags & SG_PROTECTED_VERSION_1) == SG_PROTECTED_VERSION_1) {
197           printf("Segment '%s' is encrypted.\n", segment_name);
198         }
199       }
200 
201       if (*lc_cmd == LC_SEGMENT) {
202         struct segment_command seg;
203         memcpy(&seg, offset, sizeof(struct segment_command));
204         memcpy(&segment_name, &seg.segname, 16);
205         segment_name[16] = '\0';
206         nsects = seg.nsects;
207         segment_offset = seg.fileoff;
208         segment_vmaddr = seg.vmaddr;
209         offset += sizeof(struct segment_command);
210         if ((seg.flags & SG_PROTECTED_VERSION_1) == SG_PROTECTED_VERSION_1) {
211           printf("Segment '%s' is encrypted.\n", segment_name);
212         }
213       }
214 
215       if (nsects != 0 && strcmp(segment_name, "__TEXT") == 0) {
216         baton->text_segment_vmaddr = segment_vmaddr;
217         baton->text_segment_file_offset = segment_offset;
218 
219         uint32_t current_sect = 0;
220         while (current_sect < nsects &&
221                (offset - start_of_this_load_cmd) < *lc_cmdsize) {
222           char sect_name[17];
223           memcpy(&sect_name, offset, 16);
224           sect_name[16] = '\0';
225           if (strcmp(sect_name, "__unwind_info") == 0) {
226             if (is_64bit) {
227               struct section_64 sect;
228               memset(&sect, 0, sizeof(struct section_64));
229               memcpy(&sect, offset, sizeof(struct section_64));
230               baton->compact_unwind_start =
231                   baton->mach_header_start + sect.offset;
232             } else {
233               struct section sect;
234               memset(&sect, 0, sizeof(struct section));
235               memcpy(&sect, offset, sizeof(struct section));
236               baton->compact_unwind_start =
237                   baton->mach_header_start + sect.offset;
238             }
239           }
240           if (strcmp(sect_name, "__eh_frame") == 0) {
241             if (is_64bit) {
242               struct section_64 sect;
243               memset(&sect, 0, sizeof(struct section_64));
244               memcpy(&sect, offset, sizeof(struct section_64));
245               baton->eh_section_file_address = sect.addr;
246             } else {
247               struct section sect;
248               memset(&sect, 0, sizeof(struct section));
249               memcpy(&sect, offset, sizeof(struct section));
250               baton->eh_section_file_address = sect.addr;
251             }
252           }
253           if (strcmp(sect_name, "__text") == 0) {
254             if (is_64bit) {
255               struct section_64 sect;
256               memset(&sect, 0, sizeof(struct section_64));
257               memcpy(&sect, offset, sizeof(struct section_64));
258               baton->text_section_vmaddr = sect.addr;
259               baton->text_section_file_offset = sect.offset;
260             } else {
261               struct section sect;
262               memset(&sect, 0, sizeof(struct section));
263               memcpy(&sect, offset, sizeof(struct section));
264               baton->text_section_vmaddr = sect.addr;
265             }
266           }
267           if (is_64bit) {
268             offset += sizeof(struct section_64);
269           } else {
270             offset += sizeof(struct section);
271           }
272         }
273       }
274 
275       if (strcmp(segment_name, "__LINKEDIT") == 0) {
276         linkedit_segment_vmaddr = segment_vmaddr;
277         linkedit_segment_file_offset = segment_offset;
278       }
279     }
280 
281     if (*lc_cmd == LC_SYMTAB) {
282       memcpy(&symtab_cmd, offset, sizeof(struct symtab_command));
283     }
284 
285     if (*lc_cmd == LC_DYSYMTAB) {
286       struct dysymtab_command dysymtab_cmd;
287       memcpy(&dysymtab_cmd, offset, sizeof(struct dysymtab_command));
288 
289       int nlist_size = 12;
290       if (is_64bit)
291         nlist_size = 16;
292 
293       char *string_table =
294           (char *)(baton->mach_header_start + symtab_cmd.stroff);
295       uint8_t *local_syms = baton->mach_header_start + symtab_cmd.symoff +
296                             (dysymtab_cmd.ilocalsym * nlist_size);
297       int local_syms_count = dysymtab_cmd.nlocalsym;
298       uint8_t *exported_syms = baton->mach_header_start + symtab_cmd.symoff +
299                                (dysymtab_cmd.iextdefsym * nlist_size);
300       int exported_syms_count = dysymtab_cmd.nextdefsym;
301 
302       // We're only going to create records for a small number of these symbols
303       // but to
304       // simplify the memory management I'll allocate enough space to store all
305       // of them.
306       baton->symbols = (struct symbol *)malloc(
307           sizeof(struct symbol) * (local_syms_count + exported_syms_count));
308       baton->symbols_count = 0;
309 
310       for (int i = 0; i < local_syms_count; i++) {
311         struct nlist_64 nlist;
312         memset(&nlist, 0, sizeof(struct nlist_64));
313         if (is_64bit) {
314           memcpy(&nlist, local_syms + (i * nlist_size),
315                  sizeof(struct nlist_64));
316         } else {
317           struct nlist nlist_32;
318           memset(&nlist_32, 0, sizeof(struct nlist));
319           memcpy(&nlist_32, local_syms + (i * nlist_size),
320                  sizeof(struct nlist));
321           nlist.n_un.n_strx = nlist_32.n_un.n_strx;
322           nlist.n_type = nlist_32.n_type;
323           nlist.n_sect = nlist_32.n_sect;
324           nlist.n_desc = nlist_32.n_desc;
325           nlist.n_value = nlist_32.n_value;
326         }
327         if ((nlist.n_type & N_STAB) == 0 &&
328             ((nlist.n_type & N_EXT) == 1 ||
329              ((nlist.n_type & N_TYPE) == N_TYPE && nlist.n_sect != NO_SECT)) &&
330             nlist.n_value != 0 && nlist.n_value != baton->text_segment_vmaddr) {
331           baton->symbols[baton->symbols_count].file_address = nlist.n_value;
332           if (baton->cputype == CPU_TYPE_ARM)
333             baton->symbols[baton->symbols_count].file_address =
334                 baton->symbols[baton->symbols_count].file_address & ~1;
335           baton->symbols[baton->symbols_count].name =
336               string_table + nlist.n_un.n_strx;
337           baton->symbols_count++;
338         }
339       }
340 
341       for (int i = 0; i < exported_syms_count; i++) {
342         struct nlist_64 nlist;
343         memset(&nlist, 0, sizeof(struct nlist_64));
344         if (is_64bit) {
345           memcpy(&nlist, exported_syms + (i * nlist_size),
346                  sizeof(struct nlist_64));
347         } else {
348           struct nlist nlist_32;
349           memcpy(&nlist_32, exported_syms + (i * nlist_size),
350                  sizeof(struct nlist));
351           nlist.n_un.n_strx = nlist_32.n_un.n_strx;
352           nlist.n_type = nlist_32.n_type;
353           nlist.n_sect = nlist_32.n_sect;
354           nlist.n_desc = nlist_32.n_desc;
355           nlist.n_value = nlist_32.n_value;
356         }
357         if ((nlist.n_type & N_STAB) == 0 &&
358             ((nlist.n_type & N_EXT) == 1 ||
359              ((nlist.n_type & N_TYPE) == N_TYPE && nlist.n_sect != NO_SECT)) &&
360             nlist.n_value != 0 && nlist.n_value != baton->text_segment_vmaddr) {
361           baton->symbols[baton->symbols_count].file_address = nlist.n_value;
362           if (baton->cputype == CPU_TYPE_ARM)
363             baton->symbols[baton->symbols_count].file_address =
364                 baton->symbols[baton->symbols_count].file_address & ~1;
365           baton->symbols[baton->symbols_count].name =
366               string_table + nlist.n_un.n_strx;
367           baton->symbols_count++;
368         }
369       }
370 
371       qsort(baton->symbols, baton->symbols_count, sizeof(struct symbol),
372             symbol_compare);
373     }
374 
375     if (*lc_cmd == LC_FUNCTION_STARTS) {
376       struct linkedit_data_command function_starts_cmd;
377       memcpy(&function_starts_cmd, offset,
378              sizeof(struct linkedit_data_command));
379 
380       uint8_t *funcstarts_offset =
381           baton->mach_header_start + function_starts_cmd.dataoff;
382       uint8_t *function_end = funcstarts_offset + function_starts_cmd.datasize;
383       int count = 0;
384 
385       while (funcstarts_offset < function_end) {
386         if (read_leb128(&funcstarts_offset) != 0) {
387           count++;
388         }
389       }
390 
391       baton->function_start_addresses =
392           (uint64_t *)malloc(sizeof(uint64_t) * count);
393       baton->function_start_addresses_count = count;
394 
395       funcstarts_offset =
396           baton->mach_header_start + function_starts_cmd.dataoff;
397       uint64_t current_pc = baton->text_segment_vmaddr;
398       int i = 0;
399       while (funcstarts_offset < function_end) {
400         uint64_t func_start = read_leb128(&funcstarts_offset);
401         if (func_start != 0) {
402           current_pc += func_start;
403           baton->function_start_addresses[i++] = current_pc;
404         }
405       }
406     }
407 
408     offset = start_of_this_load_cmd + *lc_cmdsize;
409     cur_cmd++;
410   }
411 
412   // Augment the symbol table with the function starts table -- adding symbol
413   // entries
414   // for functions that were stripped.
415 
416   int unnamed_functions_to_add = 0;
417   for (int i = 0; i < baton->function_start_addresses_count; i++) {
418     struct symbol search_key;
419     search_key.file_address = baton->function_start_addresses[i];
420     if (baton->cputype == CPU_TYPE_ARM)
421       search_key.file_address = search_key.file_address & ~1;
422     struct symbol *sym =
423         bsearch(&search_key, baton->symbols, baton->symbols_count,
424                 sizeof(struct symbol), symbol_compare);
425     if (sym == NULL)
426       unnamed_functions_to_add++;
427   }
428 
429   baton->symbols = (struct symbol *)realloc(
430       baton->symbols, sizeof(struct symbol) *
431                           (baton->symbols_count + unnamed_functions_to_add));
432 
433   int current_unnamed_symbol = 1;
434   int number_symbols_added = 0;
435   for (int i = 0; i < baton->function_start_addresses_count; i++) {
436     struct symbol search_key;
437     search_key.file_address = baton->function_start_addresses[i];
438     if (baton->cputype == CPU_TYPE_ARM)
439       search_key.file_address = search_key.file_address & ~1;
440     struct symbol *sym =
441         bsearch(&search_key, baton->symbols, baton->symbols_count,
442                 sizeof(struct symbol), symbol_compare);
443     if (sym == NULL) {
444       char *name;
445       asprintf(&name, "unnamed function #%d", current_unnamed_symbol++);
446       baton->symbols[baton->symbols_count + number_symbols_added].file_address =
447           baton->function_start_addresses[i];
448       baton->symbols[baton->symbols_count + number_symbols_added].name = name;
449       number_symbols_added++;
450     }
451   }
452   baton->symbols_count += number_symbols_added;
453   qsort(baton->symbols, baton->symbols_count, sizeof(struct symbol),
454         symbol_compare);
455 
456   //    printf ("function start addresses\n");
457   //    for (int i = 0; i < baton->function_start_addresses_count; i++)
458   //    {
459   //        printf ("0x%012llx\n", baton->function_start_addresses[i]);
460   //    }
461 
462   //    printf ("symbol table names & addresses\n");
463   //    for (int i = 0; i < baton->symbols_count; i++)
464   //    {
465   //        printf ("0x%012llx %s\n", baton->symbols[i].file_address,
466   //        baton->symbols[i].name);
467   //    }
468 }
469 
print_encoding_x86_64(struct baton baton,uint8_t * function_start,uint32_t encoding)470 void print_encoding_x86_64(struct baton baton, uint8_t *function_start,
471                            uint32_t encoding) {
472   int mode = encoding & UNWIND_X86_64_MODE_MASK;
473   switch (mode) {
474   case UNWIND_X86_64_MODE_RBP_FRAME: {
475     printf("frame func: CFA is rbp+%d ", 16);
476     printf(" rip=[CFA-8] rbp=[CFA-16]");
477     uint32_t saved_registers_offset =
478         EXTRACT_BITS(encoding, UNWIND_X86_64_RBP_FRAME_OFFSET);
479 
480     uint32_t saved_registers_locations =
481         EXTRACT_BITS(encoding, UNWIND_X86_64_RBP_FRAME_REGISTERS);
482 
483     saved_registers_offset += 2;
484 
485     for (int i = 0; i < 5; i++) {
486       switch (saved_registers_locations & 0x7) {
487       case UNWIND_X86_64_REG_NONE:
488         break;
489       case UNWIND_X86_64_REG_RBX:
490         printf(" rbx=[CFA-%d]", saved_registers_offset * 8);
491         break;
492       case UNWIND_X86_64_REG_R12:
493         printf(" r12=[CFA-%d]", saved_registers_offset * 8);
494         break;
495       case UNWIND_X86_64_REG_R13:
496         printf(" r13=[CFA-%d]", saved_registers_offset * 8);
497         break;
498       case UNWIND_X86_64_REG_R14:
499         printf(" r14=[CFA-%d]", saved_registers_offset * 8);
500         break;
501       case UNWIND_X86_64_REG_R15:
502         printf(" r15=[CFA-%d]", saved_registers_offset * 8);
503         break;
504       }
505       saved_registers_offset--;
506       saved_registers_locations >>= 3;
507     }
508   } break;
509 
510   case UNWIND_X86_64_MODE_STACK_IND:
511   case UNWIND_X86_64_MODE_STACK_IMMD: {
512     uint32_t stack_size =
513         EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_SIZE);
514     uint32_t register_count =
515         EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_REG_COUNT);
516     uint32_t permutation =
517         EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_REG_PERMUTATION);
518 
519     if (mode == UNWIND_X86_64_MODE_STACK_IND && function_start) {
520       uint32_t stack_adjust =
521           EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_ADJUST);
522 
523       // offset into the function instructions; 0 == beginning of first
524       // instruction
525       uint32_t offset_to_subl_insn =
526           EXTRACT_BITS(encoding, UNWIND_X86_64_FRAMELESS_STACK_SIZE);
527 
528       stack_size = *((uint32_t *)(function_start + offset_to_subl_insn));
529 
530       stack_size += stack_adjust * 8;
531 
532       printf("large stack ");
533     }
534 
535     if (mode == UNWIND_X86_64_MODE_STACK_IND) {
536       printf("frameless function: stack size %d, register count %d ",
537              stack_size * 8, register_count);
538     } else {
539       printf("frameless function: stack size %d, register count %d ",
540              stack_size, register_count);
541     }
542 
543     if (register_count == 0) {
544       printf(" no registers saved");
545     } else {
546 
547       // We need to include (up to) 6 registers in 10 bits.
548       // That would be 18 bits if we just used 3 bits per reg to indicate
549       // the order they're saved on the stack.
550       //
551       // This is done with Lehmer code permutation, e.g. see
552       // http://stackoverflow.com/questions/1506078/fast-permutation-number-permutation-mapping-algorithms
553       int permunreg[6];
554 
555       // This decodes the variable-base number in the 10 bits
556       // and gives us the Lehmer code sequence which can then
557       // be decoded.
558 
559       switch (register_count) {
560       case 6:
561         permunreg[0] = permutation / 120; // 120 == 5!
562         permutation -= (permunreg[0] * 120);
563         permunreg[1] = permutation / 24; // 24 == 4!
564         permutation -= (permunreg[1] * 24);
565         permunreg[2] = permutation / 6; // 6 == 3!
566         permutation -= (permunreg[2] * 6);
567         permunreg[3] = permutation / 2; // 2 == 2!
568         permutation -= (permunreg[3] * 2);
569         permunreg[4] = permutation; // 1 == 1!
570         permunreg[5] = 0;
571         break;
572       case 5:
573         permunreg[0] = permutation / 120;
574         permutation -= (permunreg[0] * 120);
575         permunreg[1] = permutation / 24;
576         permutation -= (permunreg[1] * 24);
577         permunreg[2] = permutation / 6;
578         permutation -= (permunreg[2] * 6);
579         permunreg[3] = permutation / 2;
580         permutation -= (permunreg[3] * 2);
581         permunreg[4] = permutation;
582         break;
583       case 4:
584         permunreg[0] = permutation / 60;
585         permutation -= (permunreg[0] * 60);
586         permunreg[1] = permutation / 12;
587         permutation -= (permunreg[1] * 12);
588         permunreg[2] = permutation / 3;
589         permutation -= (permunreg[2] * 3);
590         permunreg[3] = permutation;
591         break;
592       case 3:
593         permunreg[0] = permutation / 20;
594         permutation -= (permunreg[0] * 20);
595         permunreg[1] = permutation / 4;
596         permutation -= (permunreg[1] * 4);
597         permunreg[2] = permutation;
598         break;
599       case 2:
600         permunreg[0] = permutation / 5;
601         permutation -= (permunreg[0] * 5);
602         permunreg[1] = permutation;
603         break;
604       case 1:
605         permunreg[0] = permutation;
606         break;
607       }
608 
609       // Decode the Lehmer code for this permutation of
610       // the registers v. http://en.wikipedia.org/wiki/Lehmer_code
611 
612       int registers[6];
613       bool used[7] = {false, false, false, false, false, false, false};
614       for (int i = 0; i < register_count; i++) {
615         int renum = 0;
616         for (int j = 1; j < 7; j++) {
617           if (used[j] == false) {
618             if (renum == permunreg[i]) {
619               registers[i] = j;
620               used[j] = true;
621               break;
622             }
623             renum++;
624           }
625         }
626       }
627 
628       if (mode == UNWIND_X86_64_MODE_STACK_IND) {
629         printf(" CFA is rsp+%d ", stack_size);
630       } else {
631         printf(" CFA is rsp+%d ", stack_size * 8);
632       }
633 
634       uint32_t saved_registers_offset = 1;
635       printf(" rip=[CFA-%d]", saved_registers_offset * 8);
636       saved_registers_offset++;
637 
638       for (int i = (sizeof(registers) / sizeof(int)) - 1; i >= 0; i--) {
639         switch (registers[i]) {
640         case UNWIND_X86_64_REG_NONE:
641           break;
642         case UNWIND_X86_64_REG_RBX:
643           printf(" rbx=[CFA-%d]", saved_registers_offset * 8);
644           saved_registers_offset++;
645           break;
646         case UNWIND_X86_64_REG_R12:
647           printf(" r12=[CFA-%d]", saved_registers_offset * 8);
648           saved_registers_offset++;
649           break;
650         case UNWIND_X86_64_REG_R13:
651           printf(" r13=[CFA-%d]", saved_registers_offset * 8);
652           saved_registers_offset++;
653           break;
654         case UNWIND_X86_64_REG_R14:
655           printf(" r14=[CFA-%d]", saved_registers_offset * 8);
656           saved_registers_offset++;
657           break;
658         case UNWIND_X86_64_REG_R15:
659           printf(" r15=[CFA-%d]", saved_registers_offset * 8);
660           saved_registers_offset++;
661           break;
662         case UNWIND_X86_64_REG_RBP:
663           printf(" rbp=[CFA-%d]", saved_registers_offset * 8);
664           saved_registers_offset++;
665           break;
666         }
667       }
668     }
669 
670   } break;
671 
672   case UNWIND_X86_64_MODE_DWARF: {
673     uint32_t dwarf_offset = encoding & UNWIND_X86_DWARF_SECTION_OFFSET;
674     printf(
675         "DWARF unwind instructions: FDE at offset %d (file address 0x%" PRIx64
676         ")",
677         dwarf_offset, dwarf_offset + baton.eh_section_file_address);
678   } break;
679 
680   case 0: {
681     printf(" no unwind information");
682   } break;
683   }
684 }
685 
print_encoding_i386(struct baton baton,uint8_t * function_start,uint32_t encoding)686 void print_encoding_i386(struct baton baton, uint8_t *function_start,
687                          uint32_t encoding) {
688   int mode = encoding & UNWIND_X86_MODE_MASK;
689   switch (mode) {
690   case UNWIND_X86_MODE_EBP_FRAME: {
691     printf("frame func: CFA is ebp+%d ", 8);
692     printf(" eip=[CFA-4] ebp=[CFA-8]");
693     uint32_t saved_registers_offset =
694         EXTRACT_BITS(encoding, UNWIND_X86_EBP_FRAME_OFFSET);
695 
696     uint32_t saved_registers_locations =
697         EXTRACT_BITS(encoding, UNWIND_X86_EBP_FRAME_REGISTERS);
698 
699     saved_registers_offset += 2;
700 
701     for (int i = 0; i < 5; i++) {
702       switch (saved_registers_locations & 0x7) {
703       case UNWIND_X86_REG_NONE:
704         break;
705       case UNWIND_X86_REG_EBX:
706         printf(" ebx=[CFA-%d]", saved_registers_offset * 4);
707         break;
708       case UNWIND_X86_REG_ECX:
709         printf(" ecx=[CFA-%d]", saved_registers_offset * 4);
710         break;
711       case UNWIND_X86_REG_EDX:
712         printf(" edx=[CFA-%d]", saved_registers_offset * 4);
713         break;
714       case UNWIND_X86_REG_EDI:
715         printf(" edi=[CFA-%d]", saved_registers_offset * 4);
716         break;
717       case UNWIND_X86_REG_ESI:
718         printf(" esi=[CFA-%d]", saved_registers_offset * 4);
719         break;
720       }
721       saved_registers_offset--;
722       saved_registers_locations >>= 3;
723     }
724   } break;
725 
726   case UNWIND_X86_MODE_STACK_IND:
727   case UNWIND_X86_MODE_STACK_IMMD: {
728     uint32_t stack_size =
729         EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_SIZE);
730     uint32_t register_count =
731         EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_REG_COUNT);
732     uint32_t permutation =
733         EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_REG_PERMUTATION);
734 
735     if (mode == UNWIND_X86_MODE_STACK_IND && function_start) {
736       uint32_t stack_adjust =
737           EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_ADJUST);
738 
739       // offset into the function instructions; 0 == beginning of first
740       // instruction
741       uint32_t offset_to_subl_insn =
742           EXTRACT_BITS(encoding, UNWIND_X86_FRAMELESS_STACK_SIZE);
743 
744       stack_size = *((uint32_t *)(function_start + offset_to_subl_insn));
745 
746       stack_size += stack_adjust * 4;
747 
748       printf("large stack ");
749     }
750 
751     if (mode == UNWIND_X86_MODE_STACK_IND) {
752       printf("frameless function: stack size %d, register count %d ",
753              stack_size, register_count);
754     } else {
755       printf("frameless function: stack size %d, register count %d ",
756              stack_size * 4, register_count);
757     }
758 
759     if (register_count == 0) {
760       printf(" no registers saved");
761     } else {
762 
763       // We need to include (up to) 6 registers in 10 bits.
764       // That would be 18 bits if we just used 3 bits per reg to indicate
765       // the order they're saved on the stack.
766       //
767       // This is done with Lehmer code permutation, e.g. see
768       // http://stackoverflow.com/questions/1506078/fast-permutation-number-permutation-mapping-algorithms
769       int permunreg[6];
770 
771       // This decodes the variable-base number in the 10 bits
772       // and gives us the Lehmer code sequence which can then
773       // be decoded.
774 
775       switch (register_count) {
776       case 6:
777         permunreg[0] = permutation / 120; // 120 == 5!
778         permutation -= (permunreg[0] * 120);
779         permunreg[1] = permutation / 24; // 24 == 4!
780         permutation -= (permunreg[1] * 24);
781         permunreg[2] = permutation / 6; // 6 == 3!
782         permutation -= (permunreg[2] * 6);
783         permunreg[3] = permutation / 2; // 2 == 2!
784         permutation -= (permunreg[3] * 2);
785         permunreg[4] = permutation; // 1 == 1!
786         permunreg[5] = 0;
787         break;
788       case 5:
789         permunreg[0] = permutation / 120;
790         permutation -= (permunreg[0] * 120);
791         permunreg[1] = permutation / 24;
792         permutation -= (permunreg[1] * 24);
793         permunreg[2] = permutation / 6;
794         permutation -= (permunreg[2] * 6);
795         permunreg[3] = permutation / 2;
796         permutation -= (permunreg[3] * 2);
797         permunreg[4] = permutation;
798         break;
799       case 4:
800         permunreg[0] = permutation / 60;
801         permutation -= (permunreg[0] * 60);
802         permunreg[1] = permutation / 12;
803         permutation -= (permunreg[1] * 12);
804         permunreg[2] = permutation / 3;
805         permutation -= (permunreg[2] * 3);
806         permunreg[3] = permutation;
807         break;
808       case 3:
809         permunreg[0] = permutation / 20;
810         permutation -= (permunreg[0] * 20);
811         permunreg[1] = permutation / 4;
812         permutation -= (permunreg[1] * 4);
813         permunreg[2] = permutation;
814         break;
815       case 2:
816         permunreg[0] = permutation / 5;
817         permutation -= (permunreg[0] * 5);
818         permunreg[1] = permutation;
819         break;
820       case 1:
821         permunreg[0] = permutation;
822         break;
823       }
824 
825       // Decode the Lehmer code for this permutation of
826       // the registers v. http://en.wikipedia.org/wiki/Lehmer_code
827 
828       int registers[6];
829       bool used[7] = {false, false, false, false, false, false, false};
830       for (int i = 0; i < register_count; i++) {
831         int renum = 0;
832         for (int j = 1; j < 7; j++) {
833           if (used[j] == false) {
834             if (renum == permunreg[i]) {
835               registers[i] = j;
836               used[j] = true;
837               break;
838             }
839             renum++;
840           }
841         }
842       }
843 
844       if (mode == UNWIND_X86_MODE_STACK_IND) {
845         printf(" CFA is esp+%d ", stack_size);
846       } else {
847         printf(" CFA is esp+%d ", stack_size * 4);
848       }
849 
850       uint32_t saved_registers_offset = 1;
851       printf(" eip=[CFA-%d]", saved_registers_offset * 4);
852       saved_registers_offset++;
853 
854       for (int i = (sizeof(registers) / sizeof(int)) - 1; i >= 0; i--) {
855         switch (registers[i]) {
856         case UNWIND_X86_REG_NONE:
857           break;
858         case UNWIND_X86_REG_EBX:
859           printf(" ebx=[CFA-%d]", saved_registers_offset * 4);
860           saved_registers_offset++;
861           break;
862         case UNWIND_X86_REG_ECX:
863           printf(" ecx=[CFA-%d]", saved_registers_offset * 4);
864           saved_registers_offset++;
865           break;
866         case UNWIND_X86_REG_EDX:
867           printf(" edx=[CFA-%d]", saved_registers_offset * 4);
868           saved_registers_offset++;
869           break;
870         case UNWIND_X86_REG_EDI:
871           printf(" edi=[CFA-%d]", saved_registers_offset * 4);
872           saved_registers_offset++;
873           break;
874         case UNWIND_X86_REG_ESI:
875           printf(" esi=[CFA-%d]", saved_registers_offset * 4);
876           saved_registers_offset++;
877           break;
878         case UNWIND_X86_REG_EBP:
879           printf(" ebp=[CFA-%d]", saved_registers_offset * 4);
880           saved_registers_offset++;
881           break;
882         }
883       }
884     }
885 
886   } break;
887 
888   case UNWIND_X86_MODE_DWARF: {
889     uint32_t dwarf_offset = encoding & UNWIND_X86_DWARF_SECTION_OFFSET;
890     printf(
891         "DWARF unwind instructions: FDE at offset %d (file address 0x%" PRIx64
892         ")",
893         dwarf_offset, dwarf_offset + baton.eh_section_file_address);
894   } break;
895 
896   case 0: {
897     printf(" no unwind information");
898   } break;
899   }
900 }
901 
print_encoding_arm64(struct baton baton,uint8_t * function_start,uint32_t encoding)902 void print_encoding_arm64(struct baton baton, uint8_t *function_start,
903                           uint32_t encoding) {
904   const int wordsize = 8;
905   int mode = encoding & UNWIND_ARM64_MODE_MASK;
906   switch (mode) {
907   case UNWIND_ARM64_MODE_FRAME: {
908     printf("frame func: CFA is fp+%d ", 16);
909     printf(" pc=[CFA-8] fp=[CFA-16]");
910     int reg_pairs_saved_count = 1;
911     uint32_t saved_register_bits = encoding & 0xfff;
912     if (saved_register_bits & UNWIND_ARM64_FRAME_X19_X20_PAIR) {
913       int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
914       cfa_offset -= wordsize;
915       printf(" x19=[CFA%d]", cfa_offset);
916       cfa_offset -= wordsize;
917       printf(" x20=[CFA%d]", cfa_offset);
918       reg_pairs_saved_count++;
919     }
920     if (saved_register_bits & UNWIND_ARM64_FRAME_X21_X22_PAIR) {
921       int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
922       cfa_offset -= wordsize;
923       printf(" x21=[CFA%d]", cfa_offset);
924       cfa_offset -= wordsize;
925       printf(" x22=[CFA%d]", cfa_offset);
926       reg_pairs_saved_count++;
927     }
928     if (saved_register_bits & UNWIND_ARM64_FRAME_X23_X24_PAIR) {
929       int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
930       cfa_offset -= wordsize;
931       printf(" x23=[CFA%d]", cfa_offset);
932       cfa_offset -= wordsize;
933       printf(" x24=[CFA%d]", cfa_offset);
934       reg_pairs_saved_count++;
935     }
936     if (saved_register_bits & UNWIND_ARM64_FRAME_X25_X26_PAIR) {
937       int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
938       cfa_offset -= wordsize;
939       printf(" x25=[CFA%d]", cfa_offset);
940       cfa_offset -= wordsize;
941       printf(" x26=[CFA%d]", cfa_offset);
942       reg_pairs_saved_count++;
943     }
944     if (saved_register_bits & UNWIND_ARM64_FRAME_X27_X28_PAIR) {
945       int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
946       cfa_offset -= wordsize;
947       printf(" x27=[CFA%d]", cfa_offset);
948       cfa_offset -= wordsize;
949       printf(" x28=[CFA%d]", cfa_offset);
950       reg_pairs_saved_count++;
951     }
952     if (saved_register_bits & UNWIND_ARM64_FRAME_D8_D9_PAIR) {
953       int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
954       cfa_offset -= wordsize;
955       printf(" d8=[CFA%d]", cfa_offset);
956       cfa_offset -= wordsize;
957       printf(" d9=[CFA%d]", cfa_offset);
958       reg_pairs_saved_count++;
959     }
960     if (saved_register_bits & UNWIND_ARM64_FRAME_D10_D11_PAIR) {
961       int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
962       cfa_offset -= wordsize;
963       printf(" d10=[CFA%d]", cfa_offset);
964       cfa_offset -= wordsize;
965       printf(" d11=[CFA%d]", cfa_offset);
966       reg_pairs_saved_count++;
967     }
968     if (saved_register_bits & UNWIND_ARM64_FRAME_D12_D13_PAIR) {
969       int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
970       cfa_offset -= wordsize;
971       printf(" d12=[CFA%d]", cfa_offset);
972       cfa_offset -= wordsize;
973       printf(" d13=[CFA%d]", cfa_offset);
974       reg_pairs_saved_count++;
975     }
976     if (saved_register_bits & UNWIND_ARM64_FRAME_D14_D15_PAIR) {
977       int cfa_offset = reg_pairs_saved_count * -2 * wordsize;
978       cfa_offset -= wordsize;
979       printf(" d14=[CFA%d]", cfa_offset);
980       cfa_offset -= wordsize;
981       printf(" d15=[CFA%d]", cfa_offset);
982       reg_pairs_saved_count++;
983     }
984 
985   } break;
986 
987   case UNWIND_ARM64_MODE_FRAMELESS: {
988     uint32_t stack_size = encoding & UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK;
989     printf("frameless function: stack size %d ", stack_size * 16);
990 
991   } break;
992 
993   case UNWIND_ARM64_MODE_DWARF: {
994     uint32_t dwarf_offset = encoding & UNWIND_ARM64_DWARF_SECTION_OFFSET;
995     printf(
996         "DWARF unwind instructions: FDE at offset %d (file address 0x%" PRIx64
997         ")",
998         dwarf_offset, dwarf_offset + baton.eh_section_file_address);
999   } break;
1000 
1001   case 0: {
1002     printf(" no unwind information");
1003   } break;
1004   }
1005 }
1006 
print_encoding_armv7(struct baton baton,uint8_t * function_start,uint32_t encoding)1007 void print_encoding_armv7(struct baton baton, uint8_t *function_start,
1008                           uint32_t encoding) {
1009   const int wordsize = 4;
1010   int mode = encoding & UNWIND_ARM_MODE_MASK;
1011   switch (mode) {
1012   case UNWIND_ARM_MODE_FRAME_D:
1013   case UNWIND_ARM_MODE_FRAME: {
1014     int stack_adjust =
1015         EXTRACT_BITS(encoding, UNWIND_ARM_FRAME_STACK_ADJUST_MASK) * wordsize;
1016 
1017     printf("frame func: CFA is fp+%d ", (2 * wordsize) + stack_adjust);
1018     int cfa_offset = -stack_adjust;
1019 
1020     cfa_offset -= wordsize;
1021     printf(" pc=[CFA%d]", cfa_offset);
1022     cfa_offset -= wordsize;
1023     printf(" fp=[CFA%d]", cfa_offset);
1024 
1025     uint32_t saved_register_bits = encoding & 0xff;
1026     if (saved_register_bits & UNWIND_ARM_FRAME_FIRST_PUSH_R6) {
1027       cfa_offset -= wordsize;
1028       printf(" r6=[CFA%d]", cfa_offset);
1029     }
1030     if (saved_register_bits & UNWIND_ARM_FRAME_FIRST_PUSH_R5) {
1031       cfa_offset -= wordsize;
1032       printf(" r5=[CFA%d]", cfa_offset);
1033     }
1034     if (saved_register_bits & UNWIND_ARM_FRAME_FIRST_PUSH_R4) {
1035       cfa_offset -= wordsize;
1036       printf(" r4=[CFA%d]", cfa_offset);
1037     }
1038     if (saved_register_bits & UNWIND_ARM_FRAME_SECOND_PUSH_R12) {
1039       cfa_offset -= wordsize;
1040       printf(" r12=[CFA%d]", cfa_offset);
1041     }
1042     if (saved_register_bits & UNWIND_ARM_FRAME_SECOND_PUSH_R11) {
1043       cfa_offset -= wordsize;
1044       printf(" r11=[CFA%d]", cfa_offset);
1045     }
1046     if (saved_register_bits & UNWIND_ARM_FRAME_SECOND_PUSH_R10) {
1047       cfa_offset -= wordsize;
1048       printf(" r10=[CFA%d]", cfa_offset);
1049     }
1050     if (saved_register_bits & UNWIND_ARM_FRAME_SECOND_PUSH_R9) {
1051       cfa_offset -= wordsize;
1052       printf(" r9=[CFA%d]", cfa_offset);
1053     }
1054     if (saved_register_bits & UNWIND_ARM_FRAME_SECOND_PUSH_R8) {
1055       cfa_offset -= wordsize;
1056       printf(" r8=[CFA%d]", cfa_offset);
1057     }
1058 
1059     if (mode == UNWIND_ARM_MODE_FRAME_D) {
1060       uint32_t d_reg_bits =
1061           EXTRACT_BITS(encoding, UNWIND_ARM_FRAME_D_REG_COUNT_MASK);
1062       switch (d_reg_bits) {
1063       case 0:
1064         // vpush {d8}
1065         cfa_offset -= 8;
1066         printf(" d8=[CFA%d]", cfa_offset);
1067         break;
1068       case 1:
1069         // vpush {d10}
1070         // vpush {d8}
1071         cfa_offset -= 8;
1072         printf(" d10=[CFA%d]", cfa_offset);
1073         cfa_offset -= 8;
1074         printf(" d8=[CFA%d]", cfa_offset);
1075         break;
1076       case 2:
1077         // vpush {d12}
1078         // vpush {d10}
1079         // vpush {d8}
1080         cfa_offset -= 8;
1081         printf(" d12=[CFA%d]", cfa_offset);
1082         cfa_offset -= 8;
1083         printf(" d10=[CFA%d]", cfa_offset);
1084         cfa_offset -= 8;
1085         printf(" d8=[CFA%d]", cfa_offset);
1086         break;
1087       case 3:
1088         // vpush {d14}
1089         // vpush {d12}
1090         // vpush {d10}
1091         // vpush {d8}
1092         cfa_offset -= 8;
1093         printf(" d14=[CFA%d]", cfa_offset);
1094         cfa_offset -= 8;
1095         printf(" d12=[CFA%d]", cfa_offset);
1096         cfa_offset -= 8;
1097         printf(" d10=[CFA%d]", cfa_offset);
1098         cfa_offset -= 8;
1099         printf(" d8=[CFA%d]", cfa_offset);
1100         break;
1101       case 4:
1102         // vpush {d14}
1103         // vpush {d12}
1104         // sp = (sp - 24) & (-16);
1105         // vst   {d8, d9, d10}
1106         printf(" d14, d12, d10, d9, d8");
1107         break;
1108       case 5:
1109         // vpush {d14}
1110         // sp = (sp - 40) & (-16);
1111         // vst   {d8, d9, d10, d11}
1112         // vst   {d12}
1113         printf(" d14, d11, d10, d9, d8, d12");
1114         break;
1115       case 6:
1116         // sp = (sp - 56) & (-16);
1117         // vst   {d8, d9, d10, d11}
1118         // vst   {d12, d13, d14}
1119         printf(" d11, d10, d9, d8, d14, d13, d12");
1120         break;
1121       case 7:
1122         // sp = (sp - 64) & (-16);
1123         // vst   {d8, d9, d10, d11}
1124         // vst   {d12, d13, d14, d15}
1125         printf(" d11, d10, d9, d8, d15, d14, d13, d12");
1126         break;
1127       }
1128     }
1129   } break;
1130 
1131   case UNWIND_ARM_MODE_DWARF: {
1132     uint32_t dwarf_offset = encoding & UNWIND_ARM_DWARF_SECTION_OFFSET;
1133     printf(
1134         "DWARF unwind instructions: FDE at offset %d (file address 0x%" PRIx64
1135         ")",
1136         dwarf_offset, dwarf_offset + baton.eh_section_file_address);
1137   } break;
1138 
1139   case 0: {
1140     printf(" no unwind information");
1141   } break;
1142   }
1143 }
1144 
print_encoding(struct baton baton,uint8_t * function_start,uint32_t encoding)1145 void print_encoding(struct baton baton, uint8_t *function_start,
1146                     uint32_t encoding) {
1147 
1148   if (baton.cputype == CPU_TYPE_X86_64) {
1149     print_encoding_x86_64(baton, function_start, encoding);
1150   } else if (baton.cputype == CPU_TYPE_I386) {
1151     print_encoding_i386(baton, function_start, encoding);
1152   } else if (baton.cputype == CPU_TYPE_ARM64 || baton.cputype == CPU_TYPE_ARM64_32) {
1153     print_encoding_arm64(baton, function_start, encoding);
1154   } else if (baton.cputype == CPU_TYPE_ARM) {
1155     print_encoding_armv7(baton, function_start, encoding);
1156   } else {
1157     printf(" -- unsupported encoding arch -- ");
1158   }
1159 }
1160 
print_function_encoding(struct baton baton,uint32_t idx,uint32_t encoding,uint32_t entry_encoding_index,uint32_t entry_func_offset)1161 void print_function_encoding(struct baton baton, uint32_t idx,
1162                              uint32_t encoding, uint32_t entry_encoding_index,
1163                              uint32_t entry_func_offset) {
1164 
1165   char *entry_encoding_index_str = "";
1166   if (entry_encoding_index != (uint32_t)-1) {
1167     asprintf(&entry_encoding_index_str, ", encoding #%d", entry_encoding_index);
1168   } else {
1169     asprintf(&entry_encoding_index_str, "");
1170   }
1171 
1172   uint64_t file_address = baton.first_level_index_entry.functionOffset +
1173                           entry_func_offset + baton.text_segment_vmaddr;
1174 
1175   if (baton.cputype == CPU_TYPE_ARM)
1176     file_address = file_address & ~1;
1177 
1178   printf(
1179       "    func [%d] offset %d (file addr 0x%" PRIx64 ")%s, encoding is 0x%x",
1180       idx, entry_func_offset, file_address, entry_encoding_index_str, encoding);
1181 
1182   struct symbol *symbol = NULL;
1183   for (int i = 0; i < baton.symbols_count; i++) {
1184     if (i == baton.symbols_count - 1 &&
1185         baton.symbols[i].file_address <= file_address) {
1186       symbol = &(baton.symbols[i]);
1187       break;
1188     } else {
1189       if (baton.symbols[i].file_address <= file_address &&
1190           baton.symbols[i + 1].file_address > file_address) {
1191         symbol = &(baton.symbols[i]);
1192         break;
1193       }
1194     }
1195   }
1196 
1197   printf("\n         ");
1198   if (symbol) {
1199     int offset = file_address - symbol->file_address;
1200 
1201     // FIXME this is a poor heuristic - if we're greater than 16 bytes past the
1202     // start of the function, this is the unwind info for a stripped function.
1203     // In reality the compact unwind entry may not line up exactly with the
1204     // function bounds.
1205     if (offset >= 0) {
1206       printf("name: %s", symbol->name);
1207       if (offset > 0) {
1208         printf(" + %d", offset);
1209       }
1210     }
1211     printf("\n         ");
1212   }
1213 
1214   print_encoding(baton, baton.mach_header_start +
1215                             baton.first_level_index_entry.functionOffset +
1216                             baton.text_section_file_offset + entry_func_offset,
1217                  encoding);
1218 
1219   bool has_lsda = encoding & UNWIND_HAS_LSDA;
1220 
1221   if (has_lsda) {
1222     uint32_t func_offset =
1223         entry_func_offset + baton.first_level_index_entry.functionOffset;
1224 
1225     int lsda_entry_number = -1;
1226 
1227     uint32_t low = 0;
1228     uint32_t high = (baton.lsda_array_end - baton.lsda_array_start) /
1229                     sizeof(struct unwind_info_section_header_lsda_index_entry);
1230 
1231     while (low < high) {
1232       uint32_t mid = (low + high) / 2;
1233 
1234       uint8_t *mid_lsda_entry_addr =
1235           (baton.lsda_array_start +
1236            (mid * sizeof(struct unwind_info_section_header_lsda_index_entry)));
1237       struct unwind_info_section_header_lsda_index_entry mid_lsda_entry;
1238       memcpy(&mid_lsda_entry, mid_lsda_entry_addr,
1239              sizeof(struct unwind_info_section_header_lsda_index_entry));
1240       if (mid_lsda_entry.functionOffset == func_offset) {
1241         lsda_entry_number =
1242             (mid_lsda_entry_addr - baton.lsda_array_start) /
1243             sizeof(struct unwind_info_section_header_lsda_index_entry);
1244         break;
1245       } else if (mid_lsda_entry.functionOffset < func_offset) {
1246         low = mid + 1;
1247       } else {
1248         high = mid;
1249       }
1250     }
1251 
1252     if (lsda_entry_number != -1) {
1253       printf(", LSDA entry #%d", lsda_entry_number);
1254     } else {
1255       printf(", LSDA entry not found");
1256     }
1257   }
1258 
1259   uint32_t pers_idx = EXTRACT_BITS(encoding, UNWIND_PERSONALITY_MASK);
1260   if (pers_idx != 0) {
1261     pers_idx--; // Change 1-based to 0-based index
1262     printf(", personality entry #%d", pers_idx);
1263   }
1264 
1265   printf("\n");
1266 }
1267 
// Print every entry of a regular (uncompressed) second-level page.
//
// Reads the page location from baton.first_level_index_entry and
// baton.regular_second_level_page_header; each entry is 8 bytes: a 32-bit
// function offset followed by a 32-bit encoding.
void print_second_level_index_regular(struct baton baton) {
  uint8_t *page_entries =
      baton.compact_unwind_start +
      baton.first_level_index_entry.secondLevelPagesSectionOffset +
      baton.regular_second_level_page_header.entryPageOffset;
  uint32_t entries_count = baton.regular_second_level_page_header.entryCount;

  uint8_t *offset = page_entries;

  for (uint32_t idx = 0; idx < entries_count; idx++, offset += 8) {
    // Copy the fields out with memcpy rather than dereferencing a cast
    // pointer: the data is raw mapped file content reached through a
    // uint8_t *, so nothing guarantees uint32_t alignment.
    uint32_t func_offset;
    uint32_t encoding;
    memcpy(&func_offset, offset, sizeof(func_offset));
    memcpy(&encoding, offset + 4, sizeof(encoding));

    // UNWIND_SECOND_LEVEL_REGULAR entries have a funcOffset which includes the
    // functionOffset from the containing index table already.
    // UNWIND_SECOND_LEVEL_COMPRESSED entries only have the offset from the
    // containing index table functionOffset.
    // So strip off the containing index table functionOffset value here so
    // they can be treated the same at the lower layers.

    print_function_encoding(baton, idx, encoding, (uint32_t)-1,
                            func_offset -
                                baton.first_level_index_entry.functionOffset);
  }
}
1298 
// Print every entry of a compressed second-level page.
//
// Each 32-bit entry packs an encoding index and a function offset.  An
// encoding index below the header's commonEncodingsArrayCount selects from
// the section-wide common encodings table; any other index selects from this
// page's own encodings table after subtracting that count.
void print_second_level_index_compressed(struct baton baton) {
  uint8_t *this_index =
      baton.compact_unwind_start +
      baton.first_level_index_entry.secondLevelPagesSectionOffset;
  uint8_t *start_of_entries =
      this_index + baton.compressed_second_level_page_header.entryPageOffset;
  uint8_t *offset = start_of_entries;
  for (uint16_t idx = 0;
       idx < baton.compressed_second_level_page_header.entryCount; idx++) {
    // memcpy instead of a cast-pointer dereference: the bytes are raw mapped
    // file content with no guaranteed uint32_t alignment.
    uint32_t entry;
    memcpy(&entry, offset, sizeof(entry));
    offset += 4;
    uint32_t encoding;

    uint32_t entry_encoding_index =
        UNWIND_INFO_COMPRESSED_ENTRY_ENCODING_INDEX(entry);
    uint32_t entry_func_offset =
        UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(entry);

    const uint8_t *encoding_addr;
    if (entry_encoding_index < baton.unwind_header.commonEncodingsArrayCount) {
      // encoding is in common table in section header
      encoding_addr = baton.compact_unwind_start +
                      baton.unwind_header.commonEncodingsArraySectionOffset +
                      (entry_encoding_index * sizeof(uint32_t));
    } else {
      // encoding is in page specific table
      uint32_t page_encoding_index =
          entry_encoding_index - baton.unwind_header.commonEncodingsArrayCount;
      encoding_addr =
          this_index +
          baton.compressed_second_level_page_header.encodingsPageOffset +
          (page_encoding_index * sizeof(uint32_t));
    }
    memcpy(&encoding, encoding_addr, sizeof(encoding));

    print_function_encoding(baton, idx, encoding, entry_encoding_index,
                            entry_func_offset);
  }
}
1337 
print_second_level_index(struct baton baton)1338 void print_second_level_index(struct baton baton) {
1339   uint8_t *index_start =
1340       baton.compact_unwind_start +
1341       baton.first_level_index_entry.secondLevelPagesSectionOffset;
1342 
1343   if ((*(uint32_t *)index_start) == UNWIND_SECOND_LEVEL_REGULAR) {
1344     struct unwind_info_regular_second_level_page_header header;
1345     memcpy(&header, index_start,
1346            sizeof(struct unwind_info_regular_second_level_page_header));
1347     printf(
1348         "  UNWIND_SECOND_LEVEL_REGULAR #%d entryPageOffset %d, entryCount %d\n",
1349         baton.current_index_table_number, header.entryPageOffset,
1350         header.entryCount);
1351     baton.regular_second_level_page_header = header;
1352     print_second_level_index_regular(baton);
1353   }
1354 
1355   if ((*(uint32_t *)index_start) == UNWIND_SECOND_LEVEL_COMPRESSED) {
1356     struct unwind_info_compressed_second_level_page_header header;
1357     memcpy(&header, index_start,
1358            sizeof(struct unwind_info_compressed_second_level_page_header));
1359     printf("  UNWIND_SECOND_LEVEL_COMPRESSED #%d entryPageOffset %d, "
1360            "entryCount %d, encodingsPageOffset %d, encodingsCount %d\n",
1361            baton.current_index_table_number, header.entryPageOffset,
1362            header.entryCount, header.encodingsPageOffset,
1363            header.encodingsCount);
1364     baton.compressed_second_level_page_header = header;
1365     print_second_level_index_compressed(baton);
1366   }
1367 }
1368 
print_index_sections(struct baton baton)1369 void print_index_sections(struct baton baton) {
1370   uint8_t *index_section_offset =
1371       baton.compact_unwind_start + baton.unwind_header.indexSectionOffset;
1372   uint32_t index_count = baton.unwind_header.indexCount;
1373 
1374   uint32_t cur_idx = 0;
1375 
1376   uint8_t *offset = index_section_offset;
1377   while (cur_idx < index_count) {
1378     baton.current_index_table_number = cur_idx;
1379     struct unwind_info_section_header_index_entry index_entry;
1380     memcpy(&index_entry, offset,
1381            sizeof(struct unwind_info_section_header_index_entry));
1382     printf("index section #%d: functionOffset %d, "
1383            "secondLevelPagesSectionOffset %d, lsdaIndexArraySectionOffset %d\n",
1384            cur_idx, index_entry.functionOffset,
1385            index_entry.secondLevelPagesSectionOffset,
1386            index_entry.lsdaIndexArraySectionOffset);
1387 
1388     // secondLevelPagesSectionOffset == 0 means this is a sentinel entry
1389     if (index_entry.secondLevelPagesSectionOffset != 0) {
1390       struct unwind_info_section_header_index_entry next_index_entry;
1391       memcpy(&next_index_entry,
1392              offset + sizeof(struct unwind_info_section_header_index_entry),
1393              sizeof(struct unwind_info_section_header_index_entry));
1394 
1395       baton.lsda_array_start =
1396           baton.compact_unwind_start + index_entry.lsdaIndexArraySectionOffset;
1397       baton.lsda_array_end = baton.compact_unwind_start +
1398                              next_index_entry.lsdaIndexArraySectionOffset;
1399 
1400       uint8_t *lsda_entry_offset = baton.lsda_array_start;
1401       uint32_t lsda_count = 0;
1402       while (lsda_entry_offset < baton.lsda_array_end) {
1403         struct unwind_info_section_header_lsda_index_entry lsda_entry;
1404         memcpy(&lsda_entry, lsda_entry_offset,
1405                sizeof(struct unwind_info_section_header_lsda_index_entry));
1406         uint64_t function_file_address =
1407             baton.first_level_index_entry.functionOffset +
1408             lsda_entry.functionOffset + baton.text_segment_vmaddr;
1409         uint64_t lsda_file_address =
1410             lsda_entry.lsdaOffset + baton.text_segment_vmaddr;
1411         printf("    LSDA [%d] functionOffset %d (%d) (file address 0x%" PRIx64
1412                "), lsdaOffset %d (file address 0x%" PRIx64 ")\n",
1413                lsda_count, lsda_entry.functionOffset,
1414                lsda_entry.functionOffset - index_entry.functionOffset,
1415                function_file_address, lsda_entry.lsdaOffset, lsda_file_address);
1416         lsda_count++;
1417         lsda_entry_offset +=
1418             sizeof(struct unwind_info_section_header_lsda_index_entry);
1419       }
1420 
1421       printf("\n");
1422 
1423       baton.first_level_index_entry = index_entry;
1424       print_second_level_index(baton);
1425     }
1426 
1427     printf("\n");
1428 
1429     cur_idx++;
1430     offset += sizeof(struct unwind_info_section_header_index_entry);
1431   }
1432 }
1433 
main(int argc,char ** argv)1434 int main(int argc, char **argv) {
1435   struct stat st;
1436   char *file = argv[0];
1437   if (argc > 1)
1438     file = argv[1];
1439   int fd = open(file, O_RDONLY);
1440   if (fd == -1) {
1441     printf("Failed to open '%s'\n", file);
1442     exit(1);
1443   }
1444   fstat(fd, &st);
1445   uint8_t *file_mem =
1446       (uint8_t *)mmap(0, st.st_size, PROT_READ, MAP_PRIVATE | MAP_FILE, fd, 0);
1447   if (file_mem == MAP_FAILED) {
1448     printf("Failed to mmap() '%s'\n", file);
1449   }
1450 
1451   FILE *f = fopen("a.out", "r");
1452 
1453   struct baton baton;
1454   baton.mach_header_start = file_mem;
1455   baton.symbols = NULL;
1456   baton.symbols_count = 0;
1457   baton.function_start_addresses = NULL;
1458   baton.function_start_addresses_count = 0;
1459 
1460   scan_macho_load_commands(&baton);
1461 
1462   if (baton.compact_unwind_start == NULL) {
1463     printf("could not find __TEXT,__unwind_info section\n");
1464     exit(1);
1465   }
1466 
1467   struct unwind_info_section_header header;
1468   memcpy(&header, baton.compact_unwind_start,
1469          sizeof(struct unwind_info_section_header));
1470   printf("Header:\n");
1471   printf("  version %u\n", header.version);
1472   printf("  commonEncodingsArraySectionOffset is %d\n",
1473          header.commonEncodingsArraySectionOffset);
1474   printf("  commonEncodingsArrayCount is %d\n",
1475          header.commonEncodingsArrayCount);
1476   printf("  personalityArraySectionOffset is %d\n",
1477          header.personalityArraySectionOffset);
1478   printf("  personalityArrayCount is %d\n", header.personalityArrayCount);
1479   printf("  indexSectionOffset is %d\n", header.indexSectionOffset);
1480   printf("  indexCount is %d\n", header.indexCount);
1481 
1482   uint8_t *common_encodings =
1483       baton.compact_unwind_start + header.commonEncodingsArraySectionOffset;
1484   uint32_t encoding_idx = 0;
1485   while (encoding_idx < header.commonEncodingsArrayCount) {
1486     uint32_t encoding = *((uint32_t *)common_encodings);
1487     printf("    Common Encoding [%d]: 0x%x ", encoding_idx, encoding);
1488     print_encoding(baton, NULL, encoding);
1489     printf("\n");
1490     common_encodings += sizeof(uint32_t);
1491     encoding_idx++;
1492   }
1493 
1494   uint8_t *pers_arr =
1495       baton.compact_unwind_start + header.personalityArraySectionOffset;
1496   uint32_t pers_idx = 0;
1497   while (pers_idx < header.personalityArrayCount) {
1498     int32_t pers_delta = *((int32_t *)(baton.compact_unwind_start +
1499                                        header.personalityArraySectionOffset +
1500                                        (pers_idx * sizeof(uint32_t))));
1501     printf("    Personality [%d]: personality function ptr @ offset %d (file "
1502            "address 0x%" PRIx64 ")\n",
1503            pers_idx, pers_delta, baton.text_segment_vmaddr + pers_delta);
1504     pers_idx++;
1505     pers_arr += sizeof(uint32_t);
1506   }
1507 
1508   printf("\n");
1509 
1510   baton.unwind_header = header;
1511 
1512   print_index_sections(baton);
1513 
1514   return 0;
1515 }
1516