# Insert GAS CFI directives ("control frame information") into x86-32 asm input
#
# CFI directives tell the assembler how to generate "stack frame" debug info.
# This information can tell a debugger (like gdb) how to find the current stack
# frame at any point in the program code, and how to find the values which
# various registers had at higher points in the call stack.
# With this information, the debugger can show a backtrace, and you can move up
# and down the call stack and examine the values of local variables.

BEGIN {
  # State shared by the rules of this script:
  #   in_function - nonzero while CFI directives may be emitted
  #   line_number - number of the input line being processed (for .loc)
  #   called      - target of the latest "call", used to detect the
  #                 "call label; label:" trick
  in_function = 0
  line_number = 0
  called = ""

  # keep CFI data out of the .eh_frame ELF section (which we don't keep)
  print ".cfi_sections .debug_frame"

  # declare file number 1, which the .loc directives refer to
  printf ".file 1 \"%s\"\n", ARGV[1]
}

# For instructions with 2 operands: value of the 1st operand, assuming it is
# a constant (decimal or 0x-prefixed hex, optionally negative).
# NOTE(review): parse_const() is not defined in the visible part of this
# script; presumably another awk source file supplies it -- confirm.
function get_const1() {
  match($0, /-?(0x[0-9a-fA-F]+|[0-9]+),/)
  # RLENGTH-1 leaves out the comma which terminates the match
  return parse_const(substr($0, RSTART, RLENGTH-1))
}

# Map any spelling of a general-purpose register (al, ah, ax, eax, si, ...)
# to the name of the 32-bit register containing it.
function canonicalize_reg(register) {
  if (match(register, /^e/)) {
    # already a 32-bit name
    return register
  }
  if (match(register, /[hl]$/)) {
    # AH, AL, BH, BL, etc: keep the first letter only
    return "e" substr(register, 1, 1) "x"
  }
  # AX, BX, CX, etc
  return "e" register
}

# Canonical name of the first register mentioned on the current line.
# Only use if you already know there is 1 and only 1 register.
function get_reg() {
  match($0, /%e?([abcd][hlx]|si|di|bp)/)
  # skip the leading "%"
  return canonicalize_reg(substr($0, RSTART+1, RLENGTH-1))
}

# For instructions with 2 operands: the 1st operand, assuming it is a register.
function get_reg1() {
  match($0, /%e?([abcd][hlx]|si|di|bp),/)
  # skip the leading "%" and the trailing ","
  return canonicalize_reg(substr($0, RSTART+1, RLENGTH-2))
}

# For instructions with 2 operands: the 2nd operand, assuming it is a register.
function get_reg2() {
  match($0, /,%e?([abcd][hlx]|si|di|bp)/)
  # skip the leading ",%"
  return canonicalize_reg(substr($0, RSTART+2, RLENGTH-2))
}

# Report that the current instruction moved %esp by `delta` bytes (positive
# when the stack grows). Nothing is emitted outside of a function.
function adjust_sp_offset(delta) {
  if (in_function) {
    printf ".cfi_adjust_cfa_offset %d\n", delta
  }
}

# Runs first for every input line: count the line, then normalize its text so
# that the patterns further down only have to deal with one spelling.
{
  line_number++

  # delete comments
  gsub(/(#|\/\/).*/, "")

  # canonicalize whitespace
  gsub(/[ \t]+/, " ") # mawk doesn't understand \s
  gsub(/ *, */, ",")
  gsub(/ *: */, ": ")
  # anchored patterns can match at most once, so sub() is enough here
  sub(/ $/, "")
  sub(/^ /, "")
}

# check for assembler directives which we care about
/^\.(section|data|text)/ {
  # a .cfi_startproc/.cfi_endproc pair should be within the same section
  # otherwise, clang will choke when generating ELF output
  if (in_function) {
    in_function = 0
    print ".cfi_endproc"
  }
}
/^\.type [a-zA-Z0-9_]+,@function/ {
  # remember the symbol name: $2 minus the 10 characters of ",@function"
  functions[substr($2, 1, length($2)-10)] = 1
}
# not interested in assembler directives beyond this, just pass them through
/^\./ {
  print
  next
}

/^[a-zA-Z0-9_]+:/ {
  label = substr($1, 1, length($1)-1) # drop trailing :

  if (called == label) {
    # note adjustment of stack pointer from "call label; label:"
    adjust_sp_offset(4)
  }

  if (functions[label]) {
    # a new function starts here; close the previous one if it is still open
    if (in_function) {
      print ".cfi_endproc"
    }
    print ".cfi_startproc"
    in_function = 1

    # forget all per-function register state (split() empties the array)
    split("", saved)
    split("", dirty)
  }

  # an instruction may follow on the same line, so continue processing
}

/^$/ { next }

# Everything that is left is passed through, preceded by its source line number.
{
  printf ".loc 1 %d\n", line_number
  print
  called = ""
}

# KEEPING UP WITH THE STACK POINTER
# We do NOT attempt to understand foolish and ridiculous tricks like stashing
# the stack pointer and then using %esp as a scratch register, or bitshifting
# it or taking its square root or anything stupid like that.
# %esp should only be adjusted by pushing/popping or adding/subtracting constants
#
/pushl?/ {
  # an operand written as a bare %ax/%bx/... register moves %esp by 2 bytes,
  # everything else is counted as 4 bytes
  adjust_sp_offset(match($0, / %(ax|bx|cx|dx|di|si|bp|sp)/) ? 2 : 4)
}
/popl?/ {
  # mirror image of the push rule above
  adjust_sp_offset(match($0, / %(ax|bx|cx|dx|di|si|bp|sp)/) ? -2 : -4)
}
/addl? \$-?(0x[0-9a-fA-F]+|[0-9]+),%esp/ {
  # adding to %esp shrinks the frame
  adjust_sp_offset(-get_const1())
}
/subl? \$-?(0x[0-9a-fA-F]+|[0-9]+),%esp/ {
  # subtracting from %esp grows the frame
  adjust_sp_offset(get_const1())
}

# Remember the target of a call so that the label rule can recognize the
# "call label; label:" trick on a following line.
/call/ {
  if (match($0, /call [0-9]+f/)) {
    # "forward" label: skip "call " and drop the trailing "f"
    called = substr($0, RSTART+5, RLENGTH-6)
  } else if (match($0, /call [0-9a-zA-Z_]+/)) {
    # named label: skip "call "
    called = substr($0, RSTART+5, RLENGTH-5)
  }
}

# TRACKING REGISTER VALUES FROM THE PREVIOUS STACK FRAME
#
/pushl? %e(ax|bx|cx|dx|si|di|bp)/ { # don't match "push (%reg)"
  # if a register is being pushed, and its value has not changed since the
  # beginning of this function, the pushed value can be used when printing
  # local variables at the next level up the stack
  # emit '.cfi_rel_offset' for that

  if (in_function) {
    register = get_reg()
    if (!(saved[register] || dirty[register])) {
      saved[register] = 1
      printf ".cfi_rel_offset %s,0\n", register
    }
  }
}

# Same idea for a register stored with "mov" into a slot addressed off %esp.
/movl? %e(ax|bx|cx|dx|si|di|bp),-?(0x[0-9a-fA-F]+|[0-9]+)?\(%esp\)/ {
  if (in_function) {
    register = get_reg()
    # the displacement is optional; RLENGTH-6 strips the 6 chars of "(%esp)"
    offset = match($0, /-?(0x[0-9a-fA-F]+|[0-9]+)\(%esp\)/) \
      ? parse_const(substr($0, RSTART, RLENGTH-6)) \
      : 0
    if (!(saved[register] || dirty[register])) {
      saved[register] = 1
      printf ".cfi_rel_offset %s,%d\n", register, offset
    }
  }
}

# IF REGISTER VALUES ARE UNCEREMONIOUSLY TRASHED
# ...then we want to know about it.
#
# Record that `register` (a canonical 32-bit name such as "eax") no longer
# holds the value it had when the current function was entered.
# The first time this happens to a register which has not been saved, emit
# '.cfi_undefined' for it. The register is marked dirty in every case, even
# outside a function.
function trashed(register) {
  if (in_function && !saved[register] && !dirty[register]) {
    printf ".cfi_undefined %s\n", register
  }
  dirty[register] = 1
}
# this does NOT exhaustively check for all possible instructions which could
# overwrite a register value inherited from the caller (just the common ones)
/mov.*,%e?([abcd][hlx]|si|di|bp)$/ { trashed(get_reg2()) }
/(add|addl|sub|subl|and|or|xor|lea|sal|sar|shl|shr).*,%e?([abcd][hlx]|si|di|bp)$/ {
  trashed(get_reg2())
}
# One-operand forms accept an optional AT&T size suffix (b/w/l), so that
# "mull", "imull", "decl", "incl", "notl", "negl" and "popl" are recognized
# just like their unsuffixed spellings.
/^i?mul[bwl]? [^,]*$/ { trashed("eax"); trashed("edx") }
/^i?mul.*,%e?([abcd][hlx]|si|di|bp)$/ { trashed(get_reg2()) }
/^i?div/ { trashed("eax"); trashed("edx") }
/(dec|inc|not|neg|pop)[bwl]? %e?([abcd][hlx]|si|di|bp)/ { trashed(get_reg()) }
/cpuid/ { trashed("eax"); trashed("ebx"); trashed("ecx"); trashed("edx") }

# close a function which is still open at the end of the input
END {
  if (in_function)
    print ".cfi_endproc"
}