#!/usr/bin/python

"""Disassemble the code stored in a tombstone.

The functions in this module are generators: each one consumes an iterator of
tombstone lines and yields output lines, so they can be chained together to do
arbitrary processing. Lines pass through unchanged, except that the blocks of
code words embedded in the tombstone are replaced by a disassembly produced by
the external tools that the architecture module selects for the tombstone's
ABI. main() prints the result to stdout.
"""


import re
import subprocess
import sys
import tempfile
import architecture


STANDARD_PROLOGUE = """
    .type _start, %function
    .globl _start
_start:
"""


THUMB_PROLOGUE = STANDARD_PROLOGUE + """
    .code 16
    .thumb_func
    .type thumb_start, %function
thumb_start:
"""


# Prefix for the disassembly line that holds the reported register value, and
# the same-width padding used for every other line so the columns stay aligned.
_ARROW = '--->'
_NO_ARROW = ' ' * len(_ARROW)


def Disassemble(line_generator):
    """Yield the lines of a tombstone with its code blocks disassembled.

    Args:
        line_generator: iterable of tombstone lines, e.g. an open file. It is
            consumed incrementally and shared with the helper generators.

    Yields:
        Every input line unchanged, except that the word dump following a
        'code around <reg>' / 'memory near pc' header is replaced by the
        output of ProcessCodeBlock.
    """
    # The input is walked by two consecutive loops and by the helpers; force
    # a single shared iterator so a list argument does not restart at the top.
    line_generator = iter(line_generator)
    abi_line = re.compile(r"(ABI: '(.*)')")
    abi = None
    tools = None
    # Process global headers
    for line in line_generator:
        yield line
        abi_header = abi_line.search(line)
        if abi_header:
            abi = abi_header.group(2)
            # Look up the tools here so we don't do a lookup for each code
            # block.
            tools = architecture.Architecture(abi)
            break
    # The rest of the file consists of:
    #  o Lines that should pass through unchanged
    #  o Blocks of register values, which follow a 'pid: ...' line and end
    #    with 'backtrace:' line
    #  o Blocks of code represented as words, which start with
    #    'code around ...' and end with a line that doesn't look like a list
    #    of words.
    #
    # The only constraint on the ordering of these blocks is that the register
    # values must come before the first code block.
    #
    # It's easiest to nest register processing in the codeblock search loop.
    register_list_re = re.compile('^pid: ')
    codeblock_re = re.compile('^code around ([a-z0-9]+)|memory near (pc)')
    register_text = {}
    for line in line_generator:
        yield line
        if register_list_re.search(line):
            # A new 'pid:' header starts a fresh set of register values.
            register_text = {}
            for output in ProcessRegisterList(line_generator, register_text):
                yield output
        code_match = codeblock_re.search(line)
        if code_match:
            # Exactly one of the two groups matched; the other contributes ''.
            code_reg = ''.join(code_match.groups(''))
            for output in ProcessCodeBlock(
                    abi, tools, code_reg, register_text, line_generator):
                yield output


def ProcessRegisterList(line_generator, rval):
    """Pass a register dump through while recording the register values.

    Consumes lines up to and including the first one that starts with
    'backtrace:'.

    Args:
        line_generator: iterator of tombstone lines, positioned just after the
            'pid: ' header.
        rval: dict that is filled in place, mapping register name to the
            value text exactly as it appears in the tombstone.

    Yields:
        Every consumed line, unchanged.
    """
    for line in line_generator:
        yield line
        if line.startswith('backtrace:'):
            return
        # The register list is indented and consists of alternating name,
        # value pairs.
        if line.startswith(' '):
            words = line.split()
            assert len(words) % 2 == 0, (
                'register line is not a list of name/value pairs: %r' % line)
            rval.update(zip(words[0::2], words[1::2]))


def _AnnotateDisassembly(tools, linked_file_name, start_address,
                         program_counter_val):
    """Run the disassembler on a linked object and yield annotated lines.

    Args:
        tools: object whose Disassemble() method returns the command line to
            run (from the comments this is presumably objdump -- confirm in
            the architecture module).
        linked_file_name: path of the linked object file.
        start_address: hex text of the first address of the block; output
            before the line starting with it is skipped.
        program_counter_val: numeric address whose line gets the arrow.

    Yields:
        Disassembly lines prefixed with '--->' for the line at
        program_counter_val and with equal-width padding otherwise.
    """
    start_pattern = start_address + ' '
    # objdump padding varies between 32 bit and 64 bit architectures, so
    # accept any run of leading blanks/zeros instead of a fixed-width field.
    arrow_pattern = re.compile('^[ 0]*%x:\t' % program_counter_val)
    disassembler = subprocess.Popen(
        tools.Disassemble(['-S', linked_file_name]),
        stdout=subprocess.PIPE, universal_newlines=True)
    try:
        # Skip some of the annoying assembler headers.
        emit = False
        for line in disassembler.stdout:
            emit = emit or line.startswith(start_pattern)
            if not emit or len(line) <= 1:
                continue
            # Hide the synthetic label that was planted at the register value.
            if 'program_counter_was_here' in line:
                continue
            if arrow_pattern.search(line):
                yield _ARROW + line
            else:
                yield _NO_ARROW + line
    finally:
        # Runs on normal completion, on error, and when the consumer abandons
        # the generator: never leave the pipe open or the child unreaped.
        disassembler.stdout.close()
        disassembler.wait()


def ProcessCodeBlock(abi, tools, register_name, register_text, line_generator):
    """Replace a block of code words with its disassembly.

    The words are written out as .byte directives, assembled, linked at the
    block's address and disassembled again with the tools' commands.

    Args:
        abi: ABI name taken from the tombstone header.
        tools: object providing WordToBytes(), Assemble(), Link() and
            Disassemble(); the last three return command lines to execute.
        register_name: register named by the block header, e.g. 'pc'.
        register_text: dict of register name to hex value text, as filled in
            by ProcessRegisterList.
        line_generator: iterator of tombstone lines, positioned just after the
            block header.

    Yields:
        The annotated disassembly followed by a single '\\n'. If no value is
        known for register_name nothing is consumed or yielded; if the block
        holds no recognisable word lines, the one line that was consumed is
        passed through unchanged.

    Raises:
        subprocess.CalledProcessError: if assembling, patching or linking
            fails.
    """
    program_counter = register_text.get(register_name)
    if program_counter is None:
        # Without the register value the block cannot be interpreted. Consume
        # nothing so the caller passes the raw block through untouched.
        return
    program_counter_val = int(program_counter, 16)
    # Handle the 3 different line formats that we've observed. The width of
    # the register text tells 32 bit dumps from 64 bit ones.
    if len(program_counter) == 8:
        block_line_len = [67]
        block_num_words = 4
    else:
        assert len(program_counter) == 16, (
            'unexpected register width: %r' % program_counter)
        block_line_len = [57, 73]
        block_num_words = 2
    # ARM code comes in two flavors: arm and thumb. Figure out the one
    # to use by peeking in the cpsr.
    if abi == 'arm' and int(register_text['cpsr'], 16) & 0x20:
        prologue = THUMB_PROLOGUE
    else:
        prologue = STANDARD_PROLOGUE
    # Retains the hexadecimal text for the start of the block
    start_address = None
    # Maintains a numeric counter for the address of the current byte
    current_address = None
    # The line that ended the block, if the input did not simply run out.
    terminator = None
    # The context managers delete the temporary files even when a tool fails
    # or the consumer stops iterating early.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.s') as scratch_file:
        scratch_file.write(prologue)
        # Now generate assembly from the bytes in the code block.
        for line in line_generator:
            # Be conservative and stop interpreting if the line length is
            # wrong. We can't count words because spaces can appear in the
            # text representation of the memory.
            if len(line) not in block_line_len:
                terminator = line
                break
            words = line.split()
            # Double check the address at the start of each line
            if current_address is None:
                start_address = words[0]
                current_address = int(start_address, 16)
            else:
                assert current_address == int(words[0], 16), (
                    'code block is not contiguous at %r' % words[0])
            for word in words[1:block_num_words + 1]:
                # Handle byte swapping
                for byte in tools.WordToBytes(word):
                    # Emit a label at the desired program counter.
                    # This will cause the disassembler to resynchronize at
                    # this point, allowing us to position the arrow and also
                    # ensuring that we decode the instruction properly.
                    if current_address == program_counter_val:
                        scratch_file.write('program_counter_was_here:\n')
                    scratch_file.write('  .byte 0x%s\n' % byte)
                    current_address += 1
        if start_address is None:
            # Nothing that looks like a word line followed the header, so
            # there is nothing to link or disassemble. Hand back the line
            # that was consumed instead of losing it.
            if terminator is not None:
                yield terminator
            return
        # NOTE: from here on the terminating line is dropped and replaced by
        # the '\n' yielded at the end; this is only lossless when the block
        # is followed by a blank line.
        scratch_file.flush()
        # Assemble the scratch file and relocate it to the block address with
        # the linker.
        with tempfile.NamedTemporaryFile(suffix='.o') as object_file:
            subprocess.check_call(tools.Assemble([
                '-o', object_file.name, scratch_file.name]))
            # Work around ARM data tagging: patch the NUL-delimited symbol
            # name "$d" to "$q" in the object file. Presumably this stops the
            # disassembler from treating the .byte output as data -- confirm
            # against the ARM ELF mapping-symbol rules.
            if abi.startswith('arm'):
                subprocess.check_call(
                    ['sed', '-i', '-e',
                     "s/\\x00\\x24\\x64\\x00/\\x00\\x24\\x71\\x00/",
                     object_file.name])
            with tempfile.NamedTemporaryFile(suffix='.o') as linked_file:
                subprocess.check_call(tools.Link([
                    '-Ttext', '0x' + start_address,
                    '-o', linked_file.name, object_file.name]))
                for output in _AnnotateDisassembly(
                        tools, linked_file.name, start_address,
                        program_counter_val):
                    yield output
    yield '\n'


def main(argv):
    """Disassemble every tombstone file named in argv[1:] to stdout."""
    for fn in argv[1:]:
        with open(fn, 'r') as tombstone:
            for line in Disassemble(tombstone):
                # Lines already carry their own newline; write them verbatim.
                sys.stdout.write(line)


if __name__ == '__main__':
    main(sys.argv)