#!/usr/bin/python
"""Disassemble the code stored in a tombstone.

The functions in this module are generators that consume an iterator of
tombstone lines and yield output lines, so that they can be chained together
to do arbitrary processing. They support disassembling the bytes embedded in
tombstones; main() prints the result to stdout.
"""

import re
import subprocess
import sys
import tempfile

import architecture


STANDARD_PROLOGUE = """
  .type _start, %function
  .globl _start
_start:
"""

THUMB_PROLOGUE = STANDARD_PROLOGUE + """
  .code 16
  .thumb_func
  .type thumb_start, %function
thumb_start:
"""

# Header line naming the ABI, e.g. "ABI: 'arm'"; group 2 is the ABI name.
_ABI_LINE_RE = re.compile(r"(ABI: '(.*)')")
# Start of a block of register values.
_REGISTER_LIST_RE = re.compile(r'^pid: ')
# Start of a block of code words; exactly one group holds the register name.
_CODE_BLOCK_RE = re.compile(r'^code around ([a-z0-9]+)|memory near (pc)')

# Prefix for the disassembled line at the program counter, and an equally
# wide blank prefix for every other line so the columns stay aligned.
_ARROW = '--->'
_NO_ARROW = ' ' * len(_ARROW)


def Disassemble(line_generator):
    """Yield the lines of a tombstone with its code blocks disassembled.

    Args:
        line_generator: iterable of tombstone lines (each including its
            trailing newline), for example an open file.

    Yields:
        Every input line unchanged, except that the word dump following a
        code block header is replaced by the output of ProcessCodeBlock.
    """
    # All the loops below (and the nested generators) must share a single
    # position in the input, so force a one-shot iterator even when the
    # caller hands over a re-iterable container such as a list.
    line_generator = iter(line_generator)

    abi = None
    tools = None
    # Process global headers
    for line in line_generator:
        yield line
        abi_header = _ABI_LINE_RE.search(line)
        if abi_header:
            abi = abi_header.group(2)
            # Look up the tools here so we don't do a lookup for each code
            # block.
            tools = architecture.Architecture(abi)
            break

    # The rest of the file consists of:
    #  o Lines that should pass through unchanged
    #  o Blocks of register values, which follow a 'pid: ...' line and end
    #    with 'backtrace:' line
    #  o Blocks of code represented as words, which start with
    #    'code around ...' and end with a line that doesn't look like a list
    #    of words.
    #
    # The only constraint on the ordering of these blocks is that the
    # register values must come before the first code block.
    #
    # It's easiest to nest register processing in the codeblock search loop.
    register_text = {}
    for line in line_generator:
        yield line
        if _REGISTER_LIST_RE.search(line):
            register_text = {}
            for output in ProcessRegisterList(line_generator, register_text):
                yield output
        code_match = _CODE_BLOCK_RE.search(line)
        if code_match:
            # Only one alternative of the pattern matches; the other group is
            # replaced by '' so the join yields just the register name.
            code_reg = ''.join(code_match.groups(''))
            for output in ProcessCodeBlock(
                    abi, tools, code_reg, register_text, line_generator):
                yield output


def ProcessRegisterList(line_generator, rval):
    """Pass a register block through while recording the register values.

    Consumes lines up to and including the first one that starts with
    'backtrace:'.

    Args:
        line_generator: iterator of tombstone lines, positioned just after
            the 'pid: ' line.
        rval: dict that is filled in place, mapping register name to the
            register value text.

    Yields:
        Every consumed line, unchanged.
    """
    for line in line_generator:
        yield line
        if line.startswith('backtrace:'):
            return
        # The register list is indented and consists of alternating name,
        # value pairs.
        if line.startswith(' '):
            words = line.split()
            assert len(words) % 2 == 0
            for index in range(0, len(words), 2):
                rval[words[index]] = words[index + 1]


def _WriteCodeBytes(tools, line_generator, scratch_file, program_counter_val,
                    block_line_len, block_num_words):
    """Convert the word lines of a code block into '.byte' directives.

    Consumes lines from line_generator until one has a length that is not in
    block_line_len (that line is consumed too) or the input runs out.

    Returns:
        (start_address, terminator): the hexadecimal address text of the
        first line of the block and the line that ended the block. Either is
        None when there was no such line.
    """
    # Retains the hexadecimal text for the start of the block
    start_address = None
    # Maintains a numeric counter for the address of the current byte
    current_address = None
    terminator = None

    for line in line_generator:
        # Be conservative and stop interpreting if the line length is wrong.
        # We can't count words because spaces can appear in the text
        # representation of the memory.
        if len(line) not in block_line_len:
            terminator = line
            break
        words = line.split()

        # Double check the address at the start of each line
        if current_address is None:
            start_address = words[0]
            current_address = int(start_address, 16)
        else:
            assert current_address == int(words[0], 16)

        for word in words[1:block_num_words + 1]:
            # Handle byte swapping
            for byte in tools.WordToBytes(word):
                # Emit a label at the desired program counter.
                # This will cause the disassembler to resynchronize at this
                # point, allowing us to position the arrow and also ensuring
                # that we decode the instruction properly.
                if current_address == program_counter_val:
                    scratch_file.write('program_counter_was_here:\n')
                scratch_file.write(' .byte 0x%s\n' % byte)
                current_address += 1

    return start_address, terminator


def _DisassembleLinkedFile(tools, linked_name, start_address,
                           program_counter_val):
    """Run the disassembler on linked_name and yield the interesting lines.

    Output before the first line that starts with the block's start address
    is skipped, as are one-character lines and the helper label line. The
    line at program_counter_val is prefixed with an arrow.
    """
    disassembler = subprocess.Popen(
        tools.Disassemble(['-S', linked_name]),
        stdout=subprocess.PIPE,
        # Read text rather than bytes so the string comparisons below also
        # work on Python 3.
        universal_newlines=True)
    try:
        # Skip some of the annoying assembler headers.
        emit = False
        start_pattern = start_address + ' '
        # objdump padding varies between 32 bit and 64 bit architectures
        arrow_pattern = re.compile('^[ 0]*%8x:\t' % program_counter_val)
        for line in disassembler.stdout:
            emit = emit or line.startswith(start_pattern)
            if (emit and len(line) > 1 and
                    'program_counter_was_here' not in line):
                if arrow_pattern.search(line):
                    yield _ARROW + line
                else:
                    yield _NO_ARROW + line
    finally:
        # Runs even when the consumer abandons this generator early, so the
        # pipe is closed and the child process is always reaped.
        disassembler.stdout.close()
        disassembler.wait()


def ProcessCodeBlock(abi, tools, register_name, register_text, line_generator):
    """Replace the word dump of one code block with its disassembly.

    The bytes are written to an assembly file as '.byte' directives,
    assembled, linked at the address of the block and disassembled with the
    commands supplied by tools.

    Args:
        abi: ABI name taken from the tombstone header.
        tools: object providing WordToBytes, Assemble, Link and Disassemble.
        register_name: name of the register the block surrounds; its value
            is treated as the program counter for this block.
        register_text: dict mapping register name to hexadecimal value text.
        line_generator: iterator of tombstone lines, positioned just after
            the code block header.

    Yields:
        The disassembly lines followed by a single '\n'. When no word lines
        follow the header, only the line that ended the (empty) block is
        yielded, unchanged.

    Raises:
        KeyError: register_name (or 'cpsr' for the 'arm' ABI) is not in
            register_text.
        AssertionError: the register width or a line address is unexpected.
        subprocess.CalledProcessError: the assembler, sed or linker failed.
    """
    program_counter = register_text[register_name]
    program_counter_val = int(program_counter, 16)

    # Handle the different file formats that we've observed. The lengths
    # are those of a whole line of words, including its newline.
    if len(program_counter) == 8:
        block_line_len = [67]
        block_num_words = 4
    else:
        assert len(program_counter) == 16
        block_line_len = [57, 73]
        block_num_words = 2

    # Text mode so that str can be written on both Python 2 and Python 3.
    # The 'with' blocks delete the temporary files even if a tool fails.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.s') as scratch_file:
        # ARM code comes in two flavors: arm and thumb. Figure out the one
        # to use by peeking in the cpsr.
        if abi == 'arm' and int(register_text['cpsr'], 16) & 0x20:
            scratch_file.write(THUMB_PROLOGUE)
        else:
            scratch_file.write(STANDARD_PROLOGUE)

        # Now generate assembly from the bytes in the code block.
        start_address, terminator = _WriteCodeBytes(
            tools, line_generator, scratch_file, program_counter_val,
            block_line_len, block_num_words)
        scratch_file.flush()

        if start_address is None:
            # The header was not followed by any words, so there is nothing
            # to link or disassemble. Pass through the line that was read
            # while looking for them instead of dropping it.
            if terminator is not None:
                yield terminator
            return

        with tempfile.NamedTemporaryFile(suffix='.o') as object_file, \
                tempfile.NamedTemporaryFile(suffix='.o') as linked_file:
            # Assemble the scratch file and relocate it to the block address
            # with the linker.
            subprocess.check_call(tools.Assemble([
                '-o', object_file.name, scratch_file.name]))

            # Work around ARM data tagging: rewrite the "$d" symbol name
            # (0x24 0x64) in the object file to "$q" (0x24 0x71).
            # NOTE(review): presumably this keeps the disassembler from
            # treating the .byte values as data -- confirm.
            if abi.startswith('arm'):
                subprocess.check_call(
                    ['sed', '-i', '-e',
                     "s/\\x00\\x24\\x64\\x00/\\x00\\x24\\x71\\x00/",
                     object_file.name])

            subprocess.check_call(tools.Link([
                '-Ttext', '0x' + start_address,
                '-o', linked_file.name,
                object_file.name]))

            for output in _DisassembleLinkedFile(
                    tools, linked_file.name, start_address,
                    program_counter_val):
                yield output

    yield '\n'


def main(argv):
    """Disassemble each tombstone file named in argv[1:] to stdout."""
    for fn in argv[1:]:
        with open(fn, 'r') as tombstone:
            for line in Disassemble(tombstone):
                # Lines already carry their own newline.
                sys.stdout.write(line)


if __name__ == '__main__':
    main(sys.argv)