#!/usr/bin/env python
#
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Extracts the list of resident symbols of a library loaded in a process.

This script combines the extended output of memdump for a given process
(obtained through memdump -x PID) and the symbol table of a .so loaded in that
process (obtained through nm -C lib-with-symbols.so), filtering out only those
symbols that, at the time of the snapshot, were resident in memory (that is,
the symbols whose start address belongs to a mapped page of the .so which was
resident at the time of the snapshot).
The aim is to perform a "code coverage"-like profiling of a binary, intersecting
run-time information (list of resident pages) and debug symbols.
"""

import base64
import os
import sys
import re

from optparse import OptionParser

_PAGE_SIZE = 4096

# One mapping line of the memdump output. Groups: start address, end address,
# protection flags, file offset, mapped file name, base64 residency bitmap.
_MAP_RX = re.compile(
    r'^([0-9a-f]+)-([0-9a-f]+) ([\w-]+) ([0-9a-f]+) .* "(.*)" \[(.*)\]$')

# One defined-symbol line of the nm output. Group 1 is the symbol address.
_NM_RX = re.compile(r'^([0-9a-f]+)\s+.*$')


def _TestBit(word, bit):
  """Returns True if bit number |bit| (0 = least significant) of |word| is set.

  Args:
    word: An integer holding one byte of the bitmap.
    bit: The bit index to test, in the range [0, 8).
  """
  assert 0 <= bit < 8
  return bool((word >> bit) & 1)


def _HexAddr(addr):
  """Formats |addr| as lowercase hex, zero-padded to at least 8 digits."""
  # String formatting is used rather than hex(): on Python 2, hex() appends a
  # trailing 'L' to long integers, which would corrupt 64-bit addresses.
  return '%08x' % addr


def _GetResidentPagesSet(memdump_contents, lib_name, verbose,
                         memdump_name='memdump'):
  """Parses the memdump output and extracts the resident page set for lib_name.

  Args:
    memdump_contents: Array of strings (lines) of a memdump output.
    lib_name: A string containing the name of the library.so to be matched.
    verbose: Print a verbose header for each mapping matched.
    memdump_name: Name of the memdump source, used only in the diagnostic
        printed to stderr when a line cannot be parsed.

  Returns:
    A set of resident pages (the key is the page index) for all the
    mappings matching .*lib_name.
  """
  resident_pages = set()
  for line in memdump_contents:
    line = line.rstrip('\r\n')
    # Per-process header lines carry no mapping information.
    if line.startswith('[ PID'):
      continue

    r = _MAP_RX.match(line)
    if not r:
      sys.stderr.write('Skipping %s from %s\n' % (line, memdump_name))
      continue

    map_start = int(r.group(1), 16)
    map_end = int(r.group(2), 16)
    prot = r.group(3)
    offset = int(r.group(4), 16)
    assert offset % _PAGE_SIZE == 0
    lib = r.group(5)
    enc_bitmap = r.group(6)

    if not lib.endswith(lib_name):
      continue

    # A bytearray yields integers when indexed on both Python 2 and Python 3.
    bitmap = bytearray(base64.b64decode(enc_bitmap))
    # Floor division keeps the page count an integer on Python 3 as well.
    map_pages_count = (map_end - map_start + 1) // _PAGE_SIZE
    bitmap_pages_count = len(bitmap) * 8

    if verbose:
      print('Found %s: mapped %d pages in mode %s @ offset %s.' % (
          lib, map_pages_count, prot, _HexAddr(offset)))
      print(' Map range in the process VA: [%s - %s]. Len: %s' % (
          _HexAddr(map_start),
          _HexAddr(map_end),
          _HexAddr(map_pages_count * _PAGE_SIZE)))
      print(' Corresponding addresses in the binary: [%s - %s]. Len: %s' % (
          _HexAddr(offset),
          _HexAddr(offset + map_end - map_start),
          _HexAddr(map_pages_count * _PAGE_SIZE)))
      print(' Bitmap: %d pages' % bitmap_pages_count)
      print('')

    assert bitmap_pages_count >= map_pages_count
    # Page indexes are expressed relative to the start of the binary, so the
    # i-th page of this mapping is offset by the mapping's file offset.
    first_page = offset // _PAGE_SIZE
    for i in range(map_pages_count):
      bitmap_idx = i // 8
      bitmap_off = i % 8
      if (bitmap_idx < len(bitmap) and
          _TestBit(bitmap[bitmap_idx], bitmap_off)):
        resident_pages.add(first_page + i)
  return resident_pages


def main(argv):
  """Prints the nm symbol lines whose start page is resident (or not, with -r).

  Args:
    argv: The full command line, program name first. The positional arguments
        are the memdump file ('-' reads it from stdin), the nm output file and
        the library name to match.

  Returns:
    The process exit code: 0 on success, 1 on wrong usage.
  """
  parser = OptionParser()
  parser.add_option("-r", "--reverse",
                    action="store_true", dest="reverse", default=False,
                    help="Print out non present symbols")
  parser.add_option("-v", "--verbose",
                    action="store_true", dest="verbose", default=False,
                    help="Print out verbose debug information.")

  # Parse the arguments that were passed in, not whatever sys.argv holds.
  (options, args) = parser.parse_args(argv[1:])

  if len(args) != 3:
    print('Usage: %s [-r] [-v] memdump.file nm.file library.so' % (
        os.path.basename(argv[0])))
    return 1

  memdump_file = args[0]
  nm_file = args[1]
  lib_name = args[2]

  if memdump_file == '-':
    memdump_contents = sys.stdin.readlines()
  else:
    with open(memdump_file, 'r') as memdump_fh:
      memdump_contents = memdump_fh.readlines()
  resident_pages = _GetResidentPagesSet(memdump_contents,
                                        lib_name,
                                        options.verbose,
                                        memdump_file)

  # Process the nm symbol table, filtering out the resident symbols.
  with open(nm_file, 'r') as nm_fh:
    for line in nm_fh:
      line = line.rstrip('\r\n')
      # Skip undefined symbols (lines with no address).
      if line.startswith(' '):
        continue

      r = _NM_RX.match(line)
      if not r:
        sys.stderr.write('Skipping %s from %s\n' % (line, nm_file))
        continue

      sym_addr = int(r.group(1), 16)
      sym_page = sym_addr // _PAGE_SIZE
      # With --reverse the filter is inverted: print the non-resident symbols.
      if (sym_page in resident_pages) != options.reverse:
        print(line)
  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv))