#!/usr/bin/python
#
# Copyright (C) 2013 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Module for looking up symbolic debugging information.

The information can include symbol names, offsets, and source locations.
"""

import glob
import os
import re
import subprocess

# Root of the Android source tree. Falls back to the current directory when
# the variable is unset or empty (the original indexed os.environ directly,
# which raised KeyError before the fallback could ever apply).
ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP") or "."


def FindSymbolsDir():
    """Ask the build system for the unstripped-symbols output directory.

    Runs make's "dumpvar-abs-TARGET_OUT_UNSTRIPPED" target from
    ANDROID_BUILD_TOP and joins whatever it prints onto ANDROID_BUILD_TOP.

    Returns:
      The symbols directory path. If make prints nothing this is just
      ANDROID_BUILD_TOP with a trailing separator.
    """
    cmd = ("CALLED_FROM_SETUP=true BUILD_SYSTEM=build/core "
           "SRC_TARGET_DIR=build/target make -f build/core/config.mk "
           "dumpvar-abs-TARGET_OUT_UNSTRIPPED")
    # cwd= runs make in the build top without changing this process's own
    # working directory; communicate() drains stdout and reaps the child.
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True,
                               cwd=ANDROID_BUILD_TOP,
                               universal_newlines=True)
    output = process.communicate()[0]
    return os.path.join(ANDROID_BUILD_TOP, output.strip())


SYMBOLS_DIR = FindSymbolsDir()

# Target architecture; selects the toolchain and whether StripPC clears bit 0.
ARCH = "arm"

# Cache for FindToolchain(); None until the first successful lookup.
TOOLCHAIN = None


def ToolPath(tool, toolchain=None):
    """Return a fully-qualified path to the specified tool.

    Args:
      tool: unprefixed tool name, e.g. "addr2line" or "objdump".
      toolchain: directory to search; defaults to FindToolchain().

    Returns:
      The first (in sorted order) path in the toolchain directory matching
      "*-<tool>".

    Raises:
      IndexError: if no matching tool exists in the toolchain directory.
    """
    if not toolchain:
        toolchain = FindToolchain()
    # Sorted so the choice is deterministic when several prefixes match.
    matches = sorted(glob.glob(os.path.join(toolchain, "*-" + tool)))
    if not matches:
        raise IndexError("No %s found in toolchain %s" % (tool, toolchain))
    return matches[0]


def FindToolchain():
    """Return the toolchain directory matching ARCH.

    Assumes that you're lunched such that the necessary toolchain is either
    your primary or secondary.
    TODO: we could make this 'just work' for most users by just globbing the
    newest toolchains for every architecture out of prebuilts/, but other
    parts of this tool assume you're lunched correctly anyway.

    Returns:
      The toolchain directory (cached in TOOLCHAIN after the first call).

    Raises:
      Exception: if neither ANDROID_TOOLCHAIN nor ANDROID_TOOLCHAIN_2ND_ARCH
        matches ARCH, or the matching toolchain has no addr2line.
    """
    global TOOLCHAIN
    if TOOLCHAIN is not None:
        return TOOLCHAIN

    # We use slightly different names from GCC, and there's only one toolchain
    # for x86/x86_64.
    gcc_arch = ARCH
    if gcc_arch == "arm64":
        gcc_arch = "aarch64"
    elif gcc_arch == "mips":
        gcc_arch = "mipsel"
    elif gcc_arch == "x86":
        gcc_arch = "x86_64"

    # Either variable may be missing (e.g. no secondary arch); treat a missing
    # one as "no match" instead of failing with KeyError.
    tc1 = os.environ.get("ANDROID_TOOLCHAIN", "")
    tc2 = os.environ.get("ANDROID_TOOLCHAIN_2ND_ARCH", "")

    marker = "/" + gcc_arch + "-linux-"
    if marker in tc1:
        toolchain = tc1
    elif marker in tc2:
        toolchain = tc2
    else:
        raise Exception("Could not find tool chain for %s" % (gcc_arch))

    # ToolPath raises IndexError when nothing matches; translate that into
    # the intended, more descriptive error.
    try:
        addr2line = ToolPath("addr2line", toolchain)
    except IndexError:
        addr2line = None
    if not addr2line or not os.path.exists(addr2line):
        raise Exception("No addr2line for %s" % (toolchain))

    TOOLCHAIN = toolchain
    print("Using toolchain from: %s" % TOOLCHAIN)
    return TOOLCHAIN


def SymbolInformation(lib, addr):
    """Look up symbol information about an address.

    Args:
      lib: library (or executable) pathname containing symbols
      addr: string hexadecimal address

    Returns:
      A list of the form [(source_symbol, source_location,
      object_symbol_with_offset)].

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.  The list is
      always non-empty, even if no information is available.

      Usually you want to display the source_location and
      object_symbol_with_offset from the last element in the list.
    """
    info = SymbolInformationForSet(lib, set([addr]))
    return (info and info.get(addr)) or [(None, None, None)]


def SymbolInformationForSet(lib, unique_addrs):
    """Look up symbol information for a set of addresses from the given library.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of hexadecimal addresses

    Returns:
      None if lib is empty or if either the addr2line or the objdump lookup
      produced nothing. Otherwise a dictionary of the form
      {addr: [(source_symbol, source_location, object_symbol_with_offset)]}
      where each address has a non-empty list of associated symbols and
      locations (entries are None where no information is available).

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.

      Usually you want to display the source_location and
      object_symbol_with_offset from the last element in the list.
    """
    if not lib:
        return None

    addr_to_line = CallAddr2LineForSet(lib, unique_addrs)
    if not addr_to_line:
        return None

    addr_to_objdump = CallObjdumpForSet(lib, unique_addrs)
    if not addr_to_objdump:
        return None

    result = {}
    for addr in unique_addrs:
        source_info = addr_to_line.get(addr) or [(None, None)]
        if addr in addr_to_objdump:
            object_symbol, object_offset = addr_to_objdump[addr]
            object_symbol_with_offset = FormatSymbolWithOffset(object_symbol,
                                                               object_offset)
        else:
            object_symbol_with_offset = None
        result[addr] = [
            (source_symbol, source_location, object_symbol_with_offset)
            for (source_symbol, source_location) in source_info]

    return result


def CallAddr2LineForSet(lib, unique_addrs):
    """Look up line and symbol information for a set of addresses.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of string hexadecimal addresses to look up.

    Returns:
      None if lib is empty or its symbol file does not exist. Otherwise a
      dictionary of the form {addr: [(symbol, file:line)]} where
      each address has a list of associated symbols and locations
      or an empty list if no symbol information was found. Addresses may be
      missing from the dictionary if addr2line exits early.

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.
    """
    if not lib:
        return None

    # Plain concatenation, not os.path.join: presumably lib starts with "/"
    # (join would then discard SYMBOLS_DIR) -- confirm against callers.
    symbols = SYMBOLS_DIR + lib
    if not os.path.exists(symbols):
        return None

    cmd = [ToolPath("addr2line"), "--functions", "--inlines",
           "--demangle", "--exe=" + symbols]
    child = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             universal_newlines=True)

    result = {}
    try:
        for addr in sorted(unique_addrs):
            child.stdin.write("0x%s\n" % addr)
            child.stdin.flush()
            records = []
            first = True
            reached_eof = False
            while True:
                raw_symbol = child.stdout.readline()
                raw_location = child.stdout.readline()
                if not raw_symbol or not raw_location:
                    # addr2line closed its output; without this check the
                    # loop would append empty records forever.
                    reached_eof = True
                    break
                symbol = raw_symbol.strip()
                if symbol == "??":
                    symbol = None
                location = raw_location.strip()
                # Unknown locations are printed as "??:0" or "??:?".
                if location == "??:0" or location == "??:?":
                    location = None
                if symbol is None and location is None:
                    break
                records.append((symbol, location))
                if first:
                    # Write a blank line as a sentinel so we know when to
                    # stop reading inlines from the output.
                    # The blank line will cause addr2line to emit
                    # "??\n??:0\n".
                    child.stdin.write("\n")
                    child.stdin.flush()
                    first = False
            result[addr] = records
            if reached_eof:
                break
    finally:
        try:
            child.stdin.close()
        except (IOError, OSError):
            # The child may already be gone; nothing left to flush to it.
            pass
        child.stdout.close()
        child.wait()
    return result


def StripPC(addr):
    """Strips the Thumb bit from a program counter address when appropriate.

    Args:
      addr: the program counter address (integer)

    Returns:
      The stripped program counter address: bit 0 is cleared when ARCH is
      "arm", otherwise addr is returned unchanged.
    """
    if ARCH == "arm":
        return addr & ~1
    return addr


def CallObjdumpForSet(lib, unique_addrs):
    """Use objdump to find out the names of the containing functions.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of string hexadecimal addresses to find the
        functions for.

    Returns:
      None if lib is empty or its symbol file does not exist. Otherwise a
      dictionary of the form {addr: (string symbol, offset)} containing the
      addresses that matched the start of a disassembled instruction.
    """
    if not lib:
        return None

    # Plain concatenation, not os.path.join: presumably lib starts with "/"
    # (join would then discard SYMBOLS_DIR) -- confirm against callers.
    symbols = SYMBOLS_DIR + lib
    if not os.path.exists(symbols):
        return None

    # Sort numerically: a lexicographic sort of hex strings misorders
    # addresses of different lengths ("1000" < "ff"), which breaks both the
    # disassembly range and the in-order matching below.
    addrs = sorted(unique_addrs, key=lambda a: int(a, 16))
    if not addrs:
        return {}

    start_addr_dec = str(StripPC(int(addrs[0], 16)))
    stop_addr_dec = str(StripPC(int(addrs[-1], 16)) + 8)
    cmd = [ToolPath("objdump"),
           "--section=.text",
           "--demangle",
           "--disassemble",
           "--start-address=" + start_addr_dec,
           "--stop-address=" + stop_addr_dec,
           symbols]

    # Function lines look like:
    #   000177b0 <android::IBinder::~IBinder()+0x2c>:
    # We pull out the address and function first. Then we check for an
    # optional offset. This is tricky due to functions that look like
    # "operator+(..)+0x2c"
    func_regexp = re.compile(r"(^[a-f0-9]*) \<(.*)\>:$")
    offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

    # A disassembly line looks like:
    #   177b2:  b510        push  {r4, lr}
    # Requiring at least one hex digit keeps int() below from ever seeing an
    # empty or whitespace-only capture.
    asm_regexp = re.compile(r"^ *([a-f0-9]+):")

    current_symbol = None    # The current function symbol in the disassembly.
    current_symbol_addr = 0  # The address of the current function.
    addr_index = 0           # Index of the address we are looking for.

    process = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                               universal_newlines=True)
    result = {}
    try:
        for line in process.stdout:
            # Is it a function line like:
            #   000177b0 <android::IBinder::~IBinder()>:
            components = func_regexp.match(line)
            if components:
                # This is a new function, so record the current function and
                # its address.
                current_symbol_addr = int(components.group(1), 16)
                current_symbol = components.group(2)

                # Does it have an optional offset like: "foo(..)+0x2c"?
                components = offset_regexp.match(current_symbol)
                if components:
                    current_symbol = components.group(1)
                    offset = components.group(2)
                    if offset:
                        current_symbol_addr -= int(offset, 16)

            # Is it a disassembly line like:
            #   177b2:  b510        push  {r4, lr}
            components = asm_regexp.match(line)
            if components:
                i_addr = int(components.group(1), 16)
                # Consume every target at or before this instruction. Targets
                # the disassembly has already passed are skipped so a single
                # unmatched address cannot stall all later lookups; several
                # targets may share one instruction once the Thumb bit is
                # stripped.
                while addr_index < len(addrs):
                    target_addr = addrs[addr_index]
                    i_target = StripPC(int(target_addr, 16))
                    if i_target > i_addr:
                        break
                    if i_target == i_addr:
                        result[target_addr] = (
                            current_symbol, i_target - current_symbol_addr)
                    addr_index += 1
                if addr_index >= len(addrs):
                    break
    finally:
        process.stdout.close()
        process.wait()

    return result


def CallCppFilt(mangled_symbol):
    """Demangle a C++ symbol using the toolchain's c++filt.

    Args:
      mangled_symbol: the mangled symbol name (string)

    Returns:
      The first line of c++filt's output with surrounding whitespace
      stripped, or an empty string if it produced no output.
    """
    cmd = [ToolPath("c++filt")]
    process = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               universal_newlines=True)
    # communicate() writes the input, closes stdin, drains stdout and reaps
    # the child in one step.
    output = process.communicate(mangled_symbol + "\n")[0]
    lines = output.splitlines()
    return lines[0].strip() if lines else ""


def FormatSymbolWithOffset(symbol, offset):
    """Format a symbol and byte offset as "symbol+offset".

    Args:
      symbol: the symbol name
      offset: integer offset from the start of the symbol

    Returns:
      symbol itself when offset is 0, otherwise "symbol+<decimal offset>".
    """
    if offset == 0:
        return symbol
    return "%s+%d" % (symbol, offset)