#!/usr/bin/python
#
# Copyright (C) 2013 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Module for looking up symbolic debugging information.

The information can include symbol names, offsets, and source locations.
"""

import glob
import os
import platform
import re
import subprocess
import unittest

# Root of the source tree. Falls back to the current directory when the
# ANDROID_BUILD_TOP environment variable is unset or empty.
ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP")
if not ANDROID_BUILD_TOP:
    ANDROID_BUILD_TOP = "."


def FindSymbolsDir():
    """Ask the build system where the unstripped binaries live.

    Runs make's dumpvar-abs-TARGET_OUT_UNSTRIPPED target from
    ANDROID_BUILD_TOP and joins whatever it prints onto ANDROID_BUILD_TOP.

    Returns:
      The path of the symbols directory as a string. If make prints nothing
      (for example because it failed), this is ANDROID_BUILD_TOP with a
      trailing separator.
    """
    cmd = ("CALLED_FROM_SETUP=true BUILD_SYSTEM=build/core "
           "SRC_TARGET_DIR=build/target make -f build/core/config.mk "
           "dumpvar-abs-TARGET_OUT_UNSTRIPPED")
    # Run the child in ANDROID_BUILD_TOP instead of chdir()ing this process,
    # and read text (not bytes) so the result joins cleanly on Python 2 and 3.
    child = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True,
                             cwd=ANDROID_BUILD_TOP, universal_newlines=True)
    output = child.communicate()[0]
    return os.path.join(ANDROID_BUILD_TOP, output.strip())


SYMBOLS_DIR = FindSymbolsDir()

ARCH = "arm"


# These are private. Do not access them from other modules.
_CACHED_TOOLCHAIN = None
_CACHED_TOOLCHAIN_ARCH = None


def ToolPath(tool, toolchain=None):
    """Return a fully-qualified path to the specified tool.

    Args:
      tool: tool name without the target prefix, e.g. "addr2line"
      toolchain: toolchain bin directory; looked up with FindToolchain()
        when omitted

    Returns:
      The first path in the toolchain directory matching "*-<tool>".

    Raises:
      IndexError: if the toolchain directory contains no such tool.
    """
    if not toolchain:
        toolchain = FindToolchain()
    matches = glob.glob(os.path.join(toolchain, "*-" + tool))
    if not matches:
        raise IndexError("No %s found in toolchain %s" % (tool, toolchain))
    return matches[0]


def FindToolchain():
    """Returns the toolchain matching ARCH.

    The result is cached per ARCH value, so changing ARCH triggers a new
    search on the next call.

    Raises:
      Exception: if no toolchain directory exists for ARCH, or the chosen
        toolchain has no addr2line.
    """
    global _CACHED_TOOLCHAIN, _CACHED_TOOLCHAIN_ARCH
    if _CACHED_TOOLCHAIN is not None and _CACHED_TOOLCHAIN_ARCH == ARCH:
        return _CACHED_TOOLCHAIN

    # We use slightly different names from GCC, and there's only one toolchain
    # for x86/x86_64. Note that these are the names of the top-level directory
    # rather than the _different_ names used lower down the directory
    # hierarchy!
    gcc_dir = ARCH
    if gcc_dir == "arm64":
        gcc_dir = "aarch64"
    elif gcc_dir == "mips64":
        gcc_dir = "mips"
    elif gcc_dir == "x86_64":
        gcc_dir = "x86"

    os_name = platform.system().lower()

    available_toolchains = glob.glob(
        "%s/prebuilts/gcc/%s-x86/%s/*-linux-*/bin/"
        % (ANDROID_BUILD_TOP, os_name, gcc_dir))
    if not available_toolchains:
        raise Exception("Could not find tool chain for %s" % (ARCH))

    # Pick the lexicographically last directory name.
    toolchain = sorted(available_toolchains)[-1]

    # Check with glob directly: ToolPath() raises on a missing tool, which
    # would otherwise pre-empt this more descriptive error.
    if not glob.glob(os.path.join(toolchain, "*-addr2line")):
        raise Exception("No addr2line for %s" % (toolchain))

    _CACHED_TOOLCHAIN = toolchain
    _CACHED_TOOLCHAIN_ARCH = ARCH
    print("Using %s toolchain from: %s"
          % (_CACHED_TOOLCHAIN_ARCH, _CACHED_TOOLCHAIN))
    return _CACHED_TOOLCHAIN


def SymbolInformation(lib, addr):
    """Look up symbol information about an address.

    Args:
      lib: library (or executable) pathname containing symbols
      addr: string hexadecimal address

    Returns:
      A list of the form [(source_symbol, source_location,
      object_symbol_with_offset)].

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.  The list is
      always non-empty, even if no information is available.

      Usually you want to display the source_location and
      object_symbol_with_offset from the last element in the list.
    """
    info = SymbolInformationForSet(lib, set([addr]))
    return (info and info.get(addr)) or [(None, None, None)]


def SymbolInformationForSet(lib, unique_addrs):
    """Look up symbol information for a set of addresses from the given library.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of hexadecimal addresses

    Returns:
      A dictionary of the form {addr: [(source_symbol, source_location,
      object_symbol_with_offset)]} where each address has a list of
      associated symbols and locations.  The list is always non-empty.
      None is returned instead when lib is empty or when either addr2line
      or objdump yields no information at all.

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.  The list is
      always non-empty, even if no information is available.

      Usually you want to display the source_location and
      object_symbol_with_offset from the last element in the list.
    """
    if not lib:
        return None

    addr_to_line = CallAddr2LineForSet(lib, unique_addrs)
    if not addr_to_line:
        return None

    addr_to_objdump = CallObjdumpForSet(lib, unique_addrs)
    if not addr_to_objdump:
        return None

    result = {}
    for addr in unique_addrs:
        source_info = addr_to_line.get(addr)
        if not source_info:
            source_info = [(None, None)]
        if addr in addr_to_objdump:
            (object_symbol, object_offset) = addr_to_objdump[addr]
            object_symbol_with_offset = FormatSymbolWithOffset(object_symbol,
                                                               object_offset)
        else:
            object_symbol_with_offset = None
        result[addr] = [
            (source_symbol, source_location, object_symbol_with_offset)
            for (source_symbol, source_location) in source_info]

    return result


def CallAddr2LineForSet(lib, unique_addrs):
    """Look up line and symbol information for a set of addresses.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of string hexadecimal addresses to look up.

    Returns:
      A dictionary of the form {addr: [(symbol, file:line)]} where
      each address has a list of associated symbols and locations
      or an empty list if no symbol information was found. None is
      returned when lib is empty or its symbol file does not exist.

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.
    """
    if not lib:
        return None

    symbols = SYMBOLS_DIR + lib
    if not os.path.exists(symbols):
        return None

    cmd = [ToolPath("addr2line"), "--functions", "--inlines",
           "--demangle", "--exe=" + symbols]
    child = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE, universal_newlines=True)

    result = {}
    try:
        child_alive = True
        for addr in sorted(unique_addrs):
            child.stdin.write("0x%s\n" % addr)
            child.stdin.flush()
            records = []
            first = True
            while True:
                symbol_line = child.stdout.readline()
                location_line = child.stdout.readline()
                if not symbol_line or not location_line:
                    # EOF: addr2line exited. Stop instead of reading empty
                    # strings forever.
                    child_alive = False
                    break
                symbol = symbol_line.strip()
                if symbol == "??":
                    symbol = None
                location = location_line.strip()
                if location == "??:0":
                    location = None
                if symbol is None and location is None:
                    break
                records.append((symbol, location))
                if first:
                    # Write a blank line as a sentinel so we know when to stop
                    # reading inlines from the output.
                    # The blank line will cause addr2line to emit "??\n??:0\n".
                    child.stdin.write("\n")
                    # Flush, or a buffered pipe would hold the sentinel back
                    # while we block waiting for its reply.
                    child.stdin.flush()
                    first = False
            result[addr] = records
            if not child_alive:
                break
    finally:
        try:
            child.stdin.close()
        except IOError:
            # The child is already gone; there is nothing left to flush to.
            pass
        child.stdout.close()
        child.wait()
    return result


def StripPC(addr):
    """Strips the Thumb bit from a program counter address when appropriate.

    Args:
      addr: the program counter address

    Returns:
      The stripped program counter address: bit 0 is cleared when ARCH is
      "arm"; otherwise addr is returned unchanged.
    """
    if ARCH == "arm":
        return addr & ~1
    return addr


def CallObjdumpForSet(lib, unique_addrs):
    """Use objdump to find out the names of the containing functions.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of string hexadecimal addresses to find the functions
        for.

    Returns:
      A dictionary of the form {addr: (string symbol, offset)}. Addresses that
      do not coincide with a disassembled instruction are omitted. None is
      returned when lib is empty or its symbol file does not exist.
    """
    if not lib:
        return None

    symbols = SYMBOLS_DIR + lib
    if not os.path.exists(symbols):
        return None

    if not unique_addrs:
        return {}

    # Sort by numeric value: a plain string sort misorders hex strings of
    # different lengths (e.g. "10" < "a"), and the scan below relies on
    # ascending addresses.
    addrs = sorted(unique_addrs, key=lambda hex_addr: int(hex_addr, 16))
    start_addr_dec = str(StripPC(int(addrs[0], 16)))
    stop_addr_dec = str(StripPC(int(addrs[-1], 16)) + 8)
    cmd = [ToolPath("objdump"),
           "--section=.text",
           "--demangle",
           "--disassemble",
           "--start-address=" + start_addr_dec,
           "--stop-address=" + stop_addr_dec,
           symbols]

    # Function lines look like:
    #   000177b0 <android::IBinder::~IBinder()+0x2c>:
    # We pull out the address and function first. Then we check for an
    # optional offset. This is tricky due to functions that look like
    # "operator+(..)+0x2c"
    func_regexp = re.compile(r"(^[a-f0-9]*) <(.*)>:$")
    offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

    # A disassembly line looks like:
    #   177b2:  b510        push  {r4, lr}
    asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

    current_symbol = None  # The current function symbol in the disassembly.
    current_symbol_addr = 0  # The address of the current function.
    addr_index = 0  # Index of the address that we are currently looking for.

    child = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                             universal_newlines=True)
    stream = child.stdout
    result = {}
    try:
        for line in stream:
            # Is it a function line like:
            #   000177b0 <android::IBinder::~IBinder()>:
            components = func_regexp.match(line)
            if components:
                # This is a new function, so record the current function and
                # its address.
                current_symbol_addr = int(components.group(1), 16)
                current_symbol = components.group(2)

                # Does it have an optional offset like: "foo(..)+0x2c"?
                components = offset_regexp.match(current_symbol)
                if components:
                    current_symbol = components.group(1)
                    offset = components.group(2)
                    if offset:
                        current_symbol_addr -= int(offset, 16)

            # Is it a disassembly line like:
            #   177b2:  b510        push  {r4, lr}
            components = asm_regexp.match(line)
            if components:
                i_addr = int(components.group(1), 16)
                # Consume every pending target at or below this instruction.
                # Targets already passed can never match later lines, and
                # several targets may share one stripped PC.
                while addr_index < len(addrs):
                    target_addr = addrs[addr_index]
                    i_target = StripPC(int(target_addr, 16))
                    if i_target > i_addr:
                        break
                    if i_target == i_addr:
                        result[target_addr] = (
                            current_symbol, i_target - current_symbol_addr)
                    addr_index += 1
                if addr_index >= len(addrs):
                    break
    finally:
        stream.close()
        child.wait()

    return result


def CallCppFilt(mangled_symbol):
    """Demangle a symbol by piping it through the toolchain's c++filt.

    Args:
      mangled_symbol: the mangled symbol string

    Returns:
      The first line c++filt prints, stripped of surrounding whitespace, or
      an empty string if it prints nothing.
    """
    cmd = [ToolPath("c++filt")]
    process = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               universal_newlines=True)
    output = process.communicate(mangled_symbol + "\n")[0]
    lines = output.splitlines()
    return lines[0].strip() if lines else ""


def FormatSymbolWithOffset(symbol, offset):
    """Format a symbol and a byte offset as "symbol+offset".

    Args:
      symbol: the symbol name
      offset: integer offset from the start of the symbol

    Returns:
      symbol unchanged when offset is 0, otherwise "symbol+<decimal offset>".
    """
    if offset == 0:
        return symbol
    return "%s+%d" % (symbol, offset)


class FindToolchainTests(unittest.TestCase):
    """Checks that a toolchain can be located for each supported ABI."""

    def setUp(self):
        # Remember ARCH so the test does not leak its last value.
        self._saved_arch = ARCH
        gcc_root = os.path.join(ANDROID_BUILD_TOP, "prebuilts", "gcc")
        if not os.path.isdir(gcc_root):
            self.skipTest("no prebuilt toolchains under %s" % gcc_root)

    def tearDown(self):
        global ARCH
        ARCH = self._saved_arch

    def assert_toolchain_found(self, abi):
        global ARCH
        ARCH = abi
        FindToolchain()  # Will throw on failure.

    def test_toolchains_found(self):
        self.assert_toolchain_found("arm")
        self.assert_toolchain_found("arm64")
        self.assert_toolchain_found("mips")
        self.assert_toolchain_found("x86")
        self.assert_toolchain_found("x86_64")


if __name__ == '__main__':
    unittest.main()