#!/usr/bin/env python3
#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""annotate.py: annotate source files based on perf.data.
"""


import argparse
import os
import os.path
import shutil

from simpleperf_report_lib import ReportLib
from simpleperf_utils import (
    Addr2Nearestline, BinaryFinder, extant_dir, flatten_arg_list, is_windows, log_exit, log_info,
    log_warning, ReadElf, SourceFileSearcher)


class SourceLine(object):
    """A (file, function, line) location that an address was resolved to.

    The *_key properties give hashable keys used to de-duplicate period
    accounting per file, per function and per line.
    """

    def __init__(self, file_id, function, line):
        self.file = file_id
        self.function = function
        self.line = line

    @property
    def file_key(self):
        """Key identifying the source file."""
        return self.file

    @property
    def function_key(self):
        """Key identifying a function within a source file."""
        return (self.file, self.function)

    @property
    def line_key(self):
        """Key identifying a line within a source file."""
        return (self.file, self.line)


class Addr2Line(object):
    """Collect information of how to map [dso_name, vaddr] to [source_file:line].

    Wraps Addr2Nearestline (address -> source lookup) and SourceFileSearcher
    (reported source path -> path under the given source directories).
    """

    def __init__(self, ndk_path, binary_cache_path, source_dirs):
        binary_finder = BinaryFinder(binary_cache_path, ReadElf(ndk_path))
        self.addr2line = Addr2Nearestline(ndk_path, binary_finder, True)
        self.source_searcher = SourceFileSearcher(source_dirs)

    def add_addr(self, dso_path: str, build_id: str, func_addr: int, addr: int):
        """Register an address of dso_path that should be converted later.

        build_id is forwarded to the underlying resolver instead of being
        silently dropped; callers obtain it from ReportLib.GetBuildIdForPath().
        NOTE(review): assumes Addr2Nearestline.add_addr takes
        (dso_path, build_id, func_addr, addr) -- confirm against simpleperf_utils.
        """
        self.addr2line.add_addr(dso_path, build_id, func_addr, addr)

    def convert_addrs_to_lines(self):
        """Convert all addresses registered via add_addr()."""
        self.addr2line.convert_addrs_to_lines()

    def get_sources(self, dso_path, addr):
        """Return a list of SourceLine for addr in dso_path.

        Returns an empty list when the dso or the address has no source
        information. When the source searcher can't map a source file to a
        real path, the path reported by the resolver is used unchanged.
        """
        dso = self.addr2line.get_dso(dso_path)
        if not dso:
            return []
        source = self.addr2line.get_addr_source(dso, addr)
        if not source:
            return []
        result = []
        for source_file, source_line, function_name in source:
            real_path = self.source_searcher.get_real_path(source_file)
            result.append(SourceLine(real_path or source_file, function_name, source_line))
        return result


class Period(object):
    """event count information. It can be used to represent event count
       of a line, a function, a source file, or a binary. It contains two
       parts: period and acc_period.
       When used for a line, period is the event count occurred when running
       that line, acc_period is the accumulated event count occurred when
       running that line and functions called by that line. Same thing applies
       when it is used for a function, a source file, or a binary.
    """

    def __init__(self, period=0, acc_period=0):
        self.period = period
        self.acc_period = acc_period

    def __iadd__(self, other):
        """Add other's period and acc_period to this object in place."""
        self.period += other.period
        self.acc_period += other.acc_period
        return self


class DsoPeriod(object):
    """Period for each shared library"""

    def __init__(self, dso_name):
        self.dso_name = dso_name
        self.period = Period()

    def add_period(self, period):
        """Accumulate period into this dso's total."""
        self.period += period


class FilePeriod(object):
    """Period for each source file"""

    def __init__(self, file_id):
        self.file = file_id
        self.period = Period()
        # Period for each line in the file.
        self.line_dict = {}
        # Period for each function in the source file. Each value is a
        # two-element list: [function_start_line, Period].
        self.function_dict = {}

    def add_period(self, period):
        """Accumulate period into this file's total."""
        self.period += period

    def add_line_period(self, line, period):
        """Accumulate period into the entry for the given line number."""
        line_period = self.line_dict.get(line)
        if line_period is None:
            line_period = self.line_dict[line] = Period()
        line_period += period

    def add_function_period(self, function_name, function_start_line, period):
        """Accumulate period into the entry for function_name.

        The start line is recorded only when the function is first seen;
        a missing start line (None) is stored as -1.
        """
        entry = self.function_dict.get(function_name)
        if entry is None:
            if function_start_line is None:
                function_start_line = -1
            entry = self.function_dict[function_name] = [function_start_line, Period()]
        entry[1] += period


class SourceFileAnnotator(object):
    """group code for annotating source files"""

    def __init__(self, config):
        """Validate config, reset the output directory and init state.

        config must contain the keys perf_data_list, source_dirs,
        comm_filters, pid_filters, tid_filters, dso_filters and ndk_path;
        log_exit() is called when one is missing. The output directory
        'annotated_files' is removed if it exists and then recreated.
        """
        # check config variables
        config_names = ['perf_data_list', 'source_dirs', 'comm_filters',
                        'pid_filters', 'tid_filters', 'dso_filters', 'ndk_path']
        for name in config_names:
            if name not in config:
                log_exit('config [%s] is missing' % name)
        # Use binary_cache / kallsyms only when they exist in the current directory.
        symfs_dir = 'binary_cache'
        if not os.path.isdir(symfs_dir):
            symfs_dir = None
        kallsyms = 'binary_cache/kallsyms'
        if not os.path.isfile(kallsyms):
            kallsyms = None

        # init member variables
        self.config = config
        self.symfs_dir = symfs_dir
        self.kallsyms = kallsyms
        # An empty or missing filter list means "no filtering" (None).
        self.comm_filter = set(config['comm_filters']) if config.get('comm_filters') else None
        if config.get('pid_filters'):
            self.pid_filter = {int(x) for x in config['pid_filters']}
        else:
            self.pid_filter = None
        if config.get('tid_filters'):
            self.tid_filter = {int(x) for x in config['tid_filters']}
        else:
            self.tid_filter = None
        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None

        config['annotate_dest_dir'] = 'annotated_files'
        output_dir = config['annotate_dest_dir']
        if os.path.isdir(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)

        self.addr2line = Addr2Line(self.config['ndk_path'], symfs_dir, config.get('source_dirs'))
        # Total event count of all samples that contributed to the report.
        self.period = 0
        self.dso_periods = {}
        self.file_periods = {}

    def annotate(self):
        """Run the whole pipeline: collect, convert, account, then write."""
        self._collect_addrs()
        self._convert_addrs_to_lines()
        self._generate_periods()
        self._write_summary()
        self._annotate_files()

    def _open_report_lib(self, perf_data):
        """Create a ReportLib reading perf_data, using binary_cache if present."""
        lib = ReportLib()
        lib.SetRecordFile(perf_data)
        if self.symfs_dir:
            lib.SetSymfs(self.symfs_dir)
        if self.kallsyms:
            lib.SetKallsymsFile(self.kallsyms)
        return lib

    @staticmethod
    def _get_symbols_of_current_sample(lib):
        """Return the sample's own symbol followed by its callchain symbols."""
        symbols = [lib.GetSymbolOfCurrentSample()]
        callchain = lib.GetCallChainOfCurrentSample()
        for i in range(callchain.nr):
            symbols.append(callchain.entries[i].symbol)
        return symbols

    def _collect_addrs(self):
        """Read perf.data, collect all addresses we need to convert to
           source file:line.
        """
        for perf_data in self.config['perf_data_list']:
            lib = self._open_report_lib(perf_data)
            try:
                while True:
                    sample = lib.GetNextSample()
                    if sample is None:
                        break
                    if not self._filter_sample(sample):
                        continue
                    for symbol in self._get_symbols_of_current_sample(lib):
                        if not self._filter_symbol(symbol):
                            continue
                        build_id = lib.GetBuildIdForPath(symbol.dso_name)
                        # The hit address is used for file/line periods, the
                        # symbol start address for function periods.
                        self.addr2line.add_addr(symbol.dso_name, build_id, symbol.symbol_addr,
                                                symbol.vaddr_in_file)
                        self.addr2line.add_addr(symbol.dso_name, build_id, symbol.symbol_addr,
                                                symbol.symbol_addr)
            finally:
                # Close the record file even when processing a sample fails.
                lib.Close()

    def _filter_sample(self, sample):
        """Return true if the sample can be used."""
        if self.comm_filter and sample.thread_comm not in self.comm_filter:
            return False
        if self.pid_filter and sample.pid not in self.pid_filter:
            return False
        if self.tid_filter and sample.tid not in self.tid_filter:
            return False
        return True

    def _filter_symbol(self, symbol):
        """Return true if the symbol's dso passes the dso filter (or none is set)."""
        return not self.dso_filter or symbol.dso_name in self.dso_filter

    def _convert_addrs_to_lines(self):
        """Convert all collected addresses to source lines."""
        self.addr2line.convert_addrs_to_lines()

    def _generate_periods(self):
        """read perf.data, collect Period for all types:
            binaries, source files, functions, lines.
        """
        for perf_data in self.config['perf_data_list']:
            lib = self._open_report_lib(perf_data)
            try:
                while True:
                    sample = lib.GetNextSample()
                    if sample is None:
                        break
                    if not self._filter_sample(sample):
                        continue
                    self._generate_periods_for_sample(lib, sample)
            finally:
                # Close the record file even when processing a sample fails.
                lib.Close()

    def _generate_periods_for_sample(self, lib, sample):
        """Add the current sample's period to every dso/file/function/line it hits."""
        symbols = self._get_symbols_of_current_sample(lib)
        # Each sample has a callchain, but its period is only used once
        # to add period for each function/source_line/source_file/binary.
        # For example, if more than one entry in the callchain hits a
        # function, the event count of that function is only increased once.
        # Otherwise, we may get periods > 100%.
        is_sample_used = False
        used_dso_dict = {}
        used_file_dict = {}
        used_function_dict = {}
        used_line_dict = {}
        # The first symbol is where the sample hit, so it gets both period and
        # acc_period; callchain entries only get acc_period.
        period = Period(sample.period, sample.period)
        for j, symbol in enumerate(symbols):
            if j == 1:
                period = Period(0, sample.period)
            if not self._filter_symbol(symbol):
                continue
            is_sample_used = True
            # Add period to dso.
            self._add_dso_period(symbol.dso_name, period, used_dso_dict)
            # Add period to source file.
            sources = self.addr2line.get_sources(symbol.dso_name, symbol.vaddr_in_file)
            for source in sources:
                if source.file:
                    self._add_file_period(source, period, used_file_dict)
                    # Add period to line.
                    if source.line:
                        self._add_line_period(source, period, used_line_dict)
            # Add period to function.
            sources = self.addr2line.get_sources(symbol.dso_name, symbol.symbol_addr)
            for source in sources:
                if source.file:
                    self._add_file_period(source, period, used_file_dict)
                    if source.function:
                        self._add_function_period(source, period, used_function_dict)

        if is_sample_used:
            self.period += sample.period

    def _add_dso_period(self, dso_name, period, used_dso_dict):
        """Add period to dso_name unless this sample already counted it."""
        if dso_name in used_dso_dict:
            return
        used_dso_dict[dso_name] = True
        dso_period = self.dso_periods.get(dso_name)
        if dso_period is None:
            dso_period = self.dso_periods[dso_name] = DsoPeriod(dso_name)
        dso_period.add_period(period)

    def _add_file_period(self, source, period, used_file_dict):
        """Add period to source's file unless this sample already counted it."""
        if source.file_key in used_file_dict:
            return
        used_file_dict[source.file_key] = True
        file_period = self.file_periods.get(source.file)
        if file_period is None:
            file_period = self.file_periods[source.file] = FilePeriod(source.file)
        file_period.add_period(period)

    def _add_line_period(self, source, period, used_line_dict):
        """Add period to source's line unless this sample already counted it.

        The FilePeriod for source.file must already exist, i.e.
        _add_file_period() must have been called for this source first.
        """
        if source.line_key in used_line_dict:
            return
        used_line_dict[source.line_key] = True
        file_period = self.file_periods[source.file]
        file_period.add_line_period(source.line, period)

    def _add_function_period(self, source, period, used_function_dict):
        """Add period to source's function unless this sample already counted it.

        The FilePeriod for source.file must already exist, i.e.
        _add_file_period() must have been called for this source first.
        """
        if source.function_key in used_function_dict:
            return
        used_function_dict[source.function_key] = True
        file_period = self.file_periods[source.file]
        file_period.add_function_period(source.function, source.line, period)

    def _write_summary(self):
        """Write the 'summary' file in the output directory.

        It lists the total period, then per-dso and per-file percentages
        (sorted by accumulated period, descending), then for each file its
        functions (same order) and its lines (ascending line number).
        """
        summary = os.path.join(self.config['annotate_dest_dir'], 'summary')
        with open(summary, 'w') as f:
            f.write('total period: %d\n\n' % self.period)
            dso_periods = sorted(self.dso_periods.values(),
                                 key=lambda x: x.period.acc_period, reverse=True)
            for dso_period in dso_periods:
                f.write('dso %s: %s\n' % (dso_period.dso_name,
                                          self._get_percentage_str(dso_period.period)))
            f.write('\n')

            file_periods = sorted(self.file_periods.values(),
                                  key=lambda x: x.period.acc_period, reverse=True)
            for file_period in file_periods:
                f.write('file %s: %s\n' % (file_period.file,
                                           self._get_percentage_str(file_period.period)))
            for file_period in file_periods:
                f.write('\n\n%s: %s\n' % (file_period.file,
                                          self._get_percentage_str(file_period.period)))
                values = [(func_name, func_start_line, period)
                          for func_name, (func_start_line, period)
                          in file_period.function_dict.items()]
                values.sort(key=lambda x: x[2].acc_period, reverse=True)
                for func_name, func_start_line, period in values:
                    f.write('\tfunction (%s): line %d, %s\n' % (
                        func_name, func_start_line, self._get_percentage_str(period)))
                f.write('\n')
                for line in sorted(file_period.line_dict):
                    f.write('\tline %d: %s\n' % (
                        line, self._get_percentage_str(file_period.line_dict[line])))

    def _get_percentage_str(self, period, short=False):
        """Format period as percentages of the total; short selects brief labels."""
        s = 'acc_p: %f%%, p: %f%%' if short else 'accumulated_period: %f%%, period: %f%%'
        return s % self._get_percentage(period)

    def _get_percentage(self, period):
        """Return (acc_period %, period %) of the total, or (0, 0) if the total is 0."""
        if self.period == 0:
            return (0, 0)
        acc_p = 100.0 * period.acc_period / self.period
        p = 100.0 * period.period / self.period
        return (acc_p, p)

    def _annotate_files(self):
        """Annotate Source files: add acc_period/period for each source file.
           1. Annotate java source files, which have $JAVA_SRC_ROOT prefix.
           2. Annotate c++ source files.
        """
        dest_dir = self.config['annotate_dest_dir']
        for from_path, file_period in self.file_periods.items():
            if not os.path.isfile(from_path):
                log_warning("can't find source file for path %s" % from_path)
                continue
            # Turn the source path into a relative path below dest_dir.
            if from_path.startswith('/'):
                to_path = os.path.join(dest_dir, from_path[1:])
            elif is_windows() and ':\\' in from_path:
                to_path = os.path.join(dest_dir, from_path.replace(':\\', os.sep))
            else:
                to_path = os.path.join(dest_dir, from_path)
            is_java = from_path.endswith('.java')
            self._annotate_file(from_path, to_path, file_period, is_java)

    def _annotate_file(self, from_path, to_path, file_period, is_java):
        """Annotate a source file.

        Annotate a source file in three steps:
          1. In the first line, show periods of this file.
          2. For each function, show periods of this function.
          3. For each line not hitting the same line as functions, show
             line periods.
        """
        log_info('annotate file %s' % from_path)
        with open(from_path, 'r') as rf:
            lines = rf.readlines()

        # Later assignments win: function annotations override line
        # annotations, and the file annotation always owns line 1.
        annotates = {}
        for line, line_period in file_period.line_dict.items():
            annotates[line] = self._get_percentage_str(line_period, True)
        for func_start_line, period in file_period.function_dict.values():
            if func_start_line == -1:
                continue
            # For java files the annotation is put one line above the
            # recorded function start line.
            line = func_start_line - 1 if is_java else func_start_line
            annotates[line] = '[func] ' + self._get_percentage_str(period, True)
        annotates[1] = '[file] ' + self._get_percentage_str(file_period.period, True)

        max_annotate_cols = max(len(text) for text in annotates.values())
        # 6 == len('/* ') + len(' */'), so unannotated code stays aligned.
        empty_annotate = ' ' * (max_annotate_cols + 6)

        os.makedirs(os.path.dirname(to_path), exist_ok=True)
        with open(to_path, 'w') as wf:
            for line_number, text in enumerate(lines, start=1):
                annotate = annotates.get(line_number)
                if annotate is None:
                    # Don't add trailing-whitespace-only padding to blank lines.
                    prefix = empty_annotate if text.strip() else ''
                else:
                    prefix = '/* ' + annotate.ljust(max_annotate_cols) + ' */'
                wf.write(prefix)
                wf.write(text)


def main():
    """Parse command line arguments and annotate source files."""
    parser = argparse.ArgumentParser(description="""
        Annotate source files based on profiling data. It reads line information from binary_cache
        generated by app_profiler.py or binary_cache_builder.py, and generate annotated source
        files in annotated_files directory.""")
    parser.add_argument('-i', '--perf_data_list', nargs='+', action='append', help="""
        The paths of profiling data. Default is perf.data.""")
    parser.add_argument('-s', '--source_dirs', type=extant_dir, nargs='+', action='append', help="""
        Directories to find source files.""")
    parser.add_argument('--comm', nargs='+', action='append', help="""
        Use samples only in threads with selected names.""")
    parser.add_argument('--pid', nargs='+', action='append', help="""
        Use samples only in processes with selected process ids.""")
    parser.add_argument('--tid', nargs='+', action='append', help="""
        Use samples only in threads with selected thread ids.""")
    parser.add_argument('--dso', nargs='+', action='append', help="""
        Use samples only in selected binaries.""")
    parser.add_argument('--ndk_path', type=extant_dir, help='Set the path of a ndk release.')

    args = parser.parse_args()
    config = {}
    config['perf_data_list'] = flatten_arg_list(args.perf_data_list)
    if not config['perf_data_list']:
        config['perf_data_list'].append('perf.data')
    config['source_dirs'] = flatten_arg_list(args.source_dirs)
    config['comm_filters'] = flatten_arg_list(args.comm)
    config['pid_filters'] = flatten_arg_list(args.pid)
    config['tid_filters'] = flatten_arg_list(args.tid)
    config['dso_filters'] = flatten_arg_list(args.dso)
    config['ndk_path'] = args.ndk_path

    annotator = SourceFileAnnotator(config)
    annotator.annotate()
    log_info('annotate finish successfully, please check result in annotated_files/.')


if __name__ == '__main__':
    main()