#!/usr/bin/env python3
#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""annotate.py: annotate source files based on perf.data.
"""

import logging
import os
import os.path
import shutil
from texttable import Texttable
from typing import Dict, Union

from simpleperf_report_lib import ReportLib
from simpleperf_utils import (
    Addr2Nearestline, BaseArgumentParser, BinaryFinder, extant_dir, flatten_arg_list, is_windows,
    log_exit, ReadElf, SourceFileSearcher)


class SourceLine(object):
    """A (source file, function, line) triple with the keys used for de-duplication."""

    def __init__(self, file_id, function, line):
        self.file = file_id
        self.function = function
        self.line = line

    @property
    def file_key(self):
        """Key identifying the source file."""
        return self.file

    @property
    def function_key(self):
        """Key identifying a function within its source file."""
        return (self.file, self.function)

    @property
    def line_key(self):
        """Key identifying a line within its source file."""
        return (self.file, self.line)


class Addr2Line(object):
    """collect information of how to map [dso_name, vaddr] to [source_file:line].
    """

    def __init__(self, ndk_path, binary_cache_path, source_dirs):
        binary_finder = BinaryFinder(binary_cache_path, ReadElf(ndk_path))
        self.addr2line = Addr2Nearestline(ndk_path, binary_finder, True)
        self.source_searcher = SourceFileSearcher(source_dirs)

    def add_addr(self, dso_path: str, build_id: str, func_addr: int, addr: int):
        """Register an address that later needs to be converted to source lines."""
        self.addr2line.add_addr(dso_path, build_id, func_addr, addr)

    def convert_addrs_to_lines(self):
        """Convert every registered address, using one job per CPU."""
        # os.cpu_count() returns None when the CPU count cannot be determined;
        # fall back to a single job instead of passing None through.
        self.addr2line.convert_addrs_to_lines(jobs=os.cpu_count() or 1)

    def get_sources(self, dso_path, addr):
        """Return the SourceLine objects recorded for addr in dso_path.

        Returns an empty list when the dso or the address has no source
        information. Each source file is replaced by the path found by the
        source searcher when there is one; otherwise the recorded path is kept.
        """
        dso = self.addr2line.get_dso(dso_path)
        if not dso:
            return []
        source = self.addr2line.get_addr_source(dso, addr)
        if not source:
            return []
        result = []
        for (source_file, source_line, function_name) in source:
            source_file_path = self.source_searcher.get_real_path(source_file) or source_file
            result.append(SourceLine(source_file_path, function_name, source_line))
        return result


class Period(object):
    """event count information. It can be used to represent event count
       of a line, a function, a source file, or a binary. It contains two
       parts: period and acc_period.
       When used for a line, period is the event count occurred when running
       that line, acc_period is the accumulated event count occurred when
       running that line and functions called by that line. Same thing applies
       when it is used for a function, a source file, or a binary.
    """

    def __init__(self, period=0, acc_period=0):
        self.period = period
        self.acc_period = acc_period

    def __iadd__(self, other):
        # Accumulates in place and returns self, so `a += b` keeps `a` the same object.
        self.period += other.period
        self.acc_period += other.acc_period
        return self


class DsoPeriod(object):
    """Period for each shared library"""

    def __init__(self, dso_name):
        self.dso_name = dso_name
        self.period = Period()

    def add_period(self, period):
        self.period += period


class FilePeriod(object):
    """Period for each source file"""

    def __init__(self, file_id):
        self.file = file_id
        self.period = Period()
        # Period for each line in the file.
        self.line_dict = {}
        # Period for each function in the source file.
        # Maps function name -> [function_start_line, Period].
        self.function_dict = {}

    def add_period(self, period):
        self.period += period

    def add_line_period(self, line, period):
        """Add period to the entry of line, creating the entry on first use."""
        line_period = self.line_dict.get(line)
        if line_period is None:
            self.line_dict[line] = line_period = Period()
        line_period += period

    def add_function_period(self, function_name, function_start_line, period):
        """Add period to the entry of function_name, creating it on first use.

        The start line is recorded only when the entry is created; an unknown
        start line (None) is stored as -1.
        """
        entry = self.function_dict.get(function_name)
        if entry is None:
            if function_start_line is None:
                function_start_line = -1
            self.function_dict[function_name] = entry = [function_start_line, Period()]
        entry[1] += period


class SourceFileAnnotator(object):
    """group code for annotating source files"""

    def __init__(self, config):
        """Validate config, reset the output directory and set up address conversion.

        config is a dict. 'perf_data_list', 'source_dirs', 'dso_filters' and
        'ndk_path' must be present (a missing one is reported through
        log_exit). 'report_lib_options' is read when perf data is processed.
        'raw_period' and 'summary_width' are optional and default to False and
        80, the same defaults the command line uses.

        Side effects: sets config['annotate_dest_dir'] to 'annotated_files'
        and deletes and recreates that directory.
        """
        # check config variables
        config_names = ['perf_data_list', 'source_dirs', 'dso_filters', 'ndk_path']
        for name in config_names:
            if name not in config:
                log_exit('config [%s] is missing' % name)
        # These keys are read while writing the summary; give them defaults so
        # a caller that builds config by hand does not hit a KeyError later.
        config.setdefault('raw_period', False)
        config.setdefault('summary_width', 80)

        symfs_dir = 'binary_cache'
        if not os.path.isdir(symfs_dir):
            symfs_dir = None
        kallsyms = 'binary_cache/kallsyms'
        if not os.path.isfile(kallsyms):
            kallsyms = None

        # init member variables
        self.config = config
        self.symfs_dir = symfs_dir
        self.kallsyms = kallsyms
        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None

        config['annotate_dest_dir'] = 'annotated_files'
        output_dir = config['annotate_dest_dir']
        if os.path.isdir(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)

        self.addr2line = Addr2Line(self.config['ndk_path'], symfs_dir, config.get('source_dirs'))
        # Sum of the periods of all samples that passed the dso filter.
        self.period = 0
        self.dso_periods = {}
        self.file_periods = {}

    def annotate(self):
        """Run the whole pipeline: collect addresses, convert, count, write results."""
        self._collect_addrs()
        self._convert_addrs_to_lines()
        self._generate_periods()
        self._write_summary()
        self._annotate_files()

    def _configure_report_lib(self, lib, perf_data):
        """Point lib at perf_data and apply the symfs, kallsyms and report options."""
        lib.SetRecordFile(perf_data)
        if self.symfs_dir:
            lib.SetSymfs(self.symfs_dir)
        if self.kallsyms:
            lib.SetKallsymsFile(self.kallsyms)
        lib.SetReportOptions(self.config['report_lib_options'])

    @staticmethod
    def _get_sample_symbols(lib):
        """Return the symbol of the current sample followed by its callchain symbols."""
        symbols = [lib.GetSymbolOfCurrentSample()]
        callchain = lib.GetCallChainOfCurrentSample()
        # Only the first callchain.nr entries are read, by index.
        for i in range(callchain.nr):
            symbols.append(callchain.entries[i].symbol)
        return symbols

    def _collect_addrs(self):
        """Read perf.data, collect all addresses we need to convert to
           source file:line.
        """
        for perf_data in self.config['perf_data_list']:
            lib = ReportLib()
            try:
                self._configure_report_lib(lib, perf_data)
                while True:
                    sample = lib.GetNextSample()
                    if sample is None:
                        break
                    for symbol in self._get_sample_symbols(lib):
                        if not self._filter_symbol(symbol):
                            continue
                        build_id = lib.GetBuildIdForPath(symbol.dso_name)
                        # Register both the sampled address and the start
                        # address of the enclosing function.
                        self.addr2line.add_addr(symbol.dso_name, build_id, symbol.symbol_addr,
                                                symbol.vaddr_in_file)
                        self.addr2line.add_addr(symbol.dso_name, build_id, symbol.symbol_addr,
                                                symbol.symbol_addr)
            finally:
                # Close even when processing a sample raises.
                lib.Close()

    def _filter_symbol(self, symbol):
        """Return True if symbol should be used: no dso filter is set, or its dso is selected."""
        return not self.dso_filter or symbol.dso_name in self.dso_filter

    def _convert_addrs_to_lines(self):
        self.addr2line.convert_addrs_to_lines()

    def _generate_periods(self):
        """read perf.data, collect Period for all types:
            binaries, source files, functions, lines.
        """
        for perf_data in self.config['perf_data_list']:
            lib = ReportLib()
            try:
                self._configure_report_lib(lib, perf_data)
                while True:
                    sample = lib.GetNextSample()
                    if sample is None:
                        break
                    self._generate_periods_for_sample(lib, sample)
            finally:
                # Close even when processing a sample raises.
                lib.Close()

    def _generate_periods_for_sample(self, lib, sample):
        """Add the period of one sample to every dso/file/function/line it touches."""
        symbols = self._get_sample_symbols(lib)
        # Each sample has a callchain, but its period is only used once
        # to add period for each function/source_line/source_file/binary.
        # For example, if more than one entry in the callchain hits a
        # function, the event count of that function is only increased once.
        # Otherwise, we may get periods > 100%.
        is_sample_used = False
        used_dso_dict = {}
        used_file_dict = {}
        used_function_dict = {}
        used_line_dict = {}
        # The sampled symbol itself gets both self and accumulated period.
        period = Period(sample.period, sample.period)
        for j, symbol in enumerate(symbols):
            if j == 1:
                # From the first callchain entry on, only accumulated period is added.
                period = Period(0, sample.period)
            if not self._filter_symbol(symbol):
                continue
            is_sample_used = True
            # Add period to dso.
            self._add_dso_period(symbol.dso_name, period, used_dso_dict)
            # Add period to source file.
            sources = self.addr2line.get_sources(symbol.dso_name, symbol.vaddr_in_file)
            for source in sources:
                if source.file:
                    self._add_file_period(source, period, used_file_dict)
                    # Add period to line.
                    if source.line:
                        self._add_line_period(source, period, used_line_dict)
            # Add period to function.
            sources = self.addr2line.get_sources(symbol.dso_name, symbol.symbol_addr)
            for source in sources:
                if source.file:
                    self._add_file_period(source, period, used_file_dict)
                    if source.function:
                        self._add_function_period(source, period, used_function_dict)

        if is_sample_used:
            self.period += sample.period

    def _add_dso_period(self, dso_name: str, period: Period, used_dso_dict: Dict[str, bool]):
        """Add period to dso_name unless this sample already counted that dso."""
        if dso_name in used_dso_dict:
            return
        used_dso_dict[dso_name] = True
        dso_period = self.dso_periods.get(dso_name)
        if dso_period is None:
            dso_period = self.dso_periods[dso_name] = DsoPeriod(dso_name)
        dso_period.add_period(period)

    def _add_file_period(self, source, period, used_file_dict):
        """Add period to the file of source unless this sample already counted that file."""
        if source.file_key in used_file_dict:
            return
        used_file_dict[source.file_key] = True
        file_period = self.file_periods.get(source.file)
        if file_period is None:
            file_period = self.file_periods[source.file] = FilePeriod(source.file)
        file_period.add_period(period)

    def _add_line_period(self, source, period, used_line_dict):
        """Add period to the line of source unless this sample already counted that line.

        The FilePeriod of source.file must already exist.
        """
        if source.line_key in used_line_dict:
            return
        used_line_dict[source.line_key] = True
        file_period = self.file_periods[source.file]
        file_period.add_line_period(source.line, period)

    def _add_function_period(self, source, period, used_function_dict):
        """Add period to the function of source unless this sample already counted it.

        The FilePeriod of source.file must already exist.
        """
        if source.function_key in used_function_dict:
            return
        used_function_dict[source.function_key] = True
        file_period = self.file_periods[source.file]
        file_period.add_function_period(source.function, source.line, period)

    def _write_summary(self):
        """Write the 'summary' file: total period, dso table, file table, per-file tables."""
        summary = os.path.join(self.config['annotate_dest_dir'], 'summary')
        with open(summary, 'w') as f:
            f.write('total period: %d\n\n' % self.period)
            self._write_dso_summary(f)
            self._write_file_summary(f)

            file_periods = sorted(self.file_periods.values(),
                                  key=lambda x: x.period.acc_period, reverse=True)
            for file_period in file_periods:
                self._write_function_line_summary(f, file_period)

    def _write_dso_summary(self, summary_fh):
        """Write one table row per dso, largest accumulated period first."""
        dso_periods = sorted(self.dso_periods.values(),
                             key=lambda x: x.period.acc_period, reverse=True)
        table = Texttable(max_width=self.config['summary_width'])
        table.set_cols_align(['l', 'l', 'l'])
        table.add_row(['Total', 'Self', 'DSO'])
        for dso_period in dso_periods:
            total_str = self._get_period_str(dso_period.period.acc_period)
            self_str = self._get_period_str(dso_period.period.period)
            table.add_row([total_str, self_str, dso_period.dso_name])
        print(table.draw(), file=summary_fh)
        print(file=summary_fh)

    def _write_file_summary(self, summary_fh):
        """Write one table row per source file, largest accumulated period first."""
        file_periods = sorted(self.file_periods.values(),
                              key=lambda x: x.period.acc_period, reverse=True)
        table = Texttable(max_width=self.config['summary_width'])
        table.set_cols_align(['l', 'l', 'l'])
        table.add_row(['Total', 'Self', 'Source File'])
        for file_period in file_periods:
            total_str = self._get_period_str(file_period.period.acc_period)
            self_str = self._get_period_str(file_period.period.period)
            table.add_row([total_str, self_str, file_period.file])
        print(table.draw(), file=summary_fh)
        print(file=summary_fh)

    def _write_function_line_summary(self, summary_fh, file_period: FilePeriod):
        """Write the table of one file: functions by accumulated period, then lines in order."""
        table = Texttable(max_width=self.config['summary_width'])
        table.set_cols_align(['l', 'l', 'l'])
        table.add_row(['Total', 'Self', 'Function/Line in ' + file_period.file])
        values = [(func_name, func_start_line, period)
                  for func_name, (func_start_line, period) in file_period.function_dict.items()]
        values.sort(key=lambda x: x[2].acc_period, reverse=True)
        for func_name, func_start_line, period in values:
            total_str = self._get_period_str(period.acc_period)
            self_str = self._get_period_str(period.period)
            name = func_name + ' (line %d)' % func_start_line
            table.add_row([total_str, self_str, name])
        for line in sorted(file_period.line_dict):
            period = file_period.line_dict[line]
            total_str = self._get_period_str(period.acc_period)
            self_str = self._get_period_str(period.period)
            name = 'line %d' % line
            table.add_row([total_str, self_str, name])

        print(table.draw(), file=summary_fh)
        print(file=summary_fh)

    def _get_period_str(self, period: Union[Period, int]) -> str:
        """Format a period for output.

        A Period becomes 'Total <acc_period>, Self <period>'. An int is shown
        as the raw number when config['raw_period'] is set or the total period
        is 0, otherwise as a percentage of the total period.
        """
        if isinstance(period, Period):
            return 'Total %s, Self %s' % (
                self._get_period_str(period.acc_period),
                self._get_period_str(period.period))
        if self.config['raw_period'] or self.period == 0:
            return str(period)
        return '%.2f%%' % (100.0 * period / self.period)

    def _annotate_files(self):
        """Annotate Source files: add acc_period/period for each source file.

        Files that do not exist on disk are skipped with a warning. Each
        annotated copy is written below config['annotate_dest_dir'] at a
        relative path derived from the source path.
        """
        dest_dir = self.config['annotate_dest_dir']
        for from_path, file_period in self.file_periods.items():
            if not os.path.isfile(from_path):
                logging.warning("can't find source file for path %s", from_path)
                continue
            if from_path.startswith('/'):
                # Drop the leading '/' so the path is joined below dest_dir.
                to_path = os.path.join(dest_dir, from_path[1:])
            elif is_windows() and ':\\' in from_path:
                # Turn the drive prefix 'C:\' into a plain directory component.
                to_path = os.path.join(dest_dir, from_path.replace(':\\', os.sep))
            else:
                to_path = os.path.join(dest_dir, from_path)
            is_java = from_path.endswith('.java')
            self._annotate_file(from_path, to_path, file_period, is_java)

    def _annotate_file(self, from_path, to_path, file_period, is_java):
        """Annotate a source file.

        Annotate a source file in three steps:
          1. In the first line, show periods of this file.
          2. For each function, show periods of this function.
          3. For each line not hitting the same line as functions, show
             line periods.
        """
        logging.info('annotate file %s', from_path)
        with open(from_path, 'r') as rf:
            lines = rf.readlines()

        # Later assignments win: function annotations replace line
        # annotations, and the file annotation always owns line 1.
        annotates = {}
        for line, period in file_period.line_dict.items():
            annotates[line] = self._get_period_str(period)
        for func_start_line, period in file_period.function_dict.values():
            if func_start_line == -1:
                continue
            # For java files the annotation goes one line above the recorded line.
            line = func_start_line - 1 if is_java else func_start_line
            annotates[line] = '[func] ' + self._get_period_str(period)
        annotates[1] = '[file] ' + self._get_period_str(file_period.period)

        max_annotate_cols = max(len(text) for text in annotates.values())

        # Same width as an annotation including its '/* ' and ' */' delimiters.
        empty_annotate = ' ' * (max_annotate_cols + 6)

        os.makedirs(os.path.dirname(to_path), exist_ok=True)
        with open(to_path, 'w') as wf:
            for line_number, content in enumerate(lines, 1):
                annotate = annotates.get(line_number)
                if annotate is not None:
                    prefix = '/* ' + annotate.ljust(max_annotate_cols) + ' */'
                elif content.strip():
                    prefix = empty_annotate
                else:
                    # Blank lines get no padding.
                    prefix = ''
                wf.write(prefix)
                wf.write(content)


def main():
    """Parse the command line, build the config dict and run the annotator."""
    parser = BaseArgumentParser(description="""
        Annotate source files based on profiling data. It reads line information from binary_cache
        generated by app_profiler.py or binary_cache_builder.py, and generate annotated source
        files in annotated_files directory.""")
    parser.add_argument('-i', '--perf_data_list', nargs='+', action='append', help="""
        The paths of profiling data. Default is perf.data.""")
    parser.add_argument('-s', '--source_dirs', type=extant_dir, nargs='+', action='append', help="""
        Directories to find source files.""")
    parser.add_argument('--ndk_path', type=extant_dir, help='Set the path of a ndk release.')
    parser.add_argument('--raw-period', action='store_true',
                        help='show raw period instead of percentage')
    parser.add_argument('--summary-width', type=int, default=80, help='max width of summary file')
    sample_filter_group = parser.add_argument_group('Sample filter options')
    sample_filter_group.add_argument('--dso', nargs='+', action='append', help="""
        Use samples only in selected binaries.""")
    parser.add_report_lib_options(sample_filter_group=sample_filter_group)

    args = parser.parse_args()
    config = {}
    config['perf_data_list'] = flatten_arg_list(args.perf_data_list)
    if not config['perf_data_list']:
        config['perf_data_list'].append('perf.data')
    config['source_dirs'] = flatten_arg_list(args.source_dirs)
    config['dso_filters'] = flatten_arg_list(args.dso)
    config['ndk_path'] = args.ndk_path
    config['raw_period'] = args.raw_period
    config['summary_width'] = args.summary_width
    config['report_lib_options'] = args.report_lib_options

    annotator = SourceFileAnnotator(config)
    annotator.annotate()
    logging.info('annotate finish successfully, please check result in annotated_files/.')


if __name__ == '__main__':
    main()