#!/usr/bin/env python3
#
# Copyright (C) 2017 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""pprof_proto_generator.py: read perf.data, generate pprof.profile, which can be
    used by pprof.

  Example:
    python app_profiler.py
    python pprof_proto_generator.py
    pprof -text pprof.profile
"""

import argparse
import os
import os.path

from simpleperf_report_lib import ReportLib
from simpleperf_utils import (Addr2Nearestline, BinaryFinder, extant_dir,
                              flatten_arg_list, log_info, log_exit, ReadElf, ToolFinder)
try:
    import profile_pb2
except ImportError:
    log_exit('google.protobuf module is missing. Please install it first.')


def load_pprof_profile(filename):
    """Read a serialized pprof Profile message from `filename` and return it."""
    profile = profile_pb2.Profile()
    with open(filename, "rb") as f:
        profile.ParseFromString(f.read())
    return profile


def store_pprof_profile(filename, profile):
    """Serialize `profile` and write it to `filename` in binary mode."""
    with open(filename, 'wb') as f:
        f.write(profile.SerializeToString())


class PprofProfilePrinter(object):
    """Print the content of a pprof Profile message to stdout in a readable form."""

    def __init__(self, profile):
        self.profile = profile
        self.string_table = profile.string_table

    def show(self):
        """Print every section of the profile: sample types, samples, mappings,
        locations, functions, strings and the scalar header fields.
        """
        p = self.profile
        sub_space = ' '
        print('Profile {')
        print('%d sample_types' % len(p.sample_type))
        for i, sample_type in enumerate(p.sample_type):
            print('sample_type[%d] = ' % i, end='')
            self.show_value_type(sample_type)
        print('%d samples' % len(p.sample))
        for i, sample in enumerate(p.sample):
            print('sample[%d]:' % i)
            self.show_sample(sample, sub_space)
        print('%d mappings' % len(p.mapping))
        for i, mapping in enumerate(p.mapping):
            print('mapping[%d]:' % i)
            self.show_mapping(mapping, sub_space)
        print('%d locations' % len(p.location))
        for i, location in enumerate(p.location):
            print('location[%d]:' % i)
            self.show_location(location, sub_space)
        # Count header added so the function section matches the other sections.
        print('%d functions' % len(p.function))
        for i, function in enumerate(p.function):
            print('function[%d]:' % i)
            self.show_function(function, sub_space)
        print('%d strings' % len(p.string_table))
        for i, string in enumerate(p.string_table):
            print('string[%d]: %s' % (i, string))
        print('drop_frames: %s' % self.string(p.drop_frames))
        print('keep_frames: %s' % self.string(p.keep_frames))
        print('time_nanos: %u' % p.time_nanos)
        print('duration_nanos: %u' % p.duration_nanos)
        print('period_type: ', end='')
        self.show_value_type(p.period_type)
        print('period: %u' % p.period)
        for i, comment in enumerate(p.comment):
            print('comment[%d] = %s' % (i, self.string(comment)))
        print('default_sample_type: %d' % p.default_sample_type)
        print('} // Profile')
        print()

    def show_value_type(self, value_type, space=''):
        """Print a ValueType as both string-table ids and resolved strings."""
        print('%sValueType(typeID=%d, unitID=%d, type=%s, unit=%s)' %
              (space, value_type.type, value_type.unit,
               self.string(value_type.type), self.string(value_type.unit)))

    def show_sample(self, sample, space=''):
        """Print the locations, values and label slots of one sample."""
        sub_space = space + ' '
        for i, location_id in enumerate(sample.location_id):
            print('%slocation_id[%d]: id %d' % (space, i, location_id))
            self.show_location_id(location_id, sub_space)
        for i, value in enumerate(sample.value):
            print('%svalue[%d] = %d' % (space, i, value))
        for i in range(len(sample.label)):
            # The format string must be interpolated with '%'. Passing the tuple as a
            # second print() argument printed the raw format string and the tuple.
            print('%slabel[%d] = ' % (space, i))

    def show_location_id(self, location_id, space=''):
        """Print the location with the given 1-based id."""
        location = self.profile.location[location_id - 1]
        self.show_location(location, space)

    def show_location(self, location, space=''):
        """Print one location, including its mapping and its line entries."""
        sub_space = space + ' '
        print('%sid: %d' % (space, location.id))
        print('%smapping_id: %d' % (space, location.mapping_id))
        self.show_mapping_id(location.mapping_id, sub_space)
        print('%saddress: %x' % (space, location.address))
        for i, line in enumerate(location.line):
            print('%sline[%d]:' % (space, i))
            self.show_line(line, sub_space)

    def show_mapping_id(self, mapping_id, space=''):
        """Print the mapping with the given 1-based id."""
        mapping = self.profile.mapping[mapping_id - 1]
        self.show_mapping(mapping, space)

    def show_mapping(self, mapping, space=''):
        """Print all fields of one mapping."""
        print('%sid: %d' % (space, mapping.id))
        print('%smemory_start: %x' % (space, mapping.memory_start))
        print('%smemory_limit: %x' % (space, mapping.memory_limit))
        print('%sfile_offset: %x' % (space, mapping.file_offset))
        print('%sfilename: %s(%d)' % (space, self.string(mapping.filename),
                                      mapping.filename))
        print('%sbuild_id: %s(%d)' % (space, self.string(mapping.build_id),
                                      mapping.build_id))
        print('%shas_functions: %s' % (space, mapping.has_functions))
        print('%shas_filenames: %s' % (space, mapping.has_filenames))
        print('%shas_line_numbers: %s' % (space, mapping.has_line_numbers))
        print('%shas_inline_frames: %s' % (space, mapping.has_inline_frames))

    def show_line(self, line, space=''):
        """Print one line entry: its function and its line number."""
        sub_space = space + ' '
        print('%sfunction_id: %d' % (space, line.function_id))
        self.show_function_id(line.function_id, sub_space)
        print('%sline: %d' % (space, line.line))

    def show_function_id(self, function_id, space=''):
        """Print the function with the given 1-based id."""
        function = self.profile.function[function_id - 1]
        self.show_function(function, space)

    def show_function(self, function, space=''):
        """Print all fields of one function."""
        print('%sid: %d' % (space, function.id))
        print('%sname: %s' % (space, self.string(function.name)))
        print('%ssystem_name: %s' % (space, self.string(function.system_name)))
        print('%sfilename: %s' % (space, self.string(function.filename)))
        print('%sstart_line: %d' % (space, function.start_line))

    def string(self, string_id):
        """Return the string stored at index `string_id` of the profile's string table."""
        return self.string_table[string_id]


class Sample(object):
    """A call stack (list of location ids) with values accumulated per sample type id."""

    def __init__(self):
        self.location_ids = []
        self.values = {}

    def add_location_id(self, location_id):
        """Append one location id to the call stack."""
        self.location_ids.append(location_id)

    def add_value(self, sample_type_id, value):
        """Add `value` to the running total kept for `sample_type_id`."""
        self.values[sample_type_id] = self.values.get(sample_type_id, 0) + value

    def add_values(self, values):
        """Add every (sample_type_id, value) pair of the dict `values`."""
        for sample_type_id, value in values.items():
            self.add_value(sample_type_id, value)

    @property
    def key(self):
        """Hashable identity of the sample: the tuple of its location ids."""
        return tuple(self.location_ids)


class Location(object):
    """An address inside a mapping, plus the Line entries attached to it."""

    def __init__(self, mapping_id, address, vaddr_in_dso):
        self.id = -1  # unset
        self.mapping_id = mapping_id
        self.address = address
        self.vaddr_in_dso = vaddr_in_dso
        self.lines = []

    @property
    def key(self):
        """Hashable identity of the location: (mapping_id, address)."""
        return (self.mapping_id, self.address)


class Line(object):
    """A (function_id, line number) pair attached to a Location."""

    def __init__(self):
        self.function_id = 0
        self.line = 0


class Mapping(object):
    """A memory mapping; filename and build id are stored as string ids."""

    def __init__(self, start, end, pgoff, filename_id, build_id_id):
        self.id = -1  # unset
        self.memory_start = start
        self.memory_limit = end
        self.file_offset = pgoff
        self.filename_id = filename_id
        self.build_id_id = build_id_id

    @property
    def key(self):
        """Hashable identity of the mapping: all five constructor values."""
        return (
            self.memory_start,
            self.memory_limit,
            self.file_offset,
            self.filename_id,
            self.build_id_id)


class Function(object):
    """A function in a binary; name and dso name are stored as string ids."""

    def __init__(self, name_id, dso_name_id, vaddr_in_dso):
        self.id = -1  # unset
        self.name_id = name_id
        self.dso_name_id = dso_name_id
        self.vaddr_in_dso = vaddr_in_dso
        self.source_filename_id = 0
        self.start_line = 0

    @property
    def key(self):
        """Hashable identity of the function: (name_id, dso_name_id)."""
        return (self.name_id, self.dso_name_id)


# pylint: disable=no-member
class PprofProfileGenerator(object):
    """Aggregate samples from one or more record files into a pprof Profile message.

    Call load_record_file() once per record file, then gen() to obtain the profile.
    """

    def __init__(self, config):
        """Set up filters and empty tables from `config`.

        `config` is a dict. The keys 'max_chain_length' and 'ndk_path' are read
        unconditionally; 'comm_filters', 'pid_filters', 'tid_filters' and
        'dso_filters' are optional. config['binary_cache_dir'] is overwritten here:
        it becomes 'binary_cache' if that directory exists, otherwise None.
        """
        self.config = config
        self.lib = None

        config['binary_cache_dir'] = 'binary_cache'
        if not os.path.isdir(config['binary_cache_dir']):
            config['binary_cache_dir'] = None
        self.comm_filter = set(config['comm_filters']) if config.get('comm_filters') else None
        if config.get('pid_filters'):
            self.pid_filter = {int(x) for x in config['pid_filters']}
        else:
            self.pid_filter = None
        if config.get('tid_filters'):
            self.tid_filter = {int(x) for x in config['tid_filters']}
        else:
            self.tid_filter = None
        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None
        self.max_chain_length = config['max_chain_length']
        self.profile = profile_pb2.Profile()
        # Index 0 of the string table holds the empty string; get_string_id() returns 0
        # for empty values.
        self.profile.string_table.append('')
        self.string_table = {}
        self.sample_types = {}
        self.sample_map = {}
        self.sample_list = []
        self.location_map = {}
        self.location_list = []
        self.mapping_map = {}
        self.mapping_list = []
        self.function_map = {}
        self.function_list = []

        # Map from dso_name in perf.data to (binary path, build_id).
        self.binary_map = {}
        self.read_elf = ReadElf(self.config['ndk_path'])
        self.binary_finder = BinaryFinder(config['binary_cache_dir'], self.read_elf)

    def load_record_file(self, record_file):
        """Read all samples from `record_file` and merge them into the aggregated tables."""
        self.lib = ReportLib()
        self.lib.SetRecordFile(record_file)

        if self.config['binary_cache_dir']:
            self.lib.SetSymfs(self.config['binary_cache_dir'])
            kallsyms = os.path.join(self.config['binary_cache_dir'], 'kallsyms')
            if os.path.isfile(kallsyms):
                self.lib.SetKallsymsFile(kallsyms)

        if self.config.get('show_art_frames'):
            self.lib.ShowArtFrames()
        # .get() so that a config without this optional key behaves like one set to None.
        for file_path in self.config.get('proguard_mapping_file') or []:
            self.lib.AddProguardMappingFile(file_path)

        # Process all samples in perf.data, aggregate samples.
        while True:
            report_sample = self.lib.GetNextSample()
            if report_sample is None:
                self.lib.Close()
                self.lib = None
                break
            event = self.lib.GetEventOfCurrentSample()
            symbol = self.lib.GetSymbolOfCurrentSample()
            callchain = self.lib.GetCallChainOfCurrentSample()

            if not self._filter_report_sample(report_sample):
                continue

            # Each event owns two consecutive sample type slots: sample count, then
            # event count (see get_sample_type_id()).
            sample_type_id = self.get_sample_type_id(event.name)
            sample = Sample()
            sample.add_value(sample_type_id, 1)
            sample.add_value(sample_type_id + 1, report_sample.period)
            # The dso filter is evaluated once on the sampled symbol and decides whether
            # the whole stack is kept.
            # NOTE(review): the per-entry check previously also tested `symbol`, not
            # `entry.symbol`; confirm whether filtering each entry was intended.
            if self._filter_symbol(symbol):
                location_id = self.get_location_id(report_sample.ip, symbol)
                sample.add_location_id(location_id)
                # When the chain is longer than max_chain_length, only the last
                # max_chain_length entries are used.
                first_entry = max(0, callchain.nr - self.max_chain_length)
                for i in range(first_entry, callchain.nr):
                    entry = callchain.entries[i]
                    location_id = self.get_location_id(entry.ip, entry.symbol)
                    sample.add_location_id(location_id)
            if sample.location_ids:
                self.add_sample(sample)

    def gen(self):
        """Fill the Profile message from the aggregated tables and return it."""
        # 1. Generate line info for locations and functions.
        self.gen_source_lines()

        # 2. Produce samples/locations/functions in profile.
        for sample in self.sample_list:
            self.gen_profile_sample(sample)
        for mapping in self.mapping_list:
            self.gen_profile_mapping(mapping)
        for location in self.location_list:
            self.gen_profile_location(location)
        for function in self.function_list:
            self.gen_profile_function(function)

        return self.profile

    def _filter_report_sample(self, sample):
        """Return true if the sample can be used."""
        if self.comm_filter:
            if sample.thread_comm not in self.comm_filter:
                return False
        if self.pid_filter:
            if sample.pid not in self.pid_filter:
                return False
        if self.tid_filter:
            if sample.tid not in self.tid_filter:
                return False
        return True

    def _filter_symbol(self, symbol):
        """Return True if no dso filter is set or the symbol's dso_name is in it."""
        if not self.dso_filter or symbol.dso_name in self.dso_filter:
            return True
        return False

    def get_string_id(self, str_value):
        """Return the string table index of `str_value`, adding it if needed.

        Empty or None values map to index 0.
        """
        if not str_value:
            return 0
        str_id = self.string_table.get(str_value)
        if str_id is not None:
            return str_id
        # self.string_table does not contain the '' entry held at index 0 of
        # profile.string_table, hence the + 1.
        str_id = len(self.string_table) + 1
        self.string_table[str_value] = str_id
        self.profile.string_table.append(str_value)
        return str_id

    def get_string(self, str_id):
        """Return the string stored at index `str_id` of the profile's string table."""
        return self.profile.string_table[str_id]

    def get_sample_type_id(self, name):
        """Return the index of the first of the two sample types of event `name`.

        On first use, two sample types are added to the profile:
        'event_<name>_samples' at the returned index and 'event_<name>_count' at the
        next index, both with unit 'count'.
        """
        sample_type_id = self.sample_types.get(name)
        if sample_type_id is not None:
            return sample_type_id
        sample_type_id = len(self.profile.sample_type)
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id('event_' + name + '_samples')
        sample_type.unit = self.get_string_id('count')
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id('event_' + name + '_count')
        sample_type.unit = self.get_string_id('count')
        self.sample_types[name] = sample_type_id
        return sample_type_id

    def get_location_id(self, ip, symbol):
        """Return the 1-based id of the location for (`ip`, `symbol`), creating it if new.

        Mappings and functions referenced by the location are registered as a side
        effect, even when the location itself already exists.
        """
        binary_path, build_id = self.get_binary(symbol.dso_name)
        mapping_id = self.get_mapping_id(symbol.mapping[0], binary_path, build_id)
        location = Location(mapping_id, ip, symbol.vaddr_in_file)
        function_id = self.get_function_id(symbol.symbol_name, binary_path, symbol.symbol_addr)
        if function_id:
            # Add Line only when it has a valid function id, see http://b/36988814.
            # Default line info only contains the function name
            line = Line()
            line.function_id = function_id
            location.lines.append(line)

        exist_location = self.location_map.get(location.key)
        if exist_location:
            return exist_location.id
        # location_id starts from 1
        location.id = len(self.location_list) + 1
        self.location_list.append(location)
        self.location_map[location.key] = location
        return location.id

    def get_mapping_id(self, report_mapping, filename, build_id):
        """Return the 1-based id of the mapping described by the arguments.

        A new Mapping is created when no mapping with the same key exists yet.
        """
        filename_id = self.get_string_id(filename)
        build_id_id = self.get_string_id(build_id)
        mapping = Mapping(report_mapping.start, report_mapping.end,
                          report_mapping.pgoff, filename_id, build_id_id)
        exist_mapping = self.mapping_map.get(mapping.key)
        if exist_mapping:
            return exist_mapping.id
        # mapping_id starts from 1
        mapping.id = len(self.mapping_list) + 1
        self.mapping_list.append(mapping)
        self.mapping_map[mapping.key] = mapping
        return mapping.id

    def get_binary(self, dso_name):
        """ Return (binary_path, build_id) for a given dso_name. """
        value = self.binary_map.get(dso_name)
        if value:
            return value

        binary_path = dso_name
        build_id = ''

        # The build ids in perf.data are padded to 20 bytes, but pprof needs without padding.
        # So read build id from the binary in binary_cache, and check it with build id in
        # perf.data.
        build_id_in_perf_data = self.lib.GetBuildIdForPath(dso_name)
        # Try elf_path in binary cache.
        elf_path = self.binary_finder.find_binary(dso_name, build_id_in_perf_data)
        if elf_path:
            build_id = build_id_in_perf_data
            binary_path = str(elf_path)

        # When there is no matching elf_path, try converting build_id in perf.data.
        if not build_id and build_id_in_perf_data.startswith('0x'):
            # Fallback to the way used by TrimZeroesFromBuildIDString() in quipper.
            build_id = build_id_in_perf_data[2:]  # remove '0x'
            padding = '0' * 8
            while build_id.endswith(padding):
                build_id = build_id[:-len(padding)]

        self.binary_map[dso_name] = (binary_path, build_id)
        return (binary_path, build_id)

    def get_mapping(self, mapping_id):
        """Return the Mapping with the given 1-based id, or None if the id is not positive."""
        return self.mapping_list[mapping_id - 1] if mapping_id > 0 else None

    def get_function_id(self, name, dso_name, vaddr_in_file):
        """Return the 1-based id of function `name` in `dso_name`, creating it if new.

        Returns 0 when `name` is 'unknown'.
        """
        if name == 'unknown':
            return 0
        function = Function(self.get_string_id(name), self.get_string_id(dso_name), vaddr_in_file)
        exist_function = self.function_map.get(function.key)
        if exist_function:
            return exist_function.id
        # function_id starts from 1
        function.id = len(self.function_list) + 1
        self.function_list.append(function)
        self.function_map[function.key] = function
        return function.id

    def get_function(self, function_id):
        """Return the Function with the given 1-based id, or None if the id is not positive."""
        return self.function_list[function_id - 1] if function_id > 0 else None

    def add_sample(self, sample):
        """Merge `sample` into an existing sample with the same call stack, or add it."""
        exist_sample = self.sample_map.get(sample.key)
        if exist_sample:
            exist_sample.add_values(sample.values)
        else:
            self.sample_list.append(sample)
            self.sample_map[sample.key] = sample

    def gen_source_lines(self):
        """Annotate locations and functions with source file and line information.

        Does nothing except log a message when binary_cache or llvm-symbolizer is
        not available.
        """
        # 1. Create Addr2line instance
        if not self.config.get('binary_cache_dir'):
            log_info("Can't generate line information because binary_cache is missing.")
            return
        if not ToolFinder.find_tool_path('llvm-symbolizer', self.config['ndk_path']):
            log_info("Can't generate line information because can't find llvm-symbolizer.")
            return
        # We have changed dso names to paths in binary_cache in self.get_binary(). So no need to
        # pass binary_cache_dir to BinaryFinder.
        binary_finder = BinaryFinder(None, self.read_elf)
        addr2line = Addr2Nearestline(self.config['ndk_path'], binary_finder, True)

        # 2. Put all needed addresses to it.
        for location in self.location_list:
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            if location.lines:
                function = self.get_function(location.lines[0].function_id)
                addr2line.add_addr(dso_name, None, function.vaddr_in_dso, location.vaddr_in_dso)
        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            addr2line.add_addr(dso_name, None, function.vaddr_in_dso, function.vaddr_in_dso)

        # 3. Generate source lines.
        addr2line.convert_addrs_to_lines()

        # 4. Annotate locations and functions.
        for location in self.location_list:
            if not location.lines:
                continue
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            dso = addr2line.get_dso(dso_name)
            if not dso:
                continue
            sources = addr2line.get_addr_source(dso, location.vaddr_in_dso)
            if not sources:
                continue
            for (source_id, source) in enumerate(sources):
                source_file, source_line, function_name = source
                function_id = self.get_function_id(function_name, dso_name, 0)
                if function_id == 0:
                    continue
                if source_id == 0:
                    # Clear default line info
                    location.lines = []
                location.lines.append(self.add_line(source_file, source_line, function_id))

        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            if function.vaddr_in_dso:
                dso = addr2line.get_dso(dso_name)
                if not dso:
                    continue
                sources = addr2line.get_addr_source(dso, function.vaddr_in_dso)
                if sources:
                    source_file, source_line, _ = sources[0]
                    function.source_filename_id = self.get_string_id(source_file)
                    function.start_line = source_line

    def add_line(self, source_file, source_line, function_id):
        """Return a new Line for (`function_id`, `source_line`).

        Also records `source_file` as the source filename of that function.
        """
        line = Line()
        function = self.get_function(function_id)
        function.source_filename_id = self.get_string_id(source_file)
        line.function_id = function_id
        line.line = source_line
        return line

    def gen_profile_sample(self, sample):
        """Append `sample` to the profile; sample types without a value get 0."""
        profile_sample = self.profile.sample.add()
        profile_sample.location_id.extend(sample.location_ids)
        # Every event contributes two sample types (see get_sample_type_id()).
        sample_type_count = len(self.sample_types) * 2
        values = [0] * sample_type_count
        for sample_type_id, value in sample.values.items():
            values[sample_type_id] = value
        profile_sample.value.extend(values)

    def gen_profile_mapping(self, mapping):
        """Append `mapping` to the profile.

        has_line_numbers and has_inline_frames are set only when a binary cache
        directory is configured.
        """
        profile_mapping = self.profile.mapping.add()
        profile_mapping.id = mapping.id
        profile_mapping.memory_start = mapping.memory_start
        profile_mapping.memory_limit = mapping.memory_limit
        profile_mapping.file_offset = mapping.file_offset
        profile_mapping.filename = mapping.filename_id
        profile_mapping.build_id = mapping.build_id_id
        profile_mapping.has_filenames = True
        profile_mapping.has_functions = True
        if self.config.get('binary_cache_dir'):
            profile_mapping.has_line_numbers = True
            profile_mapping.has_inline_frames = True
        else:
            profile_mapping.has_line_numbers = False
            profile_mapping.has_inline_frames = False

    def gen_profile_location(self, location):
        """Append `location` and its line entries to the profile."""
        profile_location = self.profile.location.add()
        profile_location.id = location.id
        profile_location.mapping_id = location.mapping_id
        profile_location.address = location.address
        for source_line in location.lines:
            line = profile_location.line.add()
            line.function_id = source_line.function_id
            line.line = source_line.line

    def gen_profile_function(self, function):
        """Append `function` to the profile; system_name is set to the same id as name."""
        profile_function = self.profile.function.add()
        profile_function.id = function.id
        profile_function.name = function.name_id
        profile_function.system_name = function.name_id
        profile_function.filename = function.source_filename_id
        profile_function.start_line = function.start_line


def main():
    """Parse command line arguments, then either print an existing profile (--show)
    or generate a profile from the record files and write it to the output file.
    """
    parser = argparse.ArgumentParser(description='Generate pprof profile data in pprof.profile.')
    parser.add_argument('--show', nargs='?', action='append', help='print existing pprof.profile.')
    parser.add_argument('-i', '--record_file', nargs='+', default=['perf.data'], help="""
        Set profiling data file to report. Default is perf.data""")
    parser.add_argument('-o', '--output_file', default='pprof.profile', help="""
        The path of generated pprof profile data.""")
    parser.add_argument('--comm', nargs='+', action='append', help="""
        Use samples only in threads with selected names.""")
    parser.add_argument('--pid', nargs='+', action='append', help="""
        Use samples only in processes with selected process ids.""")
    parser.add_argument('--tid', nargs='+', action='append', help="""
        Use samples only in threads with selected thread ids.""")
    parser.add_argument('--dso', nargs='+', action='append', help="""
        Use samples only in selected binaries.""")
    parser.add_argument('--max_chain_length', type=int, default=1000000000, help="""
        Maximum depth of samples to be converted.""")  # Large value as infinity standin.
    parser.add_argument('--ndk_path', type=extant_dir, help='Set the path of a ndk release.')
    parser.add_argument('--show_art_frames', action='store_true',
                        help='Show frames of internal methods in the ART Java interpreter.')
    parser.add_argument(
        '--proguard-mapping-file', nargs='+',
        help='Add proguard mapping file to de-obfuscate symbols')

    args = parser.parse_args()
    if args.show:
        # A bare `--show` stores None in the list; fall back to the default file name.
        show_file = args.show[0] if args.show[0] else 'pprof.profile'
        profile = load_pprof_profile(show_file)
        printer = PprofProfilePrinter(profile)
        printer.show()
        return

    config = {}
    config['output_file'] = args.output_file
    config['comm_filters'] = flatten_arg_list(args.comm)
    config['pid_filters'] = flatten_arg_list(args.pid)
    config['tid_filters'] = flatten_arg_list(args.tid)
    config['dso_filters'] = flatten_arg_list(args.dso)
    config['ndk_path'] = args.ndk_path
    config['show_art_frames'] = args.show_art_frames
    config['max_chain_length'] = args.max_chain_length
    config['proguard_mapping_file'] = args.proguard_mapping_file
    generator = PprofProfileGenerator(config)
    for record_file in args.record_file:
        generator.load_record_file(record_file)
    profile = generator.gen()
    store_pprof_profile(config['output_file'], profile)


if __name__ == '__main__':
    main()