#!/usr/bin/env python3
#
# Copyright (C) 2017 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""pprof_proto_generator.py: read perf.data, generate pprof.profile, which can be
    used by pprof.

  Example:
    ./app_profiler.py
    ./pprof_proto_generator.py
    pprof -text pprof.profile
"""

import logging
import os
import os.path
import re
import sys

from simpleperf_report_lib import ReportLib
from simpleperf_utils import (Addr2Nearestline, BaseArgumentParser, BinaryFinder, extant_dir,
                              flatten_arg_list, log_exit, ReadElf, ToolFinder)
try:
    import profile_pb2
except ImportError as e:
    log_exit(f'{e}\nprotobuf package is missing or too old. Please install it like ' +
             '`pip install protobuf==4.21`.')


# Some units of common event names
EVENT_UNITS = {
    'cpu-clock': 'nanoseconds',
    'cpu-cycles': 'cpu-cycles',
    'instructions': 'instructions',
    'task-clock': 'nanoseconds',
}


def load_pprof_profile(filename):
    """Read a serialized pprof profile from `filename` and return the parsed message."""
    profile = profile_pb2.Profile()
    with open(filename, "rb") as f:
        profile.ParseFromString(f.read())
    return profile


def store_pprof_profile(filename, profile):
    """Serialize `profile` and write it to `filename` in binary mode."""
    with open(filename, 'wb') as f:
        f.write(profile.SerializeToString())


class PprofProfilePrinter:
    """Print the contents of a pprof profile message to stdout in a readable form."""

    def __init__(self, profile):
        self.profile = profile
        self.string_table = profile.string_table

    def show(self):
        """Print every section of the profile, each preceded by its element count."""
        p = self.profile
        sub_space = ' '
        print('Profile {')
        print('%d sample_types' % len(p.sample_type))
        for i, sample_type in enumerate(p.sample_type):
            print('sample_type[%d] = ' % i, end='')
            self.show_value_type(sample_type)
        print('%d samples' % len(p.sample))
        for i, sample in enumerate(p.sample):
            print('sample[%d]:' % i)
            self.show_sample(sample, sub_space)
        print('%d mappings' % len(p.mapping))
        for i, mapping in enumerate(p.mapping):
            print('mapping[%d]:' % i)
            self.show_mapping(mapping, sub_space)
        print('%d locations' % len(p.location))
        for i, location in enumerate(p.location):
            print('location[%d]:' % i)
            self.show_location(location, sub_space)
        # Print a count header for functions too, like every other section.
        print('%d functions' % len(p.function))
        for i, function in enumerate(p.function):
            print('function[%d]:' % i)
            self.show_function(function, sub_space)
        print('%d strings' % len(p.string_table))
        for i, string in enumerate(p.string_table):
            print('string[%d]: %s' % (i, string))
        print('drop_frames: %s' % self.string(p.drop_frames))
        print('keep_frames: %s' % self.string(p.keep_frames))
        print('time_nanos: %u' % p.time_nanos)
        print('duration_nanos: %u' % p.duration_nanos)
        print('period_type: ', end='')
        self.show_value_type(p.period_type)
        print('period: %u' % p.period)
        for i, comment in enumerate(p.comment):
            print('comment[%d] = %s' % (i, self.string(comment)))
        print('default_sample_type: %d' % p.default_sample_type)
        print('}  // Profile')
        print()

    def show_value_type(self, value_type, space=''):
        """Print a ValueType with both its string ids and the resolved strings."""
        print('%sValueType(typeID=%d, unitID=%d, type=%s, unit=%s)' %
              (space, value_type.type, value_type.unit,
               self.string(value_type.type), self.string(value_type.unit)))

    def show_sample(self, sample, space=''):
        """Print the locations, values and labels of one sample."""
        sub_space = space + ' '
        for i, location_id in enumerate(sample.location_id):
            print('%slocation_id[%d]: id %d' % (space, i, location_id))
            self.show_location_id(location_id, sub_space)
        for i, value in enumerate(sample.value):
            print('%svalue[%d] = %d' % (space, i, value))
        for i, label in enumerate(sample.label):
            print('%slabel[%d] = %s:%s' % (space, i, self.string(label.key),
                                           self.string(label.str)))

    def show_location_id(self, location_id, space=''):
        """Print the location stored at index `location_id - 1` of the profile."""
        location = self.profile.location[location_id - 1]
        self.show_location(location, space)

    def show_location(self, location, space=''):
        """Print one location, including its mapping and all of its lines."""
        sub_space = space + ' '
        print('%sid: %d' % (space, location.id))
        print('%smapping_id: %d' % (space, location.mapping_id))
        self.show_mapping_id(location.mapping_id, sub_space)
        print('%saddress: %x' % (space, location.address))
        for i, line in enumerate(location.line):
            print('%sline[%d]:' % (space, i))
            self.show_line(line, sub_space)

    def show_mapping_id(self, mapping_id, space=''):
        """Print the mapping stored at index `mapping_id - 1` of the profile."""
        mapping = self.profile.mapping[mapping_id - 1]
        self.show_mapping(mapping, space)

    def show_mapping(self, mapping, space=''):
        """Print every field of one mapping."""
        print('%sid: %d' % (space, mapping.id))
        print('%smemory_start: %x' % (space, mapping.memory_start))
        print('%smemory_limit: %x' % (space, mapping.memory_limit))
        print('%sfile_offset: %x' % (space, mapping.file_offset))
        print('%sfilename: %s(%d)' % (space, self.string(mapping.filename),
                                      mapping.filename))
        print('%sbuild_id: %s(%d)' % (space, self.string(mapping.build_id),
                                      mapping.build_id))
        print('%shas_functions: %s' % (space, mapping.has_functions))
        print('%shas_filenames: %s' % (space, mapping.has_filenames))
        print('%shas_line_numbers: %s' % (space, mapping.has_line_numbers))
        print('%shas_inline_frames: %s' % (space, mapping.has_inline_frames))

    def show_line(self, line, space=''):
        """Print one line entry together with the function it refers to."""
        sub_space = space + ' '
        print('%sfunction_id: %d' % (space, line.function_id))
        self.show_function_id(line.function_id, sub_space)
        print('%sline: %d' % (space, line.line))

    def show_function_id(self, function_id, space=''):
        """Print the function stored at index `function_id - 1` of the profile."""
        function = self.profile.function[function_id - 1]
        self.show_function(function, space)

    def show_function(self, function, space=''):
        """Print every field of one function."""
        print('%sid: %d' % (space, function.id))
        print('%sname: %s' % (space, self.string(function.name)))
        print('%ssystem_name: %s' % (space, self.string(function.system_name)))
        print('%sfilename: %s' % (space, self.string(function.filename)))
        print('%sstart_line: %d' % (space, function.start_line))

    def string(self, string_id):
        """Return the string-table entry at index `string_id`."""
        return self.string_table[string_id]


class Label:
    """A string-valued sample label, stored as two string-table ids."""

    def __init__(self, key_id: int, str_id: int):
        # See profile.Label.key
        self.key_id = key_id
        # See profile.Label.str
        self.str_id = str_id


class Sample:
    """An aggregated sample: a call stack, its labels and per-sample-type values."""

    def __init__(self):
        self.location_ids = []
        # Map from sample type id to the accumulated value.
        self.values = {}
        self.labels = []

    def add_location_id(self, location_id):
        """Append one location id to the call stack."""
        self.location_ids.append(location_id)

    def add_value(self, sample_type_id, value):
        """Add `value` to the running total kept for `sample_type_id`."""
        self.values[sample_type_id] = self.values.get(sample_type_id, 0) + value

    def add_values(self, values):
        """Accumulate every (sample_type_id, value) pair of the `values` dict."""
        for sample_type_id, value in values.items():
            self.add_value(sample_type_id, value)

    @property
    def key(self):
        """Aggregation key: the call stack plus the labels.

        Labels are part of the key so that samples sharing a call stack but
        carrying different labels are not merged under the labels of whichever
        sample happened to be seen first.
        """
        return (tuple(self.location_ids),
                tuple((label.key_id, label.str_id) for label in self.labels))


class Location:
    """An instruction address inside a mapping, with its source line entries."""

    def __init__(self, mapping_id, address, vaddr_in_dso):
        self.id = -1  # unset
        self.mapping_id = mapping_id
        self.address = address
        self.vaddr_in_dso = vaddr_in_dso
        self.lines = []

    @property
    def key(self):
        """Deduplication key: (mapping_id, address)."""
        return (self.mapping_id, self.address)


class Line:
    """A (function, line number) pair attached to a Location."""

    def __init__(self):
        self.function_id = 0
        self.line = 0


class Mapping:
    """A mapped memory range of a binary; names are kept as string-table ids."""

    def __init__(self, start, end, pgoff, filename_id, build_id_id):
        self.id = -1  # unset
        self.memory_start = start
        self.memory_limit = end
        self.file_offset = pgoff
        self.filename_id = filename_id
        self.build_id_id = build_id_id

    @property
    def key(self):
        """Deduplication key made of every field except `id`."""
        return (
            self.memory_start,
            self.memory_limit,
            self.file_offset,
            self.filename_id,
            self.build_id_id)


class Function:
    """A function identified by its name and the binary that contains it."""

    def __init__(self, name_id, dso_name_id, vaddr_in_dso):
        self.id = -1  # unset
        self.name_id = name_id
        self.dso_name_id = dso_name_id
        self.vaddr_in_dso = vaddr_in_dso
        self.source_filename_id = 0
        self.start_line = 0

    @property
    def key(self):
        """Deduplication key: (name_id, dso_name_id)."""
        return (self.name_id, self.dso_name_id)


# pylint: disable=no-member
class PprofProfileGenerator:
    """Aggregate samples from record files and emit them as a pprof profile.

    Call load_record_file() once per record file, then gen() to build the
    profile message.
    """

    def __init__(self, config):
        """Set up empty tables.

        `config` is a dict; this constructor reads 'dso_filters' (optional),
        'max_chain_length' and 'ndk_path', and overwrites 'binary_cache_dir'
        with 'binary_cache' when that directory exists, otherwise with None.
        """
        self.config = config
        self.lib = None

        config['binary_cache_dir'] = 'binary_cache'
        if not os.path.isdir(config['binary_cache_dir']):
            config['binary_cache_dir'] = None
        self.dso_filter = set(config['dso_filters']) if config.get('dso_filters') else None
        self.max_chain_length = config['max_chain_length']
        self.profile = profile_pb2.Profile()
        # Index 0 of the string table is the empty string.
        self.profile.string_table.append('')
        self.string_table = {}
        self.sample_types = {}
        self.sample_map = {}
        self.sample_list = []
        self.location_map = {}
        self.location_list = []
        self.mapping_map = {}
        self.mapping_list = []
        self.function_map = {}
        self.function_list = []

        # Map from dso_name in perf.data to (binary path, build_id).
        self.binary_map = {}
        self.read_elf = ReadElf(self.config['ndk_path'])
        self.binary_finder = BinaryFinder(config['binary_cache_dir'], self.read_elf)

    def load_record_file(self, record_file):
        """Read all samples of `record_file` and aggregate them into this generator.

        Three comment strings (record command, conversion command line and
        architecture) are appended to the profile for each file loaded. The
        report library is closed when this method returns or raises.
        """
        self.lib = ReportLib()
        try:
            self.lib.SetRecordFile(record_file)

            if self.config['binary_cache_dir']:
                self.lib.SetSymfs(self.config['binary_cache_dir'])
                kallsyms = os.path.join(self.config['binary_cache_dir'], 'kallsyms')
                if os.path.isfile(kallsyms):
                    self.lib.SetKallsymsFile(kallsyms)

            if self.config.get('show_art_frames'):
                self.lib.ShowArtFrames()
            self.lib.SetReportOptions(self.config['report_lib_options'])

            comments = [
                "Simpleperf Record Command:\n" + self.lib.GetRecordCmd(),
                "Converted to pprof with:\n" + " ".join(sys.argv),
                "Architecture:\n" + self.lib.GetArch(),
            ]
            for comment in comments:
                self.profile.comment.append(self.get_string_id(comment))

            numbers_re = re.compile(r"\d+")

            # Process all samples in perf.data, aggregate samples.
            while True:
                report_sample = self.lib.GetNextSample()
                if report_sample is None:
                    break
                event = self.lib.GetEventOfCurrentSample()
                symbol = self.lib.GetSymbolOfCurrentSample()
                callchain = self.lib.GetCallChainOfCurrentSample()

                sample_type_id = self.get_sample_type_id(event.name)
                sample = Sample()
                # Slot sample_type_id counts samples, the next slot sums event periods.
                sample.add_value(sample_type_id, 1)
                sample.add_value(sample_type_id + 1, report_sample.period)
                sample.labels.append(Label(
                    self.get_string_id("thread"),
                    self.get_string_id(report_sample.thread_comm)))
                # Heuristic: threadpools doing similar work are often named as
                # name-1, name-2, name-3. Combine threadpools into one label
                # "name-%d" if they only differ by a number.
                sample.labels.append(Label(
                    self.get_string_id("threadpool"),
                    self.get_string_id(
                        numbers_re.sub("%d", report_sample.thread_comm))))
                sample.labels.append(Label(
                    self.get_string_id("pid"),
                    self.get_string_id(str(report_sample.pid))))
                sample.labels.append(Label(
                    self.get_string_id("tid"),
                    self.get_string_id(str(report_sample.tid))))
                # The dso filter is decided by the sampled (leaf) symbol only: a
                # sample that passes keeps its call chain frames regardless of the
                # binaries those frames belong to.
                if self._filter_symbol(symbol):
                    sample.add_location_id(self.get_location_id(report_sample.ip, symbol))
                    # Only the last max_chain_length call chain entries are kept.
                    first_entry = max(0, callchain.nr - self.max_chain_length)
                    for i in range(first_entry, callchain.nr):
                        entry = callchain.entries[i]
                        sample.add_location_id(self.get_location_id(entry.ip, entry.symbol))
                if sample.location_ids:
                    self.add_sample(sample)
        finally:
            # Release the report library on errors too, not just at end of input.
            self.lib.Close()
            self.lib = None

    def gen(self, jobs: int):
        """Build and return the profile message from everything loaded so far.

        `jobs` is passed on to the source line conversion step.
        """
        # 1. Generate line info for locations and functions.
        self.gen_source_lines(jobs)

        # 2. Produce samples/locations/functions in profile.
        for sample in self.sample_list:
            self.gen_profile_sample(sample)
        for mapping in self.mapping_list:
            self.gen_profile_mapping(mapping)
        for location in self.location_list:
            self.gen_profile_location(location)
        for function in self.function_list:
            self.gen_profile_function(function)

        return self.profile

    def _filter_symbol(self, symbol):
        """Return True if no dso filter is set or `symbol.dso_name` is in it."""
        return not self.dso_filter or symbol.dso_name in self.dso_filter

    def get_string_id(self, str_value):
        """Return the string-table index of `str_value`, adding it on first use.

        A falsy value (empty string or None) maps to index 0.
        """
        if not str_value:
            return 0
        str_id = self.string_table.get(str_value)
        if str_id is not None:
            return str_id
        # The dict does not hold the '' entry that sits at index 0 of the
        # profile's string table, hence the + 1.
        str_id = len(self.string_table) + 1
        self.string_table[str_value] = str_id
        self.profile.string_table.append(str_value)
        return str_id

    def get_string(self, str_id):
        """Return the string stored at index `str_id` of the profile's string table."""
        return self.profile.string_table[str_id]

    def get_sample_type_id(self, name):
        """Return the index of the first of the two sample types of event `name`.

        On first use two consecutive sample types are registered:
        '<name>_samples' with unit 'samples', then '<name>' with a unit taken
        from EVENT_UNITS ('count' when the event is not listed there).
        """
        sample_type_id = self.sample_types.get(name)
        if sample_type_id is not None:
            return sample_type_id
        sample_type_id = len(self.profile.sample_type)
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id(name + '_samples')
        sample_type.unit = self.get_string_id('samples')
        sample_type = self.profile.sample_type.add()
        sample_type.type = self.get_string_id(name)
        units = EVENT_UNITS.get(name, 'count')
        sample_type.unit = self.get_string_id(units)
        self.sample_types[name] = sample_type_id
        return sample_type_id

    def get_location_id(self, ip, symbol):
        """Return the 1-based id of the location for `ip`, creating it if needed.

        The mapping and the function of `symbol` are registered as a side
        effect, even when the location itself already exists.
        """
        binary_path, build_id = self.get_binary(symbol.dso_name)
        mapping_id = self.get_mapping_id(symbol.mapping[0], binary_path, build_id)
        location = Location(mapping_id, ip, symbol.vaddr_in_file)
        function_id = self.get_function_id(symbol.symbol_name, binary_path, symbol.symbol_addr)
        if function_id:
            # Add Line only when it has a valid function id, see http://b/36988814.
            # Default line info only contains the function name
            line = Line()
            line.function_id = function_id
            location.lines.append(line)

        exist_location = self.location_map.get(location.key)
        if exist_location:
            return exist_location.id
        # location_id starts from 1
        location.id = len(self.location_list) + 1
        self.location_list.append(location)
        self.location_map[location.key] = location
        return location.id

    def get_mapping_id(self, report_mapping, filename, build_id):
        """Return the 1-based id of the described mapping, creating it if needed."""
        filename_id = self.get_string_id(filename)
        build_id_id = self.get_string_id(build_id)
        mapping = Mapping(report_mapping.start, report_mapping.end,
                          report_mapping.pgoff, filename_id, build_id_id)
        exist_mapping = self.mapping_map.get(mapping.key)
        if exist_mapping:
            return exist_mapping.id
        # mapping_id starts from 1
        mapping.id = len(self.mapping_list) + 1
        self.mapping_list.append(mapping)
        self.mapping_map[mapping.key] = mapping
        return mapping.id

    def get_binary(self, dso_name):
        """ Return (binary_path, build_id) for a given dso_name. """
        value = self.binary_map.get(dso_name)
        if value:
            return value

        binary_path = dso_name
        build_id = self.lib.GetBuildIdForPath(dso_name)
        # Try elf_path in binary cache.
        elf_path = self.binary_finder.find_binary(dso_name, build_id)
        if elf_path:
            binary_path = str(elf_path)

        # The build ids in perf.data are padded to 20 bytes, but pprof needs without padding.
        build_id = ReadElf.unpad_build_id(build_id)
        self.binary_map[dso_name] = (binary_path, build_id)
        return (binary_path, build_id)

    def get_mapping(self, mapping_id):
        """Return the Mapping with the given 1-based id, or None if the id is not positive."""
        return self.mapping_list[mapping_id - 1] if mapping_id > 0 else None

    def get_function_id(self, name, dso_name, vaddr_in_file):
        """Return the 1-based id of the function, creating it if needed.

        Returns 0 when `name` is 'unknown'; no function is created in that case.
        """
        if name == 'unknown':
            return 0
        function = Function(self.get_string_id(name), self.get_string_id(dso_name), vaddr_in_file)
        exist_function = self.function_map.get(function.key)
        if exist_function:
            return exist_function.id
        # function_id starts from 1
        function.id = len(self.function_list) + 1
        self.function_list.append(function)
        self.function_map[function.key] = function
        return function.id

    def get_function(self, function_id):
        """Return the Function with the given 1-based id, or None if the id is not positive."""
        return self.function_list[function_id - 1] if function_id > 0 else None

    def add_sample(self, sample):
        """Merge `sample` into an existing sample with the same key, or store it as new."""
        exist_sample = self.sample_map.get(sample.key)
        if exist_sample:
            exist_sample.add_values(sample.values)
        else:
            self.sample_list.append(sample)
            self.sample_map[sample.key] = sample

    def gen_source_lines(self, jobs: int):
        """Annotate locations and functions with source file and line information.

        Does nothing except log a message when the binary cache directory or
        llvm-symbolizer is unavailable.
        """
        # 1. Create Addr2line instance
        if not self.config.get('binary_cache_dir'):
            logging.info("Can't generate line information because binary_cache is missing.")
            return
        if not ToolFinder.find_tool_path('llvm-symbolizer', self.config['ndk_path']):
            logging.info("Can't generate line information because can't find llvm-symbolizer.")
            return
        # We have changed dso names to paths in binary_cache in self.get_binary(). So no need to
        # pass binary_cache_dir to BinaryFinder.
        binary_finder = BinaryFinder(None, self.read_elf)
        addr2line = Addr2Nearestline(self.config['ndk_path'], binary_finder, True)

        # 2. Put all needed addresses to it.
        for location in self.location_list:
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            if location.lines:
                function = self.get_function(location.lines[0].function_id)
                addr2line.add_addr(dso_name, None, function.vaddr_in_dso, location.vaddr_in_dso)
        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            addr2line.add_addr(dso_name, None, function.vaddr_in_dso, function.vaddr_in_dso)

        # 3. Generate source lines.
        addr2line.convert_addrs_to_lines(jobs)

        # 4. Annotate locations and functions.
        for location in self.location_list:
            if not location.lines:
                continue
            mapping = self.get_mapping(location.mapping_id)
            dso_name = self.get_string(mapping.filename_id)
            dso = addr2line.get_dso(dso_name)
            if not dso:
                continue
            sources = addr2line.get_addr_source(dso, location.vaddr_in_dso)
            if not sources:
                continue
            for i, source in enumerate(sources):
                source_file, source_line, function_name = source
                if i == 0:
                    # Don't override original function name from report library, which is more
                    # accurate when proguard mapping file is given.
                    function_id = location.lines[0].function_id
                    # Clear default line info.
                    location.lines.clear()
                else:
                    function_id = self.get_function_id(function_name, dso_name, 0)
                    if function_id == 0:
                        continue
                location.lines.append(self.add_line(source_file, source_line, function_id))

        for function in self.function_list:
            dso_name = self.get_string(function.dso_name_id)
            if function.vaddr_in_dso:
                dso = addr2line.get_dso(dso_name)
                if not dso:
                    continue
                sources = addr2line.get_addr_source(dso, function.vaddr_in_dso)
                if sources:
                    source_file, source_line, _ = sources[0]
                    function.source_filename_id = self.get_string_id(source_file)
                    function.start_line = source_line

    def add_line(self, source_file, source_line, function_id):
        """Return a new Line for `function_id` at `source_line`.

        Also records `source_file` as the source file of that function.
        """
        line = Line()
        function = self.get_function(function_id)
        function.source_filename_id = self.get_string_id(source_file)
        line.function_id = function_id
        line.line = source_line
        return line

    def gen_profile_sample(self, sample):
        """Append `sample` to the profile, with one value slot per registered sample type."""
        profile_sample = self.profile.sample.add()
        profile_sample.location_id.extend(sample.location_ids)
        # Every event registers two sample types; slots the sample never
        # touched stay at 0.
        sample_type_count = len(self.sample_types) * 2
        values = [0] * sample_type_count
        for sample_type_id, value in sample.values.items():
            values[sample_type_id] = value
        profile_sample.value.extend(values)

        for sample_label in sample.labels:
            label = profile_sample.label.add()
            label.key = sample_label.key_id
            label.str = sample_label.str_id

    def gen_profile_mapping(self, mapping):
        """Append `mapping` to the profile.

        Line number and inline frame info are flagged as present only when a
        binary cache directory is configured.
        """
        profile_mapping = self.profile.mapping.add()
        profile_mapping.id = mapping.id
        profile_mapping.memory_start = mapping.memory_start
        profile_mapping.memory_limit = mapping.memory_limit
        profile_mapping.file_offset = mapping.file_offset
        profile_mapping.filename = mapping.filename_id
        profile_mapping.build_id = mapping.build_id_id
        profile_mapping.has_filenames = True
        profile_mapping.has_functions = True
        has_line_info = bool(self.config.get('binary_cache_dir'))
        profile_mapping.has_line_numbers = has_line_info
        profile_mapping.has_inline_frames = has_line_info

    def gen_profile_location(self, location):
        """Append `location` and all of its lines to the profile."""
        profile_location = self.profile.location.add()
        profile_location.id = location.id
        profile_location.mapping_id = location.mapping_id
        profile_location.address = location.address
        for location_line in location.lines:
            line = profile_location.line.add()
            line.function_id = location_line.function_id
            line.line = location_line.line

    def gen_profile_function(self, function):
        """Append `function` to the profile; system_name is set to the same id as name."""
        profile_function = self.profile.function.add()
        profile_function.id = function.id
        profile_function.name = function.name_id
        profile_function.system_name = function.name_id
        profile_function.filename = function.source_filename_id
        profile_function.start_line = function.start_line


def main():
    """Parse the command line, then either print an existing profile or generate one."""
    parser = BaseArgumentParser(description='Generate pprof profile data in pprof.profile.')
    parser.add_argument('--show', nargs='?', action='append', help='print existing pprof.profile.')
    parser.add_argument('-i', '--record_file', nargs='+', default=['perf.data'], help="""
        Set profiling data file to report. Default is perf.data""")
    parser.add_argument('-o', '--output_file', default='pprof.profile', help="""
        The path of generated pprof profile data.""")
    parser.add_argument('--max_chain_length', type=int, default=1000000000, help="""
        Maximum depth of samples to be converted.""")  # Large value as infinity standin.
    parser.add_argument('--ndk_path', type=extant_dir, help='Set the path of a ndk release.')
    parser.add_argument(
        # os.cpu_count() may return None; fall back to a single job.
        '-j', '--jobs', type=int, default=os.cpu_count() or 1,
        help='Use multithreading to speed up source code annotation.')
    sample_filter_group = parser.add_argument_group('Sample filter options')
    sample_filter_group.add_argument('--dso', nargs='+', action='append', help="""
        Use samples only in selected binaries.""")
    parser.add_report_lib_options(sample_filter_group=sample_filter_group)

    args = parser.parse_args()
    if args.show:
        # A bare `--show` stores None; fall back to the default profile name.
        show_file = args.show[0] if args.show[0] else 'pprof.profile'
        profile = load_pprof_profile(show_file)
        printer = PprofProfilePrinter(profile)
        printer.show()
        return

    config = {}
    config['output_file'] = args.output_file
    config['dso_filters'] = flatten_arg_list(args.dso)
    config['ndk_path'] = args.ndk_path
    config['max_chain_length'] = args.max_chain_length
    config['report_lib_options'] = args.report_lib_options
    generator = PprofProfileGenerator(config)
    for record_file in args.record_file:
        generator.load_record_file(record_file)
    profile = generator.gen(args.jobs)
    store_pprof_profile(config['output_file'], profile)
    logging.info("Report is generated at '%s' successfully.", config['output_file'])


if __name__ == '__main__':
    main()